public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r13-407] [Ada] CUDA: use binder to generate kernel-registration code
@ 2022-05-13  8:09 Pierre-Marie de Rodat
  0 siblings, 0 replies; only message in thread
From: Pierre-Marie de Rodat @ 2022-05-13  8:09 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:a2cff9e9af15b179ff7a2c7e147e88ec8ce52936

commit r13-407-ga2cff9e9af15b179ff7a2c7e147e88ec8ce52936
Author: Ghjuvan Lacambre <lacambre@adacore.com>
Date:   Tue Sep 14 10:49:08 2021 +0200

    [Ada] CUDA: use binder to generate kernel-registration code
    
    Compiling CUDA code requires compiling code for the host (= CPU) and for
    the device (= GPU). Device code is embedded into the host code and must
    be registered with the CUDA runtime by the host.
    
    The original approach we took for registering CUDA kernels was to
    generate the registration-code on a unit basis, i.e. each unit took care
    of registering its own kernels. Unfortunately, this makes linking
    kernels and device functions that belong to different units much harder.
    
    We thus rework this approach in order to have GNAT generate kernel names
    in ALI files. The binder reads the ALI files and generates kernel
    registration code for each of the kernels found in ALI files.
    
    gcc/ada/
    
            * ali.adb: Introduce new 'K' line in ALI files, used to
            represent CUDA kernel entries.
            * ali.ads: Create new CUDA_Kernels table, which contains entries
            of type CUDA_Kernel_Record. Each CUDA_Kernel_Record corresponds
            to a K line in an ali file.
            * bindgen.adb: Introduce new Gen_CUDA_Init procedure in the
            binder, which generates CUDA kernel registration code.
            * gnat_cuda.adb: Move Get_CUDA_Kernels spec to package spec to
            make it available to bindgen.adb.
            * gnat_cuda.ads: Likewise.
            * lib-writ.adb: Introduce new Output_CUDA_Symbols procedure,
            which generates one 'K' line in the ALI file per visible CUDA
            kernel.
            * opt.ads: Introduce Enable_CUDA_Expansion option, triggered by
            using the -gnatd_c flag.
            * switch-b.adb: Likewise.
            * switch-c.adb: Likewise.

Diff:
---
 gcc/ada/ali.adb       |  23 +++++++-
 gcc/ada/ali.ads       |  30 +++++++++++
 gcc/ada/bindgen.adb   | 141 ++++++++++++++++++++++++++++++++++++++++++++++++++
 gcc/ada/gnat_cuda.adb |   5 --
 gcc/ada/gnat_cuda.ads |   5 ++
 gcc/ada/lib-writ.adb  |  49 ++++++++++++++++++
 gcc/ada/opt.ads       |   6 +++
 gcc/ada/switch-b.adb  |   3 ++
 gcc/ada/switch-c.adb  |   3 ++
 9 files changed, 259 insertions(+), 6 deletions(-)

diff --git a/gcc/ada/ali.adb b/gcc/ada/ali.adb
index 90fcfad5bbe..a5fba5dffd1 100644
--- a/gcc/ada/ali.adb
+++ b/gcc/ada/ali.adb
@@ -252,6 +252,7 @@ package body ALI is
       'E' | --  external
       'G' | --  invocation graph
       'I' | --  interrupt
+      'K' | --  CUDA kernels
       'L' | --  linker option
       'M' | --  main program
       'N' | --  notes
@@ -269,7 +270,7 @@ package body ALI is
 
       --  Still available:
 
-      'B' | 'F' | 'H' | 'J' | 'K' | 'O' | 'Q' => False);
+      'B' | 'F' | 'H' | 'J' | 'O' | 'Q' => False);
 
    ------------------------------
    -- Add_Invocation_Construct --
@@ -1743,12 +1744,14 @@ package body ALI is
       ALIs.Table (Id) := (
         Afile                        => F,
         Compile_Errors               => False,
+        First_CUDA_Kernel            => CUDA_Kernels.Last + 1,
         First_Interrupt_State        => Interrupt_States.Last + 1,
         First_Sdep                   => No_Sdep_Id,
         First_Specific_Dispatching   => Specific_Dispatching.Last + 1,
         First_Unit                   => No_Unit_Id,
         GNATprove_Mode               => False,
         Invocation_Graph_Encoding    => No_Encoding,
+        Last_CUDA_Kernel             => CUDA_Kernels.Last,
         Last_Interrupt_State         => Interrupt_States.Last,
         Last_Sdep                    => No_Sdep_Id,
         Last_Specific_Dispatching    => Specific_Dispatching.Last,
@@ -1915,6 +1918,24 @@ package body ALI is
          C := Getc;
       end loop A_Loop;
 
+      --  Acquire 'K' lines if present
+
+      Check_Unknown_Line;
+
+      while C = 'K' loop
+         if Ignore ('K') then
+            Skip_Line;
+
+         else
+            Skip_Space;
+            CUDA_Kernels.Append ((Kernel_Name => Get_Name));
+            ALIs.Table (Id).Last_CUDA_Kernel := CUDA_Kernels.Last;
+            Skip_Eol;
+         end if;
+
+         C := Getc;
+      end loop;
+
       --  Acquire P line
 
       Check_Unknown_Line;
diff --git a/gcc/ada/ali.ads b/gcc/ada/ali.ads
index 7419c57191f..a5af75e84cf 100644
--- a/gcc/ada/ali.ads
+++ b/gcc/ada/ali.ads
@@ -46,6 +46,9 @@ package ALI is
    type ALI_Id is range 0 .. 99_999_999;
    --  Id values used for ALIs table entries
 
+   type CUDA_Kernel_Id is range 0 .. 99_999_999;
+   --  Id values used for CUDA_Kernel table entries
+
    type Unit_Id is range 0 .. 99_999_999;
    --  Id values used for Unit table entries
 
@@ -254,6 +257,12 @@ package ALI is
       Restrictions : Restrictions_Info;
       --  Restrictions information reconstructed from R lines
 
+      First_CUDA_Kernel : CUDA_Kernel_Id;
+      Last_CUDA_Kernel  : CUDA_Kernel_Id'Base;
+      --  These point to the first and last entries in the CUDA_Kernels table
+      --  for this unit. If there are no entries, First_CUDA_Kernel =
+      --  Last_CUDA_Kernel + 1.
+
       First_Interrupt_State : Interrupt_State_Id;
       Last_Interrupt_State  : Interrupt_State_Id'Base;
       --  These point to the first and last entries in the interrupt state
@@ -290,6 +299,27 @@ package ALI is
      Table_Increment      => 200,
      Table_Name           => "ALIs");
 
+   ---------------------------
+   -- CUDA Kernels Table --
+   ---------------------------
+
+   --  An entry is made in this table for each K (CUDA Kernel) line
+   --  encountered in the input ALI file. The First/Last_CUDA_Kernel_Id
+   --  fields of the ALI file entry show the range of entries defined
+   --  within a particular ALI file.
+
+   type CUDA_Kernel_Record is record
+      Kernel_Name : Name_Id;
+   end record;
+
+   package CUDA_Kernels is new Table.Table (
+     Table_Component_Type => CUDA_Kernel_Record,
+     Table_Index_Type     => CUDA_Kernel_Id'Base,
+     Table_Low_Bound      => CUDA_Kernel_Id'First,
+     Table_Initial        => 100,
+     Table_Increment      => 200,
+     Table_Name           => "Cuda_Kernels");
+
    ----------------
    -- Unit Table --
    ----------------
diff --git a/gcc/ada/bindgen.adb b/gcc/ada/bindgen.adb
index d7ba26798e5..35587087178 100644
--- a/gcc/ada/bindgen.adb
+++ b/gcc/ada/bindgen.adb
@@ -317,6 +317,9 @@ package body Bindgen is
    procedure Gen_CodePeer_Wrapper;
    --  For CodePeer, generate wrapper which calls user-defined main subprogram
 
+   procedure Gen_CUDA_Init;
+   --  When CUDA registration code is needed.
+
    procedure Gen_Elab_Calls (Elab_Order : Unit_Id_Array);
    --  Generate sequence of elaboration calls
 
@@ -1239,6 +1242,137 @@ package body Bindgen is
       Bind_Env_String_Built := True;
    end Gen_Bind_Env_String;
 
+   -------------------
+   -- Gen_CUDA_Init --
+   -------------------
+
+   procedure Gen_CUDA_Init is
+      Unit_Name : constant String :=
+        Get_Name_String (Units.Table (First_Unit_Entry).Uname);
+      Unit : constant String :=
+        Unit_Name (Unit_Name'First .. Unit_Name'Last - 2);
+   begin
+      if not Enable_CUDA_Expansion then
+         return;
+      end if;
+
+      WBI ("");
+      WBI ("   ");
+
+      WBI ("   function CUDA_Register_Function");
+      WBI ("      (Fat_Binary_Handle : System.Address;");
+      WBI ("       Func : System.Address;");
+      WBI ("       Kernel_Name : Interfaces.C.Strings.chars_ptr;");
+      WBI ("       Kernel_Name_2 : Interfaces.C.Strings.chars_ptr;");
+      WBI ("       Minus_One : Integer;");
+      WBI ("       Nullptr1 : System.Address;");
+      WBI ("       Nullptr2 : System.Address;");
+      WBI ("       Nullptr3 : System.Address;");
+      WBI ("       Nullptr4 : System.Address;");
+      WBI ("       Nullptr5 : System.Address) return Boolean;");
+      WBI ("   pragma Import");
+      WBI ("     (Convention => C,");
+      WBI ("      Entity => CUDA_Register_Function,");
+      WBI ("      External_Name => ""__cudaRegisterFunction"");");
+      WBI ("");
+      WBI ("   function CUDA_Register_Fat_Binary");
+      WBI ("     (Fat_Binary : System.Address)");
+      WBI ("      return System.Address;");
+      WBI ("    pragma Import");
+      WBI ("      (Convention => C,");
+      WBI ("       Entity => CUDA_Register_Fat_Binary,");
+      WBI ("       External_Name => ""__cudaRegisterFatBinary"");");
+      WBI ("");
+      WBI ("   function CUDA_Register_Fat_Binary_End");
+      WBI ("     (Fat_Binary : System.Address) return Boolean;");
+      WBI ("   pragma Import");
+      WBI ("     (Convention => C,");
+      WBI ("      Entity => CUDA_Register_Fat_Binary_End,");
+      WBI ("      External_Name => ""__cudaRegisterFatBinaryEnd"");");
+      WBI ("");
+      WBI ("   type Fatbin_Wrapper is record");
+      WBI ("      Magic   : Interfaces.C.int;");
+      WBI ("      Version : Interfaces.C.int;");
+      WBI ("      Data    : System.Address;");
+      WBI ("      Filename_Or_Fatbins : System.Address;");
+      WBI ("   end record;");
+      WBI ("");
+      WBI ("   Fat_Binary : System.Address;");
+      WBI ("   pragma Import");
+      WBI ("      (Convention    => C,");
+      WBI ("       Entity        => Fat_Binary,");
+      WBI ("       External_Name => ""_binary_" & Unit & "_fatbin_start"");");
+      WBI ("");
+      WBI ("   Wrapper : Fatbin_Wrapper :=");
+      WBI ("     (16#466243b1#,");
+      WBI ("      1,");
+      WBI ("      Fat_Binary'Address,");
+      WBI ("      System.Null_Address);");
+      WBI ("");
+      WBI ("   Fat_Binary_Handle : System.Address :=");
+      WBI ("     CUDA_Register_Fat_Binary (Wrapper'Address);");
+      WBI ("");
+
+      for K in CUDA_Kernels.First .. CUDA_Kernels.Last loop
+         declare
+            K_String : constant String := CUDA_Kernel_Id'Image (K);
+            N : constant String :=
+              K_String (K_String'First + 1 .. K_String'Last);
+            Kernel_Symbol : constant String := "Kernel_" & N;
+            --  K_Symbol is a unique identifier used to derive all symbol names
+            --  related to kernel K.
+
+            Kernel_Addr : constant String := Kernel_Symbol & "_Addr";
+            --  Kernel_Addr is the name of the symbol representing the address
+            --  of the host-side procedure of the kernel. The address is
+            --  pragma-imported and then used while registering the kernel with
+            --  the CUDA runtime.
+            Kernel_String : constant String := Kernel_Symbol & "_String";
+            --  Kernel_String is the name of the C-string containing the name
+            --  of the kernel. It is used for registering the kernel with the
+            --  CUDA runtime.
+            Kernel_Name : constant String :=
+               Get_Name_String (CUDA_Kernels.Table (K).Kernel_Name);
+            --  Kernel_Name is the name of the kernel, after package expansion.
+
+         begin
+            --  Import host-side kernel address.
+            WBI ("   " & Kernel_Addr & " : constant System.Address;");
+            WBI ("   pragma Import");
+            WBI ("      (Convention    => C,");
+            WBI ("       Entity        => " & Kernel_Addr & ",");
+            WBI ("       External_Name => """ & Kernel_Name & """);");
+            WBI ("");
+
+            --  Generate C-string containing name of kernel.
+            WBI
+              ("   " & Kernel_String & " : Interfaces.C.Strings.Chars_Ptr :=");
+            WBI ("    Interfaces.C.Strings.New_Char_Array ("""
+                  & Kernel_Name
+                  & """);");
+            WBI ("");
+
+            --  Generate call to CUDA runtime to register function.
+            WBI ("   CUDA_Register" & N & " : Boolean :=");
+            WBI ("     CUDA_Register_Function (");
+            WBI ("       Fat_Binary_Handle, ");
+            WBI ("       " & Kernel_Addr & ",");
+            WBI ("       " & Kernel_String & ",");
+            WBI ("       " & Kernel_String & ",");
+            WBI ("       -1,");
+            WBI ("       System.Null_Address,");
+            WBI ("       System.Null_Address,");
+            WBI ("       System.Null_Address,");
+            WBI ("       System.Null_Address,");
+            WBI ("       System.Null_Address);");
+            WBI ("");
+         end;
+      end loop;
+
+      WBI ("   CUDA_End : Boolean := ");
+      WBI ("      CUDA_Register_Fat_Binary_End(Fat_Binary_Handle);");
+   end Gen_CUDA_Init;
+
    --------------------------
    -- Gen_CodePeer_Wrapper --
    --------------------------
@@ -2353,6 +2487,11 @@ package body Bindgen is
          WBI ("with System.Secondary_Stack;");
       end if;
 
+      if Enable_CUDA_Expansion then
+         WBI ("with Interfaces.C;");
+         WBI ("with Interfaces.C.Strings;");
+      end if;
+
       Resolve_Binder_Options (Elab_Order);
 
       --  Generate standard with's
@@ -2502,6 +2641,8 @@ package body Bindgen is
            Get_Main_Name & """);");
       end if;
 
+      Gen_CUDA_Init;
+
       --  Generate version numbers for units, only if needed. Be very safe on
       --  the condition.
 
diff --git a/gcc/ada/gnat_cuda.adb b/gcc/ada/gnat_cuda.adb
index 2a0a450d886..4bb8c5aa3a7 100644
--- a/gcc/ada/gnat_cuda.adb
+++ b/gcc/ada/gnat_cuda.adb
@@ -118,11 +118,6 @@ package body GNAT_CUDA is
    --  are declared within package body Pack_Body. Returns No_Elist if Pack_Id
    --  does not contain such entities.
 
-   function Get_CUDA_Kernels (Pack_Id : Entity_Id) return Elist_Id;
-   --  Returns an Elist of all procedures marked with pragma CUDA_Global that
-   --  are declared within package body Pack_Body. Returns No_Elist if Pack_Id
-   --  does not contain such procedures.
-
    procedure Remove_CUDA_Device_Entities (Pack_Id : Entity_Id);
    --  Removes all entities marked with the CUDA_Device pragma from package
    --  Pack_Id. Must only be called when compiling for the host.
diff --git a/gcc/ada/gnat_cuda.ads b/gcc/ada/gnat_cuda.ads
index b5fcf8ff631..e756162fa01 100644
--- a/gcc/ada/gnat_cuda.ads
+++ b/gcc/ada/gnat_cuda.ads
@@ -92,4 +92,9 @@ package GNAT_CUDA is
    --  - Empty content of CUDA_Global procedures.
    --  - Remove declarations of CUDA_Device entities.
 
+   function Get_CUDA_Kernels (Pack_Id : Entity_Id) return Elist_Id;
+   --  Returns an Elist of all procedures marked with pragma CUDA_Global that
+   --  are declared within package body Pack_Body. Returns No_Elist if Pack_Id
+   --  does not contain such procedures.
+
 end GNAT_CUDA;
diff --git a/gcc/ada/lib-writ.adb b/gcc/ada/lib-writ.adb
index 59a9170972b..556df9a7b73 100644
--- a/gcc/ada/lib-writ.adb
+++ b/gcc/ada/lib-writ.adb
@@ -30,6 +30,7 @@ with Debug;          use Debug;
 with Einfo;          use Einfo;
 with Einfo.Entities; use Einfo.Entities;
 with Einfo.Utils;    use Einfo.Utils;
+with Elists;         use Elists;
 with Errout;         use Errout;
 with Fname;          use Fname;
 with Fname.UF;       use Fname.UF;
@@ -37,6 +38,7 @@ with Lib.Util;       use Lib.Util;
 with Lib.Xref;       use Lib.Xref;
 with Nlists;         use Nlists;
 with Gnatvsn;        use Gnatvsn;
+with GNAT_CUDA;      use GNAT_CUDA;
 with Opt;            use Opt;
 with Osint;          use Osint;
 with Osint.C;        use Osint.C;
@@ -268,6 +270,10 @@ package body Lib.Writ is
       --  Collect with lines for entries in the context clause of the given
       --  compilation unit, Cunit.
 
+      procedure Output_CUDA_Symbols (Unit_Num : Unit_Number_Type);
+      --  Output CUDA symbols, so that the rest of the toolchain may know what
+      --  symbols need registering with the CUDA runtime.
+
       procedure Write_Unit_Information (Unit_Num : Unit_Number_Type);
       --  Write out the library information for one unit for which code is
       --  generated (includes unit line and with lines).
@@ -386,6 +392,41 @@ package body Lib.Writ is
          end loop;
       end Collect_Withs;
 
+      -------------------------
+      -- Output_CUDA_Symbols --
+      -------------------------
+
+      procedure Output_CUDA_Symbols (Unit_Num : Unit_Number_Type) is
+         Unit_Id     : constant Node_Id := Unit (Cunit (Unit_Num));
+         Spec_Id     : Node_Id;
+         Kernels     : Elist_Id;
+         Kernel_Elm  : Elmt_Id;
+         Kernel      : Entity_Id;
+      begin
+         if not Enable_CUDA_Expansion then
+            return;
+         end if;
+         Spec_Id := (if Nkind (Unit_Id) = N_Package_Body
+           then Corresponding_Spec (Unit_Id)
+           else Defining_Unit_Name (Specification (Unit_Id)));
+         Kernels := Get_CUDA_Kernels (Spec_Id);
+         if No (Kernels) then
+            return;
+         end if;
+
+         Kernel_Elm := First_Elmt (Kernels);
+         while Present (Kernel_Elm) loop
+            Kernel := Node (Kernel_Elm);
+
+            Write_Info_Initiate ('K');
+            Write_Info_Char (' ');
+            Write_Info_Name (Chars (Kernel));
+            Write_Info_Terminate;
+            Next_Elmt (Kernel_Elm);
+         end loop;
+
+      end Output_CUDA_Symbols;
+
       ----------------------------
       -- Write_Unit_Information --
       ----------------------------
@@ -1166,6 +1207,14 @@ package body Lib.Writ is
          Write_Info_Terminate;
       end loop;
 
+      --  Output CUDA Kernel lines
+
+      for Unit in Units.First .. Last_Unit loop
+         if Present (Cunit (Unit)) then
+            Output_CUDA_Symbols (Unit);
+         end if;
+      end loop;
+
       --  Output parameters ('P') line
 
       Write_Info_Initiate ('P');
diff --git a/gcc/ada/opt.ads b/gcc/ada/opt.ads
index 0d8b25f13a0..e747397b9fc 100644
--- a/gcc/ada/opt.ads
+++ b/gcc/ada/opt.ads
@@ -527,6 +527,12 @@ package Opt is
 
    --  WARNING: There is a matching C declaration of this variable in fe.h
 
+   Enable_CUDA_Expansion : Boolean := False;
+   --  GNAT, GNATBIND
+   --  Set to True to enable CUDA host expansion:
+   --    - Removal of CUDA_Global and CUDA_Device symbols
+   --    - Generation of kernel registration code in packages
+
    Error_Msg_Line_Length : Nat := 0;
    --  GNAT
    --  Records the error message line length limit. If this is set to zero,
diff --git a/gcc/ada/switch-b.adb b/gcc/ada/switch-b.adb
index 780a0710aeb..10feb234833 100644
--- a/gcc/ada/switch-b.adb
+++ b/gcc/ada/switch-b.adb
@@ -158,6 +158,9 @@ package body Switch.B is
 
                elsif Underscore then
                   Set_Underscored_Debug_Flag (C);
+                  if Debug_Flag_Underscore_C then
+                     Enable_CUDA_Expansion := True;
+                  end if;
                   Underscore := False;
 
                --    letter
diff --git a/gcc/ada/switch-c.adb b/gcc/ada/switch-c.adb
index a34e8410be8..522cdf658ed 100644
--- a/gcc/ada/switch-c.adb
+++ b/gcc/ada/switch-c.adb
@@ -390,6 +390,9 @@ package body Switch.C is
                      elsif Underscore then
                         Set_Underscored_Debug_Flag (C);
                         Store_Compilation_Switch ("-gnatd_" & C);
+                        if Debug_Flag_Underscore_C then
+                           Enable_CUDA_Expansion := True;
+                        end if;
 
                      --  Normal flag


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2022-05-13  8:09 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-05-13  8:09 [gcc r13-407] [Ada] CUDA: use binder to generate kernel-registration code Pierre-Marie de Rodat

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).