public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [committed] amdgcn: Handle early debug info in mkoffload
@ 2020-07-16 15:06 Andrew Stubbs
  2020-07-16 15:42 ` [committed,OG10] " Andrew Stubbs
  2020-07-17  6:20 ` [committed] " Thomas Schwinge
  0 siblings, 2 replies; 10+ messages in thread
From: Andrew Stubbs @ 2020-07-16 15:06 UTC (permalink / raw)
  To: gcc-patches

[-- Attachment #1: Type: text/plain, Size: 709 bytes --]

This patch adds debug support to mkoffload, similar to what happens in 
lto-wrapper.

Unlike lto-wrapper, we must deal with mismatched architectures and 
mismatched program scope. These are fixed up using manual ELF patching 
because there's no useful support in simple_object (yet). Should this be 
something that other offload targets want to do then that probably ought 
to be changed in future, but it isn't necessary yet.

This is enough to prevent rocgdb choking on malformed debug information 
and debug simple offload testcases; it may be that further adjustment is 
needed.

I have not attempted to make similar changes to the other instance of 
mkoffload as nvptx has no use for debug info.

Andrew

[-- Attachment #2: 200716-early-debug.patch --]
[-- Type: text/x-patch, Size: 11764 bytes --]

amdgcn: Handle early debug info in mkoffload

Forward the early debug information from the input LTO file to the output
HSACO file, in the same way lto-wrapper does.  This is a little more
complicated, however, because the ELF file containing the debug needs to be
converted from x86_64 to amdgcn, and because the offloaded code will have less
content than the host program the debug info describes.

gcc/ChangeLog:

	* config/gcn/mkoffload.c: Include simple-object.h and elf.h.
	(EM_AMDGPU): New macro.
	(ELFOSABI_AMDGPU_HSA): New macro.
	(ELFABIVERSION_AMDGPU_HSA): New macro.
	(EF_AMDGPU_MACH_AMDGCN_GFX803): New macro.
	(EF_AMDGPU_MACH_AMDGCN_GFX900): New macro.
	(EF_AMDGPU_MACH_AMDGCN_GFX906): New macro.
	(R_AMDGPU_NONE): New macro.
	(R_AMDGPU_ABS32_LO): New macro.
	(R_AMDGPU_ABS32_HI): New macro.
	(R_AMDGPU_ABS64): New macro.
	(R_AMDGPU_REL32): New macro.
	(R_AMDGPU_REL64): New macro.
	(R_AMDGPU_ABS32): New macro.
	(R_AMDGPU_GOTPCREL): New macro.
	(R_AMDGPU_GOTPCREL32_LO): New macro.
	(R_AMDGPU_GOTPCREL32_HI): New macro.
	(R_AMDGPU_REL32_LO): New macro.
	(R_AMDGPU_REL32_HI): New macro.
	(reserved): New macro.
	(R_AMDGPU_RELATIVE64): New macro.
	(gcn_s1_name): Delete global variable.
	(gcn_s2_name): Delete global variable.
	(gcn_o_name): Delete global variable.
	(gcn_cfile_name): Delete global variable.
	(files_to_cleanup): New global variable.
	(offload_abi): New global variable.
	(tool_cleanup): Use files_to_cleanup, not explicit list.
	(copy_early_debug_info): New function.
	(main): New local variables gcn_s1_name, gcn_s2_name, gcn_o_name,
	gcn_cfile_name.
	Create files_to_cleanup obstack.
	Recognize -march options.
	Copy early debug info from input .o files.

diff --git a/gcc/config/gcn/mkoffload.c b/gcc/config/gcn/mkoffload.c
index 0415d945e72..553f25e70df 100644
--- a/gcc/config/gcn/mkoffload.c
+++ b/gcc/config/gcn/mkoffload.c
@@ -33,31 +33,53 @@
 #include <libgen.h>
 #include "collect-utils.h"
 #include "gomp-constants.h"
+#include "simple-object.h"
+#include "elf.h"
+
+/* These probably won't be in elf.h for a while.  */
+#ifndef EM_AMDGPU
+#define EM_AMDGPU		0xe0;
+
+#define ELFOSABI_AMDGPU_HSA	 64
+#define ELFABIVERSION_AMDGPU_HSA 1
+
+#define EF_AMDGPU_MACH_AMDGCN_GFX803 0x2a
+#define EF_AMDGPU_MACH_AMDGCN_GFX900 0x2c
+#define EF_AMDGPU_MACH_AMDGCN_GFX906 0x2f
+
+#define R_AMDGPU_NONE		0
+#define R_AMDGPU_ABS32_LO	1	/* (S + A) & 0xFFFFFFFF  */
+#define R_AMDGPU_ABS32_HI	2	/* (S + A) >> 32  */
+#define R_AMDGPU_ABS64		3	/* S + A  */
+#define R_AMDGPU_REL32		4	/* S + A - P  */
+#define R_AMDGPU_REL64		5	/* S + A - P  */
+#define R_AMDGPU_ABS32		6	/* S + A  */
+#define R_AMDGPU_GOTPCREL	7	/* G + GOT + A - P  */
+#define R_AMDGPU_GOTPCREL32_LO	8	/* (G + GOT + A - P) & 0xFFFFFFFF  */
+#define R_AMDGPU_GOTPCREL32_HI	9	/* (G + GOT + A - P) >> 32  */
+#define R_AMDGPU_REL32_LO	10	/* (S + A - P) & 0xFFFFFFFF  */
+#define R_AMDGPU_REL32_HI	11	/* (S + A - P) >> 32  */
+#define reserved		12
+#define R_AMDGPU_RELATIVE64	13	/* B + A  */
+#endif
 
 const char tool_name[] = "gcn mkoffload";
 
-/* Files to unlink.  */
-static const char *gcn_s1_name;
-static const char *gcn_s2_name;
-static const char *gcn_o_name;
-static const char *gcn_cfile_name;
 static const char *gcn_dumpbase;
+static struct obstack files_to_cleanup;
 
 enum offload_abi offload_abi = OFFLOAD_ABI_UNSET;
+uint32_t elf_arch = EF_AMDGPU_MACH_AMDGCN_GFX803;  // Default GPU architecture.
 
 /* Delete tempfiles.  */
 
 void
 tool_cleanup (bool from_signal ATTRIBUTE_UNUSED)
 {
-  if (gcn_cfile_name)
-    maybe_unlink (gcn_cfile_name);
-  if (gcn_s1_name)
-    maybe_unlink (gcn_s1_name);
-  if (gcn_s2_name)
-    maybe_unlink (gcn_s2_name);
-  if (gcn_o_name)
-    maybe_unlink (gcn_o_name);
+  obstack_ptr_grow (&files_to_cleanup, NULL);
+  const char **files = XOBFINISH (&files_to_cleanup, const char **);
+  for (int i = 0; files[i]; i++)
+    maybe_unlink (files[i]);
 }
 
 static void
@@ -204,6 +226,180 @@ access_check (const char *name, int mode)
   return access (name, mode);
 }
 
+/* Copy the early-debug-info from the incoming LTO object to a new object
+   that will be linked into the output HSACO file.  The host relocations
+   must be translated into GCN relocations, and any global undefined symbols
+   must be weakened (so as not to have the debug info try to pull in host
+   junk).
+
+   Returns true if the file was created, false otherwise.  */
+
+static bool
+copy_early_debug_info (const char *infile, const char *outfile)
+{
+  const char *errmsg;
+  int err;
+
+  /* The simple_object code can handle extracting the debug sections.
+     This code is based on that in lto-wrapper.c.  */
+  int infd = open (infile, O_RDONLY | O_BINARY);
+  if (infd == -1)
+    return false;
+  simple_object_read *inobj = simple_object_start_read (infd, 0,
+							"__GNU_LTO",
+							&errmsg, &err);
+  if (!inobj)
+    return false;
+
+  off_t off, len;
+  if (simple_object_find_section (inobj, ".gnu.debuglto_.debug_info",
+				  &off, &len, &errmsg, &err) != 1)
+    {
+      simple_object_release_read (inobj);
+      close (infd);
+      return false;
+    }
+
+  errmsg = simple_object_copy_lto_debug_sections (inobj, outfile, &err, true);
+  if (errmsg)
+    {
+      unlink_if_ordinary (outfile);
+      return false;
+    }
+
+  simple_object_release_read (inobj);
+  close (infd);
+
+  /* Open the file we just created for some adjustments.
+     The simple_object code can't do this, so we do it manually.  */
+  FILE *outfd = fopen (outfile, "r+b");
+  if (!outfd)
+    return false;
+
+  Elf64_Ehdr ehdr;
+  if (fread (&ehdr, sizeof (ehdr), 1, outfd) != 1)
+    {
+      fclose (outfd);
+      return true;
+    }
+
+  /* We only support host relocations of x86_64, for now.  */
+  gcc_assert (ehdr.e_machine == EM_X86_64);
+
+  /* Patch the correct elf architecture flag into the file.  */
+  ehdr.e_ident[7] = ELFOSABI_AMDGPU_HSA;
+  ehdr.e_ident[8] = ELFABIVERSION_AMDGPU_HSA;
+  ehdr.e_type = ET_REL;
+  ehdr.e_machine = EM_AMDGPU;
+  ehdr.e_flags = elf_arch;
+
+  /* Load the section headers so we can walk them later.  */
+  Elf64_Shdr *sections = (Elf64_Shdr *)xmalloc (sizeof (Elf64_Shdr)
+						* ehdr.e_shnum);
+  if (fseek (outfd, ehdr.e_shoff, SEEK_SET) == -1
+      || fread (sections, sizeof (Elf64_Shdr), ehdr.e_shnum,
+		outfd) != ehdr.e_shnum)
+    {
+      free (sections);
+      fclose (outfd);
+      return true;
+    }
+
+  /* Convert the host relocations to target relocations.  */
+  for (int i = 0; i < ehdr.e_shnum; i++)
+    {
+      if (sections[i].sh_type != SHT_RELA)
+	continue;
+
+      char *data = (char *)xmalloc (sections[i].sh_size);
+      if (fseek (outfd, sections[i].sh_offset, SEEK_SET) == -1
+	  || fread (data, sections[i].sh_size, 1, outfd) != 1)
+	{
+	  free (data);
+	  continue;
+	}
+
+      for (size_t offset = 0;
+	   offset < sections[i].sh_size;
+	   offset += sections[i].sh_entsize)
+	{
+	  Elf64_Rela *reloc = (Elf64_Rela *) (data + offset);
+
+	  /* Map the host relocations to GCN relocations.
+	     Only relocations that can appear in DWARF need be handled.  */
+	  switch (ELF64_R_TYPE (reloc->r_info))
+	    {
+	    case R_X86_64_32:
+	    case R_X86_64_32S:
+	      reloc->r_info = R_AMDGPU_ABS32;
+	      break;
+	    case R_X86_64_PC32:
+	      reloc->r_info = R_AMDGPU_REL32;
+	      break;
+	    case R_X86_64_PC64:
+	      reloc->r_info = R_AMDGPU_REL64;
+	      break;
+	    case R_X86_64_64:
+	      reloc->r_info = R_AMDGPU_ABS64;
+	      break;
+	    case R_X86_64_RELATIVE:
+	      reloc->r_info = R_AMDGPU_RELATIVE64;
+	      break;
+	    default:
+	      gcc_unreachable ();
+	    }
+	}
+
+      /* Write back our relocation changes.  */
+      if (fseek (outfd, sections[i].sh_offset, SEEK_SET) != -1)
+	fwrite (data, sections[i].sh_size, 1, outfd);
+
+      free (data);
+    }
+
+  /* Weaken any global undefined symbols that would pull in unwanted
+     objects.  */
+  for (int i = 0; i < ehdr.e_shnum; i++)
+    {
+      if (sections[i].sh_type != SHT_SYMTAB)
+	continue;
+
+      char *data = (char *)xmalloc (sections[i].sh_size);
+      if (fseek (outfd, sections[i].sh_offset, SEEK_SET) == -1
+	  || fread (data, sections[i].sh_size, 1, outfd) != 1)
+	{
+	  free (data);
+	  continue;
+	}
+
+      for (size_t offset = 0;
+	   offset < sections[i].sh_size;
+	   offset += sections[i].sh_entsize)
+	{
+	  Elf64_Sym *sym = (Elf64_Sym *) (data + offset);
+	  int type = ELF64_ST_TYPE (sym->st_info);
+	  int bind = ELF64_ST_BIND (sym->st_info);
+
+	  if (bind == STB_GLOBAL && sym->st_shndx == 0)
+	    sym->st_info = ELF64_ST_INFO (STB_WEAK, type);
+	}
+
+      /* Write back our symbol changes.  */
+      if (fseek (outfd, sections[i].sh_offset, SEEK_SET) != -1)
+	fwrite (data, sections[i].sh_size, 1, outfd);
+
+      free (data);
+    }
+  free (sections);
+
+  /* Write back our header changes.  */
+  rewind (outfd);
+  fwrite (&ehdr, sizeof (ehdr), 1, outfd);
+
+  fclose (outfd);
+  return true;
+}
+
 /* Parse an input assembler file, extract the offload tables etc.,
    and output (1) the assembler code, minus the tables (which can contain
    problematic relocations), and (2) a C file with the offload tables
@@ -538,9 +734,15 @@ main (int argc, char **argv)
   FILE *cfile = stdout;
   const char *outname = 0;
 
+  const char *gcn_s1_name;
+  const char *gcn_s2_name;
+  const char *gcn_o_name;
+  const char *gcn_cfile_name;
+
   progname = "mkoffload";
   diagnostic_initialize (global_dc, 0);
 
+  obstack_init (&files_to_cleanup);
   if (atexit (mkoffload_cleanup) != 0)
     fatal_error (input_location, "atexit failed");
 
@@ -632,7 +834,14 @@ main (int argc, char **argv)
       else if (strcmp (argv[i], "-dumpbase") == 0
 	       && i + 1 < argc)
 	dumppfx = argv[++i];
+      else if (strcmp (argv[i], "-march=fiji") == 0)
+	elf_arch = EF_AMDGPU_MACH_AMDGCN_GFX803;
+      else if (strcmp (argv[i], "-march=gfx900") == 0)
+	elf_arch = EF_AMDGPU_MACH_AMDGCN_GFX900;
+      else if (strcmp (argv[i], "-march=gfx906") == 0)
+	elf_arch = EF_AMDGPU_MACH_AMDGCN_GFX906;
     }
+
   if (!(fopenacc ^ fopenmp))
     fatal_error (input_location, "either -fopenacc or -fopenmp must be set");
 
@@ -693,6 +902,10 @@ main (int argc, char **argv)
       gcn_o_name = make_temp_file (".mkoffload.hsaco");
       gcn_cfile_name = make_temp_file (".c");
     }
+  obstack_ptr_grow (&files_to_cleanup, gcn_s1_name);
+  obstack_ptr_grow (&files_to_cleanup, gcn_s2_name);
+  obstack_ptr_grow (&files_to_cleanup, gcn_o_name);
+  obstack_ptr_grow (&files_to_cleanup, gcn_cfile_name);
 
   obstack_ptr_grow (&cc_argv_obstack, "-dumpdir");
   obstack_ptr_grow (&cc_argv_obstack, "");
@@ -710,6 +923,39 @@ main (int argc, char **argv)
   struct obstack ld_argv_obstack;
   obstack_init (&ld_argv_obstack);
   obstack_ptr_grow (&ld_argv_obstack, driver);
+
+  /* Extract early-debug information from the input objects.
+     This loop finds all the inputs that end ".o" and aren't the output.  */
+  int dbgcount = 0;
+  for (int ix = 1; ix != argc; ix++)
+    {
+      if (!strcmp (argv[ix], "-o") && ix + 1 != argc)
+	++ix;
+      else
+	{
+	  if (strcmp (argv[ix] + strlen(argv[ix]) - 2, ".o") == 0)
+	    {
+	      char *dbgobj;
+	      if (save_temps)
+		{
+		  char buf[10];
+		  sprintf (buf, "%d", dbgcount++);
+		  dbgobj = concat (dumppfx, ".mkoffload.dbg", buf, ".o", NULL);
+		}
+	      else
+		dbgobj = make_temp_file (".mkoffload.dbg.o");
+
+	      /* If the copy fails then just ignore it.  */
+	      if (copy_early_debug_info (argv[ix], dbgobj))
+		{
+		  obstack_ptr_grow (&ld_argv_obstack, dbgobj);
+		  obstack_ptr_grow (&files_to_cleanup, dbgobj);
+		}
+	      else
+		free (dbgobj);
+	    }
+	}
+    }
   obstack_ptr_grow (&ld_argv_obstack, gcn_s2_name);
   obstack_ptr_grow (&ld_argv_obstack, "-lgomp");
 

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2020-07-20 12:11 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-07-16 15:06 [committed] amdgcn: Handle early debug info in mkoffload Andrew Stubbs
2020-07-16 15:42 ` [committed,OG10] " Andrew Stubbs
2020-07-17  6:20 ` [committed] " Thomas Schwinge
2020-07-17 11:29   ` Andrew Stubbs
2020-07-17 11:49     ` Andrew Stubbs
2020-07-20  7:35     ` Richard Biener
2020-07-20  8:40       ` Andrew Stubbs
2020-07-20 10:01         ` Richard Biener
2020-07-20 11:04           ` Andrew Stubbs
2020-07-20 12:11             ` Richard Biener

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).