public inbox for binutils@sourceware.org
 help / color / mirror / Atom feed
* [PATCH v2] pe/coff - add support for base64 encoded long section names
@ 2023-05-22 19:48 Tristan Gingold
  2023-05-23  6:20 ` Jan Beulich
  2023-05-23  9:17 ` Jose E. Marchesi
  0 siblings, 2 replies; 7+ messages in thread
From: Tristan Gingold @ 2023-05-22 19:48 UTC (permalink / raw)
  To: binutils

Hello,

so new version of the patch, including generation of base64 encoded 
section name indexes.

I was able to assemble the test from PR 30444 (using -mbig-obj to 
overcome the 2^16 sections number limit).

I haven't added a test due to its size.

No failures on x86_64-gnu-linux for binutils configured for 
x86_64-pc-mingw64.

Tristan.

2023-05-22  Tristan Gingold  <tgingold@thinkpad-e15>

	PR 30444
	* coffcode.h (coff_write_object_contents): Handle base64 encoding
	on PE.  Also check for too large string table.
	* coffgen.c (extract_long_section_name): New function extracted
	from ...
	(make_a_section_from_file): ... here.  Add support for base64
	long section names.
	(decode_base64): New function.

diff --git a/bfd/coffcode.h b/bfd/coffcode.h
index 974c8ad9854..097afe9d16c 100644
--- a/bfd/coffcode.h
+++ b/bfd/coffcode.h
@@ -3625,18 +3625,54 @@ coff_write_object_contents (bfd * abfd)
  	  len = strlen (current->name);
  	  if (len > SCNNMLEN)
  	    {
-	      /* The s_name field is defined to be NUL-padded but need not be
-		 NUL-terminated.  We use a temporary buffer so that we can still
-		 sprintf all eight chars without splatting a terminating NUL
-		 over the first byte of the following member (s_paddr).  */
-	      /* PR 21096: The +20 is to stop a bogus warning from gcc7 about
-		 a possible buffer overflow.  */
-	      char s_name_buf[SCNNMLEN + 1 + 20];

  	      /* An inherent limitation of the /nnnnnnn notation used to indicate
  		 the offset of the long name in the string table is that we
  		 cannot address entries beyone the ten million byte boundary.  */
-	      if (string_size >= 10000000)
+	      if (string_size < 10000000)
+		{
+		  /* The s_name field is defined to be NUL-padded but need not
+		     be NUL-terminated.  We use a temporary buffer so that we
+		     can still sprintf all eight chars without splatting a
+		     terminating NUL over the first byte of the following
+		     member (s_paddr).  */
+		  /* PR 21096: The +20 is to stop a bogus warning from gcc7
+		     about a possible buffer overflow.  */
+		  char s_name_buf[SCNNMLEN + 1 + 20];
+
+		  /* We do not need to use snprintf here as we have already
+		     verified that string_size is not too big, plus we have
+		     an overlarge buffer, just in case.  */
+		  sprintf (s_name_buf, "/%lu", (unsigned long) string_size);
+		  /* Then strncpy takes care of any padding for us.  */
+		  strncpy (section.s_name, s_name_buf, SCNNMLEN);
+		}
+	      else
+#ifdef COFF_WITH_PE
+		{
+		  /* PE use a bae64 encoding for long section names whose
+		     index is very large.  */
+		  static const char base64[] =
+		    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+		    "abcdefghijklmnopqrstuvwxyz"
+		    "0123456789+/";
+		  unsigned long off = string_size;
+		  unsigned i;
+
+		  section.s_name[0] = '/';
+		  section.s_name[1] = '/';
+		  for (i = SCNNMLEN - 1; i >=2; i--)
+		    {
+		      section.s_name[i] = base64[off & 0x3f];
+		      off >>= 6;
+		    }
+		}
+#endif
+	      if (string_size > 0xffffffffUL - (len + 1)
+#ifndef COFF_WITH_PE
+		  || string_size >= 10000000
+#endif
+		  )
  		{
  		  bfd_set_error (bfd_error_file_too_big);
  		  _bfd_error_handler
@@ -3646,12 +3682,6 @@ coff_write_object_contents (bfd * abfd)
  		  return false;
  		}

-	      /* We do not need to use snprintf here as we have already verfied
-		 that string_size is not too big, plus we have an overlarge
-		 buffer, just in case.  */
-	      sprintf (s_name_buf, "/%lu", (unsigned long) string_size);
-	      /* Then strncpy takes care of any padding for us.  */
-	      strncpy (section.s_name, s_name_buf, SCNNMLEN);
  	      string_size += len + 1;
  	      long_section_names = true;
  	    }
diff --git a/bfd/coffgen.c b/bfd/coffgen.c
index ac936def566..970e06bf2c0 100644
--- a/bfd/coffgen.c
+++ b/bfd/coffgen.c
@@ -43,6 +43,69 @@
  #include "coff/internal.h"
  #include "libcoff.h"

+/* Extract a long section name at STRINDEX and copy it to the bfd objstack.
+   Return NULL in case of error.  */
+
+static char *
+extract_long_section_name(bfd *abfd, unsigned long strindex)
+{
+  const char *strings;
+  char *name;
+
+  strings = _bfd_coff_read_string_table (abfd);
+  if (strings == NULL)
+    return NULL;
+  if ((bfd_size_type)(strindex + 2) >= obj_coff_strings_len (abfd))
+    return NULL;
+  strings += strindex;
+  name = (char *) bfd_alloc (abfd, (bfd_size_type) strlen (strings) + 1);
+  if (name == NULL)
+    return NULL;
+  strcpy (name, strings);
+
+  return name;
+}
+
+/* Decode a base64 coded string at STR of length LEN, and write the result
+   to RES.  Return true on success.
+   Return false in case of invalid character or overflow.  */
+
+static bool
+decode_base64 (const char *str, unsigned len, uint32_t *res)
+{
+  unsigned i;
+  uint32_t val;
+
+  val = 0;
+  for (i = 0; i < len; i++)
+    {
+      char c = str[i];
+      unsigned d;
+
+      if (c >= 'A' && c <= 'Z')
+	d = c - 'A';
+      else if (c >= 'a' && c <= 'z')
+	d = c - 'a' + 26;
+      else if (c >= '0' && c <= '9')
+	d = c - '0' + 52;
+      else if (c == '+')
+	d = 62;
+      else if (c == '/')
+	d = 63;
+      else
+	return false;
+
+      /* Check for overflow. */
+      if ((val >> 26) != 0)
+	return false;
+
+      val = (val << 6) + d;
+    }
+
+  *res = val;
+  return true;
+}
+
  /* Take a section header read from a coff file (in HOST byte order),
     and make a BFD "section" out of it.  This is used by ECOFF.  */

@@ -67,32 +130,46 @@ make_a_section_from_file (bfd *abfd,
    if (bfd_coff_set_long_section_names (abfd, 
bfd_coff_long_section_names (abfd))
        && hdr->s_name[0] == '/')
      {
-      char buf[SCNNMLEN];
-      long strindex;
-      char *p;
-      const char *strings;
-
        /* Flag that this BFD uses long names, even though the format might
  	 expect them to be off by default.  This won't directly affect the
  	 format of any output BFD created from this one, but the information
  	 can be used to decide what to do.  */
        bfd_coff_set_long_section_names (abfd, true);
-      memcpy (buf, hdr->s_name + 1, SCNNMLEN - 1);
-      buf[SCNNMLEN - 1] = '\0';
-      strindex = strtol (buf, &p, 10);
-      if (*p == '\0' && strindex >= 0)
+
+      if (hdr->s_name[1] == '/')
  	{
-	  strings = _bfd_coff_read_string_table (abfd);
-	  if (strings == NULL)
-	    return false;
-	  if ((bfd_size_type)(strindex + 2) >= obj_coff_strings_len (abfd))
+	  /* LLVM extension: the '/' is followed by another '/' and then by
+	     the index in the strtab encoded in base64 without NUL at the
+	     end.  */
+	  uint32_t strindex;
+
+	  /* Decode the index.  No overflow is expected as the string table
+	     length is at most 2^32 - 1 (the length is written on the first
+	     four bytes).  */
+	  if (!decode_base64 (hdr->s_name + 2, SCNNMLEN - 2, &strindex))
  	    return false;
-	  strings += strindex;
-	  name = (char *) bfd_alloc (abfd,
-				     (bfd_size_type) strlen (strings) + 1 + 1);
+
+	  name = extract_long_section_name (abfd, strindex);
  	  if (name == NULL)
  	    return false;
-	  strcpy (name, strings);
+	}
+      else
+	{
+	  /* PE classic long section name.  The '/' is followed by the index
+	     in the strtab.  The index is formatted as a decimal string.  */
+	  char buf[SCNNMLEN];
+	  long strindex;
+	  char *p;
+
+	  memcpy (buf, hdr->s_name + 1, SCNNMLEN - 1);
+	  buf[SCNNMLEN - 1] = '\0';
+	  strindex = strtol (buf, &p, 10);
+	  if (*p == '\0' && strindex >= 0)
+	    {
+	      name = extract_long_section_name (abfd, strindex);
+	      if (name == NULL)
+		return false;
+	    }
  	}
      }


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v2] pe/coff - add support for base64 encoded long section names
  2023-05-22 19:48 [PATCH v2] pe/coff - add support for base64 encoded long section names Tristan Gingold
@ 2023-05-23  6:20 ` Jan Beulich
  2023-05-23  9:17 ` Jose E. Marchesi
  1 sibling, 0 replies; 7+ messages in thread
From: Jan Beulich @ 2023-05-23  6:20 UTC (permalink / raw)
  To: Tristan Gingold; +Cc: binutils

On 22.05.2023 21:48, Tristan Gingold via Binutils wrote:
> so new version of the patch, including generation of base64 encoded 
> section name indexes.
> 
> I was able to assemble the test from PR 30444 (using -mbig-obj to 
> overcome the 2^16 sections number limit).
> 
> I haven't added a test due to its size.
> 
> No failures on x86_64-gnu-linux for binutils configured for 
> x86_64-pc-mingw64.

A couple of nits and a question, looks good to me otherwise.

> --- a/bfd/coffcode.h
> +++ b/bfd/coffcode.h
> @@ -3625,18 +3625,54 @@ coff_write_object_contents (bfd * abfd)
>   	  len = strlen (current->name);
>   	  if (len > SCNNMLEN)
>   	    {
> -	      /* The s_name field is defined to be NUL-padded but need not be
> -		 NUL-terminated.  We use a temporary buffer so that we can still
> -		 sprintf all eight chars without splatting a terminating NUL
> -		 over the first byte of the following member (s_paddr).  */
> -	      /* PR 21096: The +20 is to stop a bogus warning from gcc7 about
> -		 a possible buffer overflow.  */
> -	      char s_name_buf[SCNNMLEN + 1 + 20];
> 
>   	      /* An inherent limitation of the /nnnnnnn notation used to indicate
>   		 the offset of the long name in the string table is that we
>   		 cannot address entries beyone the ten million byte boundary.  */
> -	      if (string_size >= 10000000)
> +	      if (string_size < 10000000)
> +		{
> +		  /* The s_name field is defined to be NUL-padded but need not
> +		     be NUL-terminated.  We use a temporary buffer so that we
> +		     can still sprintf all eight chars without splatting a
> +		     terminating NUL over the first byte of the following
> +		     member (s_paddr).  */
> +		  /* PR 21096: The +20 is to stop a bogus warning from gcc7
> +		     about a possible buffer overflow.  */
> +		  char s_name_buf[SCNNMLEN + 1 + 20];
> +
> +		  /* We do not need to use snprintf here as we have already
> +		     verified that string_size is not too big, plus we have
> +		     an overlarge buffer, just in case.  */
> +		  sprintf (s_name_buf, "/%lu", (unsigned long) string_size);
> +		  /* Then strncpy takes care of any padding for us.  */
> +		  strncpy (section.s_name, s_name_buf, SCNNMLEN);
> +		}
> +	      else
> +#ifdef COFF_WITH_PE
> +		{
> +		  /* PE use a bae64 encoding for long section names whose

base64

> +		     index is very large.  */
> +		  static const char base64[] =
> +		    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
> +		    "abcdefghijklmnopqrstuvwxyz"
> +		    "0123456789+/";
> +		  unsigned long off = string_size;
> +		  unsigned i;
> +
> +		  section.s_name[0] = '/';
> +		  section.s_name[1] = '/';
> +		  for (i = SCNNMLEN - 1; i >=2; i--)

Blank before 2 please.

> +/* Decode a base64 coded string at STR of length LEN, and write the result
> +   to RES.  Return true on success.
> +   Return false in case of invalid character or overflow.  */
> +
> +static bool
> +decode_base64 (const char *str, unsigned len, uint32_t *res)
> +{
> +  unsigned i;
> +  uint32_t val;
> +
> +  val = 0;
> +  for (i = 0; i < len; i++)
> +    {
> +      char c = str[i];
> +      unsigned d;
> +
> +      if (c >= 'A' && c <= 'Z')
> +	d = c - 'A';
> +      else if (c >= 'a' && c <= 'z')
> +	d = c - 'a' + 26;
> +      else if (c >= '0' && c <= '9')
> +	d = c - '0' + 52;
> +      else if (c == '+')
> +	d = 62;
> +      else if (c == '/')
> +	d = 63;
> +      else
> +	return false;
> +
> +      /* Check for overflow. */
> +      if ((val >> 26) != 0)
> +	return false;
> +
> +      val = (val << 6) + d;
> +    }
> +
> +  *res = val;
> +  return true;
> +}

So you decided to not also permit the nul-tail-padded form along with
the "canonical" 'A'-head-padded one here? (Generating just the
"canonical" form in coff_write_object_contents() is of course not under
question.)

Jan

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v2] pe/coff - add support for base64 encoded long section names
  2023-05-22 19:48 [PATCH v2] pe/coff - add support for base64 encoded long section names Tristan Gingold
  2023-05-23  6:20 ` Jan Beulich
@ 2023-05-23  9:17 ` Jose E. Marchesi
  2023-05-24  5:48   ` Tristan Gingold
  1 sibling, 1 reply; 7+ messages in thread
From: Jose E. Marchesi @ 2023-05-23  9:17 UTC (permalink / raw)
  To: Tristan Gingold via Binutils; +Cc: Tristan Gingold


Hello Tristan.

> so new version of the patch, including generation of base64 encoded
> section name indexes.
>
> I was able to assemble the test from PR 30444 (using -mbig-obj to
> overcome the 2^16 sections number limit).

Very nice.  I wasn't aware of -mbig-ob.

> +#ifdef COFF_WITH_PE
> +		{
> +		  /* PE use a bae64 encoding for long section names whose
> +		     index is very large.  */

I would suggest to add a note to that comment making it explicit that
this encoding is not RFC 4648, even if it happens to use the same
alphabet.  Spelling this as `base 64' rather than 'base64' may help to
avoid confusion.

> +		  static const char base64[] =
> +		    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
> +		    "abcdefghijklmnopqrstuvwxyz"
> +		    "0123456789+/";
> +		  unsigned long off = string_size;
> +		  unsigned i;
> +
> +		  section.s_name[0] = '/';
> +		  section.s_name[1] = '/';
> +		  for (i = SCNNMLEN - 1; i >=2; i--)
> +		    {
> +		      section.s_name[i] = base64[off & 0x3f];
> +		      off >>= 6;
> +		    }
> +		}
> +#endif
> +	      if (string_size > 0xffffffffUL - (len + 1)
> +#ifndef COFF_WITH_PE
> +		  || string_size >= 10000000
> +#endif
> +		  )
>  		{

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v2] pe/coff - add support for base64 encoded long section names
  2023-05-23  9:17 ` Jose E. Marchesi
@ 2023-05-24  5:48   ` Tristan Gingold
  2023-05-24  6:29     ` Jan Beulich
  0 siblings, 1 reply; 7+ messages in thread
From: Tristan Gingold @ 2023-05-24  5:48 UTC (permalink / raw)
  To: Jose E. Marchesi, Tristan Gingold via Binutils

Hello,

I have adjusted the comments to clarify.  Is it OK ?

Tristan.

2023-05-22  Tristan Gingold  <tgingold@thinkpad-e15>

	PR 30444
	* coffcode.h (coff_write_object_contents): Handle base64 encoding
	on PE.  Also check for too large string table.
	* coffgen.c (extract_long_section_name): New function extracted
	from ...
	(make_a_section_from_file): ... here.  Add support for base64
	long section names.
	(decode_base64): New function.

diff --git a/bfd/coffcode.h b/bfd/coffcode.h
index 974c8ad9854..9ecc05fb7dd 100644
--- a/bfd/coffcode.h
+++ b/bfd/coffcode.h
@@ -3625,18 +3625,55 @@ coff_write_object_contents (bfd * abfd)
  	  len = strlen (current->name);
  	  if (len > SCNNMLEN)
  	    {
-	      /* The s_name field is defined to be NUL-padded but need not be
-		 NUL-terminated.  We use a temporary buffer so that we can still
-		 sprintf all eight chars without splatting a terminating NUL
-		 over the first byte of the following member (s_paddr).  */
-	      /* PR 21096: The +20 is to stop a bogus warning from gcc7 about
-		 a possible buffer overflow.  */
-	      char s_name_buf[SCNNMLEN + 1 + 20];

  	      /* An inherent limitation of the /nnnnnnn notation used to indicate
  		 the offset of the long name in the string table is that we
  		 cannot address entries beyone the ten million byte boundary.  */
-	      if (string_size >= 10000000)
+	      if (string_size < 10000000)
+		{
+		  /* The s_name field is defined to be NUL-padded but need not
+		     be NUL-terminated.  We use a temporary buffer so that we
+		     can still sprintf all eight chars without splatting a
+		     terminating NUL over the first byte of the following
+		     member (s_paddr).  */
+		  /* PR 21096: The +20 is to stop a bogus warning from gcc7
+		     about a possible buffer overflow.  */
+		  char s_name_buf[SCNNMLEN + 1 + 20];
+
+		  /* We do not need to use snprintf here as we have already
+		     verified that string_size is not too big, plus we have
+		     an overlarge buffer, just in case.  */
+		  sprintf (s_name_buf, "/%lu", (unsigned long) string_size);
+		  /* Then strncpy takes care of any padding for us.  */
+		  strncpy (section.s_name, s_name_buf, SCNNMLEN);
+		}
+	      else
+#ifdef COFF_WITH_PE
+		{
+		  /* PE use a base 64 encoding for long section names whose
+		     index is very large.  But contrary to RFC 4648, there is
+		     no padding: 6 characters must be generated.  */
+		  static const char base64[] =
+		    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+		    "abcdefghijklmnopqrstuvwxyz"
+		    "0123456789+/";
+		  unsigned long off = string_size;
+		  unsigned i;
+
+		  section.s_name[0] = '/';
+		  section.s_name[1] = '/';
+		  for (i = SCNNMLEN - 1; i >=2; i--)
+		    {
+		      section.s_name[i] = base64[off & 0x3f];
+		      off >>= 6;
+		    }
+		}
+#endif
+	      if (string_size > 0xffffffffUL - (len + 1)
+#ifndef COFF_WITH_PE
+		  || string_size >= 10000000
+#endif
+		  )
  		{
  		  bfd_set_error (bfd_error_file_too_big);
  		  _bfd_error_handler
@@ -3646,12 +3683,6 @@ coff_write_object_contents (bfd * abfd)
  		  return false;
  		}

-	      /* We do not need to use snprintf here as we have already verfied
-		 that string_size is not too big, plus we have an overlarge
-		 buffer, just in case.  */
-	      sprintf (s_name_buf, "/%lu", (unsigned long) string_size);
-	      /* Then strncpy takes care of any padding for us.  */
-	      strncpy (section.s_name, s_name_buf, SCNNMLEN);
  	      string_size += len + 1;
  	      long_section_names = true;
  	    }
diff --git a/bfd/coffgen.c b/bfd/coffgen.c
index ac936def566..d5cef273837 100644
--- a/bfd/coffgen.c
+++ b/bfd/coffgen.c
@@ -43,6 +43,69 @@
  #include "coff/internal.h"
  #include "libcoff.h"

+/* Extract a long section name at STRINDEX and copy it to the bfd objstack.
+   Return NULL in case of error.  */
+
+static char *
+extract_long_section_name(bfd *abfd, unsigned long strindex)
+{
+  const char *strings;
+  char *name;
+
+  strings = _bfd_coff_read_string_table (abfd);
+  if (strings == NULL)
+    return NULL;
+  if ((bfd_size_type)(strindex + 2) >= obj_coff_strings_len (abfd))
+    return NULL;
+  strings += strindex;
+  name = (char *) bfd_alloc (abfd, (bfd_size_type) strlen (strings) + 1);
+  if (name == NULL)
+    return NULL;
+  strcpy (name, strings);
+
+  return name;
+}
+
+/* Decode a base 64 coded string at STR of length LEN, and write the result
+   to RES.  Return true on success.
+   Return false in case of invalid character or overflow.  */
+
+static bool
+decode_base64 (const char *str, unsigned len, uint32_t *res)
+{
+  unsigned i;
+  uint32_t val;
+
+  val = 0;
+  for (i = 0; i < len; i++)
+    {
+      char c = str[i];
+      unsigned d;
+
+      if (c >= 'A' && c <= 'Z')
+	d = c - 'A';
+      else if (c >= 'a' && c <= 'z')
+	d = c - 'a' + 26;
+      else if (c >= '0' && c <= '9')
+	d = c - '0' + 52;
+      else if (c == '+')
+	d = 62;
+      else if (c == '/')
+	d = 63;
+      else
+	return false;
+
+      /* Check for overflow. */
+      if ((val >> 26) != 0)
+	return false;
+
+      val = (val << 6) + d;
+    }
+
+  *res = val;
+  return true;
+}
+
  /* Take a section header read from a coff file (in HOST byte order),
     and make a BFD "section" out of it.  This is used by ECOFF.  */

@@ -67,32 +130,48 @@ make_a_section_from_file (bfd *abfd,
    if (bfd_coff_set_long_section_names (abfd, 
bfd_coff_long_section_names (abfd))
        && hdr->s_name[0] == '/')
      {
-      char buf[SCNNMLEN];
-      long strindex;
-      char *p;
-      const char *strings;
-
        /* Flag that this BFD uses long names, even though the format might
  	 expect them to be off by default.  This won't directly affect the
  	 format of any output BFD created from this one, but the information
  	 can be used to decide what to do.  */
        bfd_coff_set_long_section_names (abfd, true);
-      memcpy (buf, hdr->s_name + 1, SCNNMLEN - 1);
-      buf[SCNNMLEN - 1] = '\0';
-      strindex = strtol (buf, &p, 10);
-      if (*p == '\0' && strindex >= 0)
+
+      if (hdr->s_name[1] == '/')
  	{
-	  strings = _bfd_coff_read_string_table (abfd);
-	  if (strings == NULL)
-	    return false;
-	  if ((bfd_size_type)(strindex + 2) >= obj_coff_strings_len (abfd))
+	  /* LLVM extension: the '/' is followed by another '/' and then by
+	     the index in the strtab encoded in base64 without NUL at the
+	     end.  */
+	  uint32_t strindex;
+
+	  /* Decode the index.  No overflow is expected as the string table
+	     length is at most 2^32 - 1 (the length is written on the first
+	     four bytes).
+	     Also, contrary to RFC 4648, all the characters must be decoded,
+	     there is no padding.  */
+	  if (!decode_base64 (hdr->s_name + 2, SCNNMLEN - 2, &strindex))
  	    return false;
-	  strings += strindex;
-	  name = (char *) bfd_alloc (abfd,
-				     (bfd_size_type) strlen (strings) + 1 + 1);
+
+	  name = extract_long_section_name (abfd, strindex);
  	  if (name == NULL)
  	    return false;
-	  strcpy (name, strings);
+	}
+      else
+	{
+	  /* PE classic long section name.  The '/' is followed by the index
+	     in the strtab.  The index is formatted as a decimal string.  */
+	  char buf[SCNNMLEN];
+	  long strindex;
+	  char *p;
+
+	  memcpy (buf, hdr->s_name + 1, SCNNMLEN - 1);
+	  buf[SCNNMLEN - 1] = '\0';
+	  strindex = strtol (buf, &p, 10);
+	  if (*p == '\0' && strindex >= 0)
+	    {
+	      name = extract_long_section_name (abfd, strindex);
+	      if (name == NULL)
+		return false;
+	    }
  	}
      }


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v2] pe/coff - add support for base64 encoded long section names
  2023-05-24  5:48   ` Tristan Gingold
@ 2023-05-24  6:29     ` Jan Beulich
  2023-05-24 17:33       ` Tristan Gingold
  0 siblings, 1 reply; 7+ messages in thread
From: Jan Beulich @ 2023-05-24  6:29 UTC (permalink / raw)
  To: Tristan Gingold; +Cc: Jose E. Marchesi, Tristan Gingold via Binutils

On 24.05.2023 07:48, Tristan Gingold via Binutils wrote:
> I have adjusted the comments to clarify.  Is it OK ?

With the earlier nit addressed (I'll repeat it below), yes. But please
allow a little bit of time for in particular Nick to raise last minute
comments, as he had looked at the earlier version of the patch. Plus ...

> Tristan.
> 
> 2023-05-22  Tristan Gingold  <tgingold@thinkpad-e15>

... I notice that in at least one case there was a similar email address
you used in a ChangeLog entry (gingold@gingold-Precision-7510), but
neither that nor the one here look like valid email addresses to me. Yet
aiui these email addresses are expected to be real ones.

> --- a/bfd/coffcode.h
> +++ b/bfd/coffcode.h
> @@ -3625,18 +3625,55 @@ coff_write_object_contents (bfd * abfd)
>   	  len = strlen (current->name);
>   	  if (len > SCNNMLEN)
>   	    {
> -	      /* The s_name field is defined to be NUL-padded but need not be
> -		 NUL-terminated.  We use a temporary buffer so that we can still
> -		 sprintf all eight chars without splatting a terminating NUL
> -		 over the first byte of the following member (s_paddr).  */
> -	      /* PR 21096: The +20 is to stop a bogus warning from gcc7 about
> -		 a possible buffer overflow.  */
> -	      char s_name_buf[SCNNMLEN + 1 + 20];
> 
>   	      /* An inherent limitation of the /nnnnnnn notation used to indicate
>   		 the offset of the long name in the string table is that we
>   		 cannot address entries beyone the ten million byte boundary.  */
> -	      if (string_size >= 10000000)
> +	      if (string_size < 10000000)
> +		{
> +		  /* The s_name field is defined to be NUL-padded but need not
> +		     be NUL-terminated.  We use a temporary buffer so that we
> +		     can still sprintf all eight chars without splatting a
> +		     terminating NUL over the first byte of the following
> +		     member (s_paddr).  */
> +		  /* PR 21096: The +20 is to stop a bogus warning from gcc7
> +		     about a possible buffer overflow.  */
> +		  char s_name_buf[SCNNMLEN + 1 + 20];
> +
> +		  /* We do not need to use snprintf here as we have already
> +		     verified that string_size is not too big, plus we have
> +		     an overlarge buffer, just in case.  */
> +		  sprintf (s_name_buf, "/%lu", (unsigned long) string_size);
> +		  /* Then strncpy takes care of any padding for us.  */
> +		  strncpy (section.s_name, s_name_buf, SCNNMLEN);
> +		}
> +	      else
> +#ifdef COFF_WITH_PE
> +		{
> +		  /* PE use a base 64 encoding for long section names whose
> +		     index is very large.  But contrary to RFC 4648, there is
> +		     no padding: 6 characters must be generated.  */
> +		  static const char base64[] =
> +		    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
> +		    "abcdefghijklmnopqrstuvwxyz"
> +		    "0123456789+/";
> +		  unsigned long off = string_size;
> +		  unsigned i;
> +
> +		  section.s_name[0] = '/';
> +		  section.s_name[1] = '/';
> +		  for (i = SCNNMLEN - 1; i >=2; i--)

Repeat of earlier nit: Please insert a blank between >= and 2.

Jan

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v2] pe/coff - add support for base64 encoded long section names
  2023-05-24  6:29     ` Jan Beulich
@ 2023-05-24 17:33       ` Tristan Gingold
  2023-05-25 10:24         ` Nick Clifton
  0 siblings, 1 reply; 7+ messages in thread
From: Tristan Gingold @ 2023-05-24 17:33 UTC (permalink / raw)
  To: Jan Beulich, Tristan Gingold via Binutils; +Cc: Jose E. Marchesi

Hello,

On 24/05/2023 08:29, Jan Beulich wrote:
> On 24.05.2023 07:48, Tristan Gingold via Binutils wrote:
>> I have adjusted the comments to clarify.  Is it OK ?
> 
> With the earlier nit addressed (I'll repeat it below), yes. But please
> allow a little bit of time for in particular Nick to raise last minute
> comments, as he had looked at the earlier version of the patch. Plus ...

Oops, sorry.  I missed your note about the nit.  Now fixed.  And thank 
you for the reminder.  I have also fixed the email address in the CL.

Tristan.

2023-05-24  Tristan Gingold  <tgingold@free.fr>

	PR 30444
	* coffcode.h (coff_write_object_contents): Handle base64 encoding
	on PE.  Also check for too large string table.
	* coffgen.c (extract_long_section_name): New function extracted
	from ...
	(make_a_section_from_file): ... here.  Add support for base64
	long section names.
	(decode_base64): New function.

diff --git a/bfd/coffcode.h b/bfd/coffcode.h
index 974c8ad9854..780b669fe9d 100644
--- a/bfd/coffcode.h
+++ b/bfd/coffcode.h
@@ -3625,18 +3625,55 @@ coff_write_object_contents (bfd * abfd)
  	  len = strlen (current->name);
  	  if (len > SCNNMLEN)
  	    {
-	      /* The s_name field is defined to be NUL-padded but need not be
-		 NUL-terminated.  We use a temporary buffer so that we can still
-		 sprintf all eight chars without splatting a terminating NUL
-		 over the first byte of the following member (s_paddr).  */
-	      /* PR 21096: The +20 is to stop a bogus warning from gcc7 about
-		 a possible buffer overflow.  */
-	      char s_name_buf[SCNNMLEN + 1 + 20];

  	      /* An inherent limitation of the /nnnnnnn notation used to indicate
  		 the offset of the long name in the string table is that we
  		 cannot address entries beyone the ten million byte boundary.  */
-	      if (string_size >= 10000000)
+	      if (string_size < 10000000)
+		{
+		  /* The s_name field is defined to be NUL-padded but need not
+		     be NUL-terminated.  We use a temporary buffer so that we
+		     can still sprintf all eight chars without splatting a
+		     terminating NUL over the first byte of the following
+		     member (s_paddr).  */
+		  /* PR 21096: The +20 is to stop a bogus warning from gcc7
+		     about a possible buffer overflow.  */
+		  char s_name_buf[SCNNMLEN + 1 + 20];
+
+		  /* We do not need to use snprintf here as we have already
+		     verified that string_size is not too big, plus we have
+		     an overlarge buffer, just in case.  */
+		  sprintf (s_name_buf, "/%lu", (unsigned long) string_size);
+		  /* Then strncpy takes care of any padding for us.  */
+		  strncpy (section.s_name, s_name_buf, SCNNMLEN);
+		}
+	      else
+#ifdef COFF_WITH_PE
+		{
+		  /* PE use a base 64 encoding for long section names whose
+		     index is very large.  But contrary to RFC 4648, there is
+		     no padding: 6 characters must be generated.  */
+		  static const char base64[] =
+		    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+		    "abcdefghijklmnopqrstuvwxyz"
+		    "0123456789+/";
+		  unsigned long off = string_size;
+		  unsigned i;
+
+		  section.s_name[0] = '/';
+		  section.s_name[1] = '/';
+		  for (i = SCNNMLEN - 1; i >= 2; i--)
+		    {
+		      section.s_name[i] = base64[off & 0x3f];
+		      off >>= 6;
+		    }
+		}
+#endif
+	      if (string_size > 0xffffffffUL - (len + 1)
+#ifndef COFF_WITH_PE
+		  || string_size >= 10000000
+#endif
+		  )
  		{
  		  bfd_set_error (bfd_error_file_too_big);
  		  _bfd_error_handler
@@ -3646,12 +3683,6 @@ coff_write_object_contents (bfd * abfd)
  		  return false;
  		}

-	      /* We do not need to use snprintf here as we have already verfied
-		 that string_size is not too big, plus we have an overlarge
-		 buffer, just in case.  */
-	      sprintf (s_name_buf, "/%lu", (unsigned long) string_size);
-	      /* Then strncpy takes care of any padding for us.  */
-	      strncpy (section.s_name, s_name_buf, SCNNMLEN);
  	      string_size += len + 1;
  	      long_section_names = true;
  	    }
diff --git a/bfd/coffgen.c b/bfd/coffgen.c
index ac936def566..d5cef273837 100644
--- a/bfd/coffgen.c
+++ b/bfd/coffgen.c
@@ -43,6 +43,69 @@
  #include "coff/internal.h"
  #include "libcoff.h"

+/* Extract a long section name at STRINDEX and copy it to the bfd objstack.
+   Return NULL in case of error.  */
+
+static char *
+extract_long_section_name(bfd *abfd, unsigned long strindex)
+{
+  const char *strings;
+  char *name;
+
+  strings = _bfd_coff_read_string_table (abfd);
+  if (strings == NULL)
+    return NULL;
+  if ((bfd_size_type)(strindex + 2) >= obj_coff_strings_len (abfd))
+    return NULL;
+  strings += strindex;
+  name = (char *) bfd_alloc (abfd, (bfd_size_type) strlen (strings) + 1);
+  if (name == NULL)
+    return NULL;
+  strcpy (name, strings);
+
+  return name;
+}
+
+/* Decode a base 64 coded string at STR of length LEN, and write the result
+   to RES.  Return true on success.
+   Return false in case of invalid character or overflow.  */
+
+static bool
+decode_base64 (const char *str, unsigned len, uint32_t *res)
+{
+  unsigned i;
+  uint32_t val;
+
+  val = 0;
+  for (i = 0; i < len; i++)
+    {
+      char c = str[i];
+      unsigned d;
+
+      if (c >= 'A' && c <= 'Z')
+	d = c - 'A';
+      else if (c >= 'a' && c <= 'z')
+	d = c - 'a' + 26;
+      else if (c >= '0' && c <= '9')
+	d = c - '0' + 52;
+      else if (c == '+')
+	d = 62;
+      else if (c == '/')
+	d = 63;
+      else
+	return false;
+
+      /* Check for overflow. */
+      if ((val >> 26) != 0)
+	return false;
+
+      val = (val << 6) + d;
+    }
+
+  *res = val;
+  return true;
+}
+
  /* Take a section header read from a coff file (in HOST byte order),
     and make a BFD "section" out of it.  This is used by ECOFF.  */

@@ -67,32 +130,48 @@ make_a_section_from_file (bfd *abfd,
    if (bfd_coff_set_long_section_names (abfd, 
bfd_coff_long_section_names (abfd))
        && hdr->s_name[0] == '/')
      {
-      char buf[SCNNMLEN];
-      long strindex;
-      char *p;
-      const char *strings;
-
        /* Flag that this BFD uses long names, even though the format might
  	 expect them to be off by default.  This won't directly affect the
  	 format of any output BFD created from this one, but the information
  	 can be used to decide what to do.  */
        bfd_coff_set_long_section_names (abfd, true);
-      memcpy (buf, hdr->s_name + 1, SCNNMLEN - 1);
-      buf[SCNNMLEN - 1] = '\0';
-      strindex = strtol (buf, &p, 10);
-      if (*p == '\0' && strindex >= 0)
+
+      if (hdr->s_name[1] == '/')
  	{
-	  strings = _bfd_coff_read_string_table (abfd);
-	  if (strings == NULL)
-	    return false;
-	  if ((bfd_size_type)(strindex + 2) >= obj_coff_strings_len (abfd))
+	  /* LLVM extension: the '/' is followed by another '/' and then by
+	     the index in the strtab encoded in base64 without NUL at the
+	     end.  */
+	  uint32_t strindex;
+
+	  /* Decode the index.  No overflow is expected as the string table
+	     length is at most 2^32 - 1 (the length is written on the first
+	     four bytes).
+	     Also, contrary to RFC 4648, all the characters must be decoded,
+	     there is no padding.  */
+	  if (!decode_base64 (hdr->s_name + 2, SCNNMLEN - 2, &strindex))
  	    return false;
-	  strings += strindex;
-	  name = (char *) bfd_alloc (abfd,
-				     (bfd_size_type) strlen (strings) + 1 + 1);
+
+	  name = extract_long_section_name (abfd, strindex);
  	  if (name == NULL)
  	    return false;
-	  strcpy (name, strings);
+	}
+      else
+	{
+	  /* PE classic long section name.  The '/' is followed by the index
+	     in the strtab.  The index is formatted as a decimal string.  */
+	  char buf[SCNNMLEN];
+	  long strindex;
+	  char *p;
+
+	  memcpy (buf, hdr->s_name + 1, SCNNMLEN - 1);
+	  buf[SCNNMLEN - 1] = '\0';
+	  strindex = strtol (buf, &p, 10);
+	  if (*p == '\0' && strindex >= 0)
+	    {
+	      name = extract_long_section_name (abfd, strindex);
+	      if (name == NULL)
+		return false;
+	    }
  	}
      }


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v2] pe/coff - add support for base64 encoded long section names
  2023-05-24 17:33       ` Tristan Gingold
@ 2023-05-25 10:24         ` Nick Clifton
  0 siblings, 0 replies; 7+ messages in thread
From: Nick Clifton @ 2023-05-25 10:24 UTC (permalink / raw)
  To: Tristan Gingold, Jan Beulich, Tristan Gingold via Binutils
  Cc: Jose E. Marchesi

Hi Tristan,


> 2023-05-24  Tristan Gingold  <tgingold@free.fr>
> 
>      PR 30444
>      * coffcode.h (coff_write_object_contents): Handle base64 encoding
>      on PE.  Also check for too large string table.
>      * coffgen.c (extract_long_section_name): New function extracted
>      from ...
>      (make_a_section_from_file): ... here.  Add support for base64
>      long section names.
>      (decode_base64): New function.

This patch looks good to me, so "approved - please apply".

I suspect that the issue of NUL padded base64 values might come up in
the future, but if it does, it will have to be with a valid test case
and then we can properly determine what to do.

Cheers
   Nick


^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2023-05-25 10:24 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-05-22 19:48 [PATCH v2] pe/coff - add support for base64 encoded long section names Tristan Gingold
2023-05-23  6:20 ` Jan Beulich
2023-05-23  9:17 ` Jose E. Marchesi
2023-05-24  5:48   ` Tristan Gingold
2023-05-24  6:29     ` Jan Beulich
2023-05-24 17:33       ` Tristan Gingold
2023-05-25 10:24         ` Nick Clifton

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).