public inbox for gdb-patches@sourceware.org
 help / color / mirror / Atom feed
* [patch] fix phony_iconv wide character support
@ 2015-12-31 16:48 Sandra Loosemore
  2016-01-12 14:58 ` Pedro Alves
  0 siblings, 1 reply; 4+ messages in thread
From: Sandra Loosemore @ 2015-12-31 16:48 UTC (permalink / raw)
  To: gdb-patches

[-- Attachment #1: Type: text/plain, Size: 1401 bytes --]

A while back I submitted a patch to skip gdb.base/wchar.exp tests when 
GDB is built with PHONY_ICONV enabled:

https://sourceware.org/ml/gdb-patches/2015-09/msg00388.html

The consensus from the discussion seemed to be that it would be better 
to fix the PHONY_ICONV support instead, so that's what this patch does.

I found a whole bunch of bugs in the existing code....

(1) phony_iconv_open had code to recognize "UTF-32BE" as a wide 
character set, but GDB_DEFAULT_TARGET_WIDE_CHARSET was set to 
"ISO-8859-1" instead, so the decoding wasn't getting triggered.

(2) phony_iconv wasn't advancing *inbuf properly after processing each 
4-byte group, plus it wasn't checking for output overflow.  I don't see 
how the old code could ever have worked even if you explicitly chose 
UTF-32BE.

(3) I was doing experiments on a little-endian target (nios2-linux-gnu), 
so I had to extend the conversion code to handle both endiannesses.

(4) On Windows host, the system default (narrow) charset is CP1252 
rather than ISO-8859-1; this affects the behavior of functions like 
isprint, and in particular what is printed for the character constant 
cent ('\242') used in the testcase.  The testcase already checks for 
CP1252 but GDB wasn't reporting the default correctly.

So, my patch fixes all these things and the gdb.base/wchar.exp tests now 
all PASS in this configuration.  OK to commit?

-Sandra


[-- Attachment #2: iconv.log --]
[-- Type: text/x-log, Size: 533 bytes --]

2015-12-31  Sandra Loosemore  <sandra@codesourcery.com>

	gdb/
	* charset.c [PHONY_ICONV] (GDB_DEFAULT_HOST_CHARSET):
	Conditionalize for Windows host.
	(GDB_DEFAULT_TARGET_CHARSET): Match GDB_DEFAULT_HOST_CHARSET.
	(GDB_DEFAULT_TARGET_WIDE_CHARSET): Use UTF-32.
	(phony_iconv_open): Handle both UTF-32 endiannesses.
	(phony_iconv): Likewise.  Check for output overflow and clean up
	out-of-input cases.  Correct adjustment to input buffer pointer.
	(set_be_le_names) [PHONY_ICONV]: Use hard-wired names to match
	phony_iconv_open.


[-- Attachment #3: iconv.patch --]
[-- Type: text/x-patch, Size: 3646 bytes --]

diff --git a/gdb/charset.c b/gdb/charset.c
index ee1ae20..82e5644 100644
--- a/gdb/charset.c
+++ b/gdb/charset.c
@@ -77,9 +77,13 @@
    arrange for there to be a single available character set.  */
 
 #undef GDB_DEFAULT_HOST_CHARSET
+#ifdef USE_WIN32API
+#define GDB_DEFAULT_HOST_CHARSET "CP1252"
+#else
 #define GDB_DEFAULT_HOST_CHARSET "ISO-8859-1"
-#define GDB_DEFAULT_TARGET_CHARSET "ISO-8859-1"
-#define GDB_DEFAULT_TARGET_WIDE_CHARSET "ISO-8859-1"
+#endif
+#define GDB_DEFAULT_TARGET_CHARSET GDB_DEFAULT_HOST_CHARSET 
+#define GDB_DEFAULT_TARGET_WIDE_CHARSET "UTF-32"
 #undef DEFAULT_CHARSET_NAMES
 #define DEFAULT_CHARSET_NAMES GDB_DEFAULT_HOST_CHARSET ,
 
@@ -95,20 +99,27 @@
 #undef ICONV_CONST
 #define ICONV_CONST const
 
+/* We allow conversions from UTF-32, wchar_t, and the host charset.
+   We allow conversions to wchar_t and the host charset
+   Return 1 if we are converting from UTF-32BE, 2 if from UTF32-LE,
+   0 otherwise.  This is  used as a flag in calls to iconv.  */
+
 static iconv_t
 phony_iconv_open (const char *to, const char *from)
 {
-  /* We allow conversions from UTF-32BE, wchar_t, and the host charset.
-     We allow conversions to wchar_t and the host charset.  */
-  if (strcmp (from, "UTF-32BE") && strcmp (from, "wchar_t")
-      && strcmp (from, GDB_DEFAULT_HOST_CHARSET))
-    return -1;
   if (strcmp (to, "wchar_t") && strcmp (to, GDB_DEFAULT_HOST_CHARSET))
     return -1;
 
-  /* Return 1 if we are converting from UTF-32BE, 0 otherwise.  This is
-     used as a flag in calls to iconv.  */
-  return !strcmp (from, "UTF-32BE");
+  if (!strcmp (from, "UTF-32BE") || !strcmp (from, "UTF-32"))
+    return 1;
+
+  if (!strcmp (from, "UTF-32LE"))
+    return 2;
+
+  if (strcmp (from, "wchar_t") && strcmp (from, GDB_DEFAULT_HOST_CHARSET))
+    return -1;
+
+  return 0;
 }
 
 static int
@@ -130,8 +141,17 @@ phony_iconv (iconv_t utf_flag, const char **inbuf, size_t *inbytesleft,
 
 	  for (j = 0; j < 4; ++j)
 	    {
-	      c <<= 8;
-	      c += (*inbuf)[j] & 0xff;
+	      if (utf_flag == 1)
+		{
+		  /* Big-endian.  */
+		  c <<= 8;
+		  c += (*inbuf)[j] & 0xff;
+		}
+	      else
+		{
+		  /* Little-endian.  */
+		  c += ((*inbuf)[j] & 0xff) << (8 * j);
+		}
 	    }
 
 	  if (c >= 256)
@@ -139,15 +159,21 @@ phony_iconv (iconv_t utf_flag, const char **inbuf, size_t *inbytesleft,
 	      errno = EILSEQ;
 	      return -1;
 	    }
+	  if (*outbytesleft < 1)
+	    {
+	      errno = E2BIG;
+	      return -1;
+	    }
 	  **outbuf = c & 0xff;
 	  ++*outbuf;
 	  --*outbytesleft;
 
-	  ++*inbuf;
+	  *inbuf += 4;
 	  *inbytesleft -= 4;
 	}
-      if (*inbytesleft < 4)
+      if (*inbytesleft)
 	{
+	  /* Partial sequence on input.  */
 	  errno = EINVAL;
 	  return -1;
 	}
@@ -165,12 +191,11 @@ phony_iconv (iconv_t utf_flag, const char **inbuf, size_t *inbytesleft,
       *outbuf += amt;
       *inbytesleft -= amt;
       *outbytesleft -= amt;
-    }
-
-  if (*inbytesleft)
-    {
-      errno = E2BIG;
-      return -1;
+      if (*inbytesleft)
+	{
+	  errno = E2BIG;
+	  return -1;
+	}
     }
 
   /* The number of non-reversible conversions -- but they were all
@@ -290,6 +315,10 @@ set_be_le_names (struct gdbarch *gdbarch)
     return;
   be_le_arch = gdbarch;
 
+#ifdef PHONY_ICONV
+  target_wide_charset_le_name = "UTF-32LE";
+  target_wide_charset_be_name = "UTF-32BE";
+#else
   target_wide_charset_le_name = NULL;
   target_wide_charset_be_name = NULL;
 
@@ -313,6 +342,7 @@ set_be_le_names (struct gdbarch *gdbarch)
 	    target_wide_charset_le_name = charset_enum[i];
 	}
     }
+# endif  /* PHONY_ICONV */
 }
 
 /* 'Set charset', 'set host-charset', 'set target-charset', 'set

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [patch] fix phony_iconv wide character support
  2015-12-31 16:48 [patch] fix phony_iconv wide character support Sandra Loosemore
@ 2016-01-12 14:58 ` Pedro Alves
  2016-01-15 20:45   ` Sandra Loosemore
  0 siblings, 1 reply; 4+ messages in thread
From: Pedro Alves @ 2016-01-12 14:58 UTC (permalink / raw)
  To: Sandra Loosemore, gdb-patches

On 12/31/2015 04:48 PM, Sandra Loosemore wrote:

> 
> diff --git a/gdb/charset.c b/gdb/charset.c
> index ee1ae20..82e5644 100644
> --- a/gdb/charset.c
> +++ b/gdb/charset.c
> @@ -77,9 +77,13 @@
>     arrange for there to be a single available character set.  */
>  
>  #undef GDB_DEFAULT_HOST_CHARSET
> +#ifdef USE_WIN32API
> +#define GDB_DEFAULT_HOST_CHARSET "CP1252"
> +#else
>  #define GDB_DEFAULT_HOST_CHARSET "ISO-8859-1"
> -#define GDB_DEFAULT_TARGET_CHARSET "ISO-8859-1"
> -#define GDB_DEFAULT_TARGET_WIDE_CHARSET "ISO-8859-1"
> +#endif

Could you indent this, like:

#ifdef USE_WIN32API
# define GDB_DEFAULT_HOST_CHARSET "CP1252"
#else
# define GDB_DEFAULT_HOST_CHARSET "ISO-8859-1"
#endif


> +#define GDB_DEFAULT_TARGET_CHARSET GDB_DEFAULT_HOST_CHARSET 
> +#define GDB_DEFAULT_TARGET_WIDE_CHARSET "UTF-32"
>  #undef DEFAULT_CHARSET_NAMES
>  #define DEFAULT_CHARSET_NAMES GDB_DEFAULT_HOST_CHARSET ,
>  
> @@ -95,20 +99,27 @@
>  #undef ICONV_CONST
>  #define ICONV_CONST const
>  
> +/* We allow conversions from UTF-32, wchar_t, and the host charset.
> +   We allow conversions to wchar_t and the host charset

Missing period.

> +   Return 1 if we are converting from UTF-32BE, 2 if from UTF32-LE,
> +   0 otherwise.  This is  used as a flag in calls to iconv.  */

Spurious double space after "This is".


> +
> +  return 0;
>  }
>  
>  static int
> @@ -130,8 +141,17 @@ phony_iconv (iconv_t utf_flag, const char **inbuf, size_t *inbytesleft,
>  
>  	  for (j = 0; j < 4; ++j)
>  	    {
> -	      c <<= 8;
> -	      c += (*inbuf)[j] & 0xff;
> +	      if (utf_flag == 1)
> +		{
> +		  /* Big-endian.  */
> +		  c <<= 8;
> +		  c += (*inbuf)[j] & 0xff;
> +		}
> +	      else
> +		{
> +		  /* Little-endian.  */
> +		  c += ((*inbuf)[j] & 0xff) << (8 * j);
> +		}
>  	    }


Isn't this the same as:

   enum bfd_endian endian = utf_flag == 1 ? BFD_ENDIAN_BIG : BFD_ENDIAN_LITTLE;
   extract_unsigned_integer (*inbuf, 4, endian);

?

>  
>    /* The number of non-reversible conversions -- but they were all
> @@ -290,6 +315,10 @@ set_be_le_names (struct gdbarch *gdbarch)
>      return;
>    be_le_arch = gdbarch;
>  
> +#ifdef PHONY_ICONV
> +  target_wide_charset_le_name = "UTF-32LE";
> +  target_wide_charset_be_name = "UTF-32BE";
> +#else
>    target_wide_charset_le_name = NULL;
>    target_wide_charset_be_name = NULL;
>  
> @@ -313,6 +342,7 @@ set_be_le_names (struct gdbarch *gdbarch)
>  	    target_wide_charset_le_name = charset_enum[i];
>  	}
>      }
> +# endif  /* PHONY_ICONV */

This change isn't obvious to me.  You wrote in the ChangeLog:

> 	(set_be_le_names) [PHONY_ICONV]: Use hard-wired names to match
> 	phony_iconv_open.

But I think this "to match" comment should be here in the sources.

>  }
>  
>  /* 'Set charset', 'set host-charset', 'set target-charset', 'set
> 

Otherwise LGTM.

Thanks,
Pedro Alves

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [patch] fix phony_iconv wide character support
  2016-01-12 14:58 ` Pedro Alves
@ 2016-01-15 20:45   ` Sandra Loosemore
  2016-01-15 21:35     ` Pedro Alves
  0 siblings, 1 reply; 4+ messages in thread
From: Sandra Loosemore @ 2016-01-15 20:45 UTC (permalink / raw)
  To: Pedro Alves, gdb-patches

[-- Attachment #1: Type: text/plain, Size: 1607 bytes --]

On 01/12/2016 07:58 AM, Pedro Alves wrote:
> [snip]
>
> Could you indent this, like:
>
> #ifdef USE_WIN32API
> # define GDB_DEFAULT_HOST_CHARSET "CP1252"
> #else
> # define GDB_DEFAULT_HOST_CHARSET "ISO-8859-1"
> #endif

Done.

>> +/* We allow conversions from UTF-32, wchar_t, and the host charset.
>> +   We allow conversions to wchar_t and the host charset
>
> Missing period.
>
>> +   Return 1 if we are converting from UTF-32BE, 2 if from UTF32-LE,
>> +   0 otherwise.  This is  used as a flag in calls to iconv.  */
>
> Spurious double space after "This is".

Both fixed now.

> Isn't this the same as:
>
>     enum bfd_endian endian = utf_flag == 1 ? BFD_ENDIAN_BIG : BFD_ENDIAN_LITTLE;
>     extract_unsigned_integer (*inbuf, 4, endian);
>
> ?

That looks much nicer.  :-)  Fixed.

>> @@ -290,6 +315,10 @@ set_be_le_names (struct gdbarch *gdbarch)
>>       return;
>>     be_le_arch = gdbarch;
>>
>> +#ifdef PHONY_ICONV
>> +  target_wide_charset_le_name = "UTF-32LE";
>> +  target_wide_charset_be_name = "UTF-32BE";
>> +#else
>>     target_wide_charset_le_name = NULL;
>>     target_wide_charset_be_name = NULL;
>>
>> @@ -313,6 +342,7 @@ set_be_le_names (struct gdbarch *gdbarch)
>>   	    target_wide_charset_le_name = charset_enum[i];
>>   	}
>>       }
>> +# endif  /* PHONY_ICONV */
>
> This change isn't obvious to me.  You wrote in the ChangeLog:
>
>> 	(set_be_le_names) [PHONY_ICONV]: Use hard-wired names to match
>> 	phony_iconv_open.
>
> But I think this "to match" comment should be here in the sources.

Done.

> Otherwise LGTM.

New patch attached.  Is this one OK to commit?

-Sandra


[-- Attachment #2: iconv.log --]
[-- Type: text/x-log, Size: 533 bytes --]

2016-01-15  Sandra Loosemore  <sandra@codesourcery.com>

	gdb/
	* charset.c [PHONY_ICONV] (GDB_DEFAULT_HOST_CHARSET):
	Conditionalize for Windows host.
	(GDB_DEFAULT_TARGET_CHARSET): Match GDB_DEFAULT_HOST_CHARSET.
	(GDB_DEFAULT_TARGET_WIDE_CHARSET): Use UTF-32.
	(phony_iconv_open): Handle both UTF-32 endiannesses.
	(phony_iconv): Likewise.  Check for output overflow and clean up
	out-of-input cases.  Correct adjustment to input buffer pointer.
	(set_be_le_names) [PHONY_ICONV]: Use hard-wired names to match
	phony_iconv_open.


[-- Attachment #3: iconv.patch --]
[-- Type: text/x-patch, Size: 3764 bytes --]

diff --git a/gdb/charset.c b/gdb/charset.c
index ee1ae20..4e1c168 100644
--- a/gdb/charset.c
+++ b/gdb/charset.c
@@ -77,9 +77,13 @@
    arrange for there to be a single available character set.  */
 
 #undef GDB_DEFAULT_HOST_CHARSET
-#define GDB_DEFAULT_HOST_CHARSET "ISO-8859-1"
-#define GDB_DEFAULT_TARGET_CHARSET "ISO-8859-1"
-#define GDB_DEFAULT_TARGET_WIDE_CHARSET "ISO-8859-1"
+#ifdef USE_WIN32API
+# define GDB_DEFAULT_HOST_CHARSET "CP1252"
+#else
+# define GDB_DEFAULT_HOST_CHARSET "ISO-8859-1"
+#endif
+#define GDB_DEFAULT_TARGET_CHARSET GDB_DEFAULT_HOST_CHARSET 
+#define GDB_DEFAULT_TARGET_WIDE_CHARSET "UTF-32"
 #undef DEFAULT_CHARSET_NAMES
 #define DEFAULT_CHARSET_NAMES GDB_DEFAULT_HOST_CHARSET ,
 
@@ -95,20 +99,27 @@
 #undef ICONV_CONST
 #define ICONV_CONST const
 
+/* We allow conversions from UTF-32, wchar_t, and the host charset.
+   We allow conversions to wchar_t and the host charset.
+   Return 1 if we are converting from UTF-32BE, 2 if from UTF32-LE,
+   0 otherwise.  This is used as a flag in calls to iconv.  */
+
 static iconv_t
 phony_iconv_open (const char *to, const char *from)
 {
-  /* We allow conversions from UTF-32BE, wchar_t, and the host charset.
-     We allow conversions to wchar_t and the host charset.  */
-  if (strcmp (from, "UTF-32BE") && strcmp (from, "wchar_t")
-      && strcmp (from, GDB_DEFAULT_HOST_CHARSET))
-    return -1;
   if (strcmp (to, "wchar_t") && strcmp (to, GDB_DEFAULT_HOST_CHARSET))
     return -1;
 
-  /* Return 1 if we are converting from UTF-32BE, 0 otherwise.  This is
-     used as a flag in calls to iconv.  */
-  return !strcmp (from, "UTF-32BE");
+  if (!strcmp (from, "UTF-32BE") || !strcmp (from, "UTF-32"))
+    return 1;
+
+  if (!strcmp (from, "UTF-32LE"))
+    return 2;
+
+  if (strcmp (from, "wchar_t") && strcmp (from, GDB_DEFAULT_HOST_CHARSET))
+    return -1;
+
+  return 0;
 }
 
 static int
@@ -123,31 +134,33 @@ phony_iconv (iconv_t utf_flag, const char **inbuf, size_t *inbytesleft,
 {
   if (utf_flag)
     {
+      enum bfd_endian endian
+	= utf_flag == 1 ? BFD_ENDIAN_BIG : BFD_ENDIAN_LITTLE;
       while (*inbytesleft >= 4)
 	{
-	  size_t j;
-	  unsigned long c = 0;
-
-	  for (j = 0; j < 4; ++j)
-	    {
-	      c <<= 8;
-	      c += (*inbuf)[j] & 0xff;
-	    }
+	  unsigned long c
+	    = extract_unsigned_integer ((const gdb_byte *)*inbuf, 4, endian);
 
 	  if (c >= 256)
 	    {
 	      errno = EILSEQ;
 	      return -1;
 	    }
+	  if (*outbytesleft < 1)
+	    {
+	      errno = E2BIG;
+	      return -1;
+	    }
 	  **outbuf = c & 0xff;
 	  ++*outbuf;
 	  --*outbytesleft;
 
-	  ++*inbuf;
+	  *inbuf += 4;
 	  *inbytesleft -= 4;
 	}
-      if (*inbytesleft < 4)
+      if (*inbytesleft)
 	{
+	  /* Partial sequence on input.  */
 	  errno = EINVAL;
 	  return -1;
 	}
@@ -165,12 +178,11 @@ phony_iconv (iconv_t utf_flag, const char **inbuf, size_t *inbytesleft,
       *outbuf += amt;
       *inbytesleft -= amt;
       *outbytesleft -= amt;
-    }
-
-  if (*inbytesleft)
-    {
-      errno = E2BIG;
-      return -1;
+      if (*inbytesleft)
+	{
+	  errno = E2BIG;
+	  return -1;
+	}
     }
 
   /* The number of non-reversible conversions -- but they were all
@@ -290,6 +302,11 @@ set_be_le_names (struct gdbarch *gdbarch)
     return;
   be_le_arch = gdbarch;
 
+#ifdef PHONY_ICONV
+  /* Match the wide charset names recognized by phony_iconv_open.  */
+  target_wide_charset_le_name = "UTF-32LE";
+  target_wide_charset_be_name = "UTF-32BE";
+#else
   target_wide_charset_le_name = NULL;
   target_wide_charset_be_name = NULL;
 
@@ -313,6 +330,7 @@ set_be_le_names (struct gdbarch *gdbarch)
 	    target_wide_charset_le_name = charset_enum[i];
 	}
     }
+# endif  /* PHONY_ICONV */
 }
 
 /* 'Set charset', 'set host-charset', 'set target-charset', 'set

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [patch] fix phony_iconv wide character support
  2016-01-15 20:45   ` Sandra Loosemore
@ 2016-01-15 21:35     ` Pedro Alves
  0 siblings, 0 replies; 4+ messages in thread
From: Pedro Alves @ 2016-01-15 21:35 UTC (permalink / raw)
  To: Sandra Loosemore, gdb-patches

On 01/15/2016 08:45 PM, Sandra Loosemore wrote:

> New patch attached.  Is this one OK to commit?

OK.

Thanks,
Pedro Alves

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2016-01-15 21:35 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-12-31 16:48 [patch] fix phony_iconv wide character support Sandra Loosemore
2016-01-12 14:58 ` Pedro Alves
2016-01-15 20:45   ` Sandra Loosemore
2016-01-15 21:35     ` Pedro Alves

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).