[SCM] archer-tromey-charset: handle re-syncing correctly with wide characters

public inbox for archer-commits@sourceware.org
help / color / mirror / Atom feed

* [SCM]  archer-tromey-charset: handle re-syncing correctly with wide characters
@ 2009-01-11 23:01 tromey
  0 siblings, 0 replies; only message in thread
From: tromey @ 2009-01-11 23:01 UTC (permalink / raw)
  To: archer-commits

The branch, archer-tromey-charset has been updated
       via  01e993ba4395de28f42d001151b3c73a53c75ec2 (commit)
       via  966fe7b3b535cc42d9ba371539fce670bf6b2ce5 (commit)
      from  22fc6b9152aa37de270ed193ec2110091abcdc43 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email.

- Log -----------------------------------------------------------------
commit 01e993ba4395de28f42d001151b3c73a53c75ec2
Author: Tom Tromey <tromey@redhat.com>
Date:   Sun Jan 11 16:01:17 2009 -0700

    handle re-syncing correctly with wide characters

commit 966fe7b3b535cc42d9ba371539fce670bf6b2ce5
Author: Tom Tromey <tromey@redhat.com>
Date:   Sun Jan 11 15:23:52 2009 -0700

    partial cleanup for UCN printing

-----------------------------------------------------------------------

Summary of changes:
 gdb/c-lang.c   |   19 +++++++++++-----
 gdb/charset.c  |   65 +++++++++++++++++++++++++-------------------------------
 gdb/charset.h  |    5 +++-
 gdb/printcmd.c |    4 ++-
 gdb/utils.c    |    2 +-
 5 files changed, 50 insertions(+), 45 deletions(-)

First 500 lines of diff:
diff --git a/gdb/c-lang.c b/gdb/c-lang.c
index 73caf9e..37f2bf4 100644
--- a/gdb/c-lang.c
+++ b/gdb/c-lang.c
@@ -157,10 +157,17 @@ print_wchar (wchar_t w, mbstate_t *state, struct obstack *output,
 	  break;
 	default:
 	  {
-	    /* FIXME: this is not right.  */
+	    /* Emit a UCN.  This is most convenient since a UCN would
+	       be translated to the equivalent sequence in the target
+	       character set.  */
+#ifdef __STDC_ISO_10646__
 	    char buf[20];
-	    sprintf (buf, "\\u%04x", (int) w);
+	    /* We print all 8 digits to avoid any possible ambiguity.  */
+	    sprintf (buf, "\\U%08lx", (long) w);
 	    obstack_grow_str (output, buf);
+#else
+#error "Not yet ported where __STDC_ISO_10646__ is not defined."
+#endif
 	  }
 	  break;
 	}
@@ -194,7 +201,7 @@ c_emit_char (int c, struct type *type, struct ui_file *stream, int quoter)
   make_cleanup_obstack_free (&output);
 
   convert_between_encodings (encoding, "wchar_t",
-			     buf, TYPE_LENGTH (type),
+			     buf, TYPE_LENGTH (type), TYPE_LENGTH (type),
 			     &host_wdata, translit_wchar);
 
   memset (&state, 0, sizeof (state));
@@ -309,7 +316,7 @@ c_printstr (struct ui_file *stream, struct type *type, const gdb_byte *string,
   cleanup = make_cleanup_obstack_free (&internal);
 
   convert_between_encodings (encoding, "wchar_t", string, length * width,
-			     &internal, translit_wchar);
+			     width, &internal, translit_wchar);
 
   new_len = obstack_object_size (&internal) / sizeof (wchar_t);
   obj = (wchar_t *) obstack_base (&internal);
@@ -426,7 +433,7 @@ convert_ucn (char *p, char *limit, const char *dest_charset,
       result >>= 8;
     }
 
-  convert_between_encodings ("UCS-4BE", dest_charset, data, 4, output,
+  convert_between_encodings ("UCS-4BE", dest_charset, data, 4, 4, output,
 			     translit_none);
 
   return p;
@@ -550,7 +557,7 @@ parse_one_string (struct obstack *output, char *data, int len,
       /* If we saw a run of characters, convert them all.  */
       if (p > data)
 	convert_between_encodings (host_charset (), dest_charset,
-				   data, p - data, output, translit_none);
+				   data, p - data, 1, output, translit_none);
       /* If we saw an escape, convert it.  */
       if (p < limit)
 	p = convert_escape (type, dest_charset, p, limit, output);
diff --git a/gdb/charset.c b/gdb/charset.c
index fcf54df..74dbec6 100644
--- a/gdb/charset.c
+++ b/gdb/charset.c
@@ -318,7 +318,7 @@ cleanup_iconv (void *p)
 void
 convert_between_encodings (const char *from, const char *to,
 			   const gdb_byte *bytes, unsigned int num_bytes,
-			   struct obstack *output,
+			   int width, struct obstack *output,
 			   enum transliterations translit)
 {
   iconv_t desc;
@@ -370,43 +370,36 @@ convert_between_encodings (const char *from, const char *to,
 	    {
 	    case EILSEQ:
 	      {
+		int i;
+
 		/* Invalid input sequence.  */
-		switch (translit)
-		  {
-		  case translit_wchar:
-		    {
-		      /* We emit an escape sequence for the byte, skip
-			 it, and try again.  */
-		      char hex[5];
-		      int i;
-
-		      /* Translit is only available when converting to
-			 the host wchar_t.  */
-		      gdb_assert (!strcmp (to, "wchar_t"));
-		      sprintf (hex, "\\x%02x", *inp);
-		      for (i = 0; hex[i]; ++i)
-			{
-			  wchar_t w = btowc (hex[i]);
-			  obstack_grow (output, &w, sizeof (wchar_t));
-			}
-
-		      ++inp;
-		    }
-		    break;
-		  case translit_char:
-		    {
-		      /* We emit an escape sequence for the byte, skip
-			 it, and try again.  */
-		      char hex[5];
-
-		      sprintf (hex, "\\x%02x", *inp);
-		      obstack_grow_str (output, hex);
-
-		      ++inp;
-		    }
-		    break;
-		  case translit_none:
+		if (translit == translit_none)
 		    error ("Could not convert character");
+
+		/* We emit escape sequences for the bytes, skip them,
+		   and try again.  */
+		for (i = 0; i < width; ++i)
+		  {
+		    char hex[5];
+
+		    sprintf (hex, "\\x%02x", *inp);
+		    if (translit == translit_wchar)
+		      {
+			int j;
+
+			for (j = 0; hex[j]; ++j)
+			  {
+			    wchar_t w = btowc (hex[j]);
+			    obstack_grow (output, &w, sizeof (wchar_t));
+			  }
+		      }
+		    else
+		      {
+			/* translit_char.  */
+			obstack_grow_str (output, hex);
+		      }
+
+		    ++inp;
 		  }
 	      }
 	      break;
diff --git a/gdb/charset.h b/gdb/charset.h
index 6802753..228e4ff 100644
--- a/gdb/charset.h
+++ b/gdb/charset.h
@@ -56,13 +56,16 @@ enum transliterations
    BYTES holds the bytes to convert; this is assumed to be characters
    in the target encoding.
    NUM_BYTES is the number of bytes.
+   WIDTH is the width of a character from the FROM charset, in bytes.
+   For a variable width encoding, WIDTH should be the size of a "base
+   character".
    OUTPUT is an obstack where the converted data is written.  The
    caller is responsible for initializing the obstack, and for
    destroying the obstack should an error occur.
    TRANSLIT specifies how invalid conversions should be handled.  */
 void convert_between_encodings (const char *from, const char *to,
 				const gdb_byte *bytes, unsigned int num_bytes,
-				struct obstack *output,
+				int width, struct obstack *output,
 				enum transliterations translit);
 
 \f
diff --git a/gdb/printcmd.c b/gdb/printcmd.c
index 5c5db23..002aa92 100644
--- a/gdb/printcmd.c
+++ b/gdb/printcmd.c
@@ -2173,7 +2173,8 @@ printf_command (char *arg, int from_tty)
 
 	      convert_between_encodings (target_wide_charset (),
 					 host_charset (),
-					 str, j, &output, translit_char);
+					 str, j, wcwidth,
+					 &output, translit_char);
 	      obstack_grow_str0 (&output, "");
 
 	      printf_filtered (current_substring, obstack_base (&output));
@@ -2201,6 +2202,7 @@ printf_command (char *arg, int from_tty)
 	      convert_between_encodings (target_wide_charset (),
 					 host_charset (),
 					 bytes, TYPE_LENGTH (valtype),
+					 TYPE_LENGTH (valtype),
 					 &output, translit_char);
 	      obstack_grow_str0 (&output, "");
 
diff --git a/gdb/utils.c b/gdb/utils.c
index c84a007..2b3b5d4 100644
--- a/gdb/utils.c
+++ b/gdb/utils.c
@@ -1504,7 +1504,7 @@ host_char_to_target (int c, int *target_c)
   cleanups = make_cleanup_obstack_free (&host_data);
 
   convert_between_encodings (target_charset (), host_charset (),
-			     &the_char, 1, &host_data, translit_none);
+			     &the_char, 1, 1, &host_data, translit_none);
 
   if (obstack_object_size (&host_data) == 1)
     {


hooks/post-receive
--
Repository for Project Archer.


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2009-01-11 23:01 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-01-11 23:01 [SCM] archer-tromey-charset: handle re-syncing correctly with wide characters tromey

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).