From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <archer-commits-return-281-listarch-archer-commits=sourceware.org@sourceware.org>
Received: (qmail 15349 invoked by alias); 11 Jan 2009 23:01:57 -0000
Mailing-List: contact archer-commits-help@sourceware.org; run by ezmlm
Sender: <archer-commits@sourceware.org>
Precedence: bulk
List-Post: <mailto:archer-commits@sourceware.org>
List-Help: <mailto:archer-commits-help@sourceware.org>
List-Subscribe: <mailto:archer-commits-subscribe@sourceware.org>
Received: (qmail 14856 invoked by uid 306); 11 Jan 2009 23:01:55 -0000
Date: Sun, 11 Jan 2009 23:01:00 -0000
Message-ID: <20090111230155.14796.qmail@sourceware.org>
From: tromey@sourceware.org
To: archer-commits@sourceware.org
Subject: [SCM]  archer-tromey-charset: handle re-syncing correctly with wide characters
X-Git-Refname: refs/heads/archer-tromey-charset
X-Git-Reftype: branch
X-Git-Oldrev: 22fc6b9152aa37de270ed193ec2110091abcdc43
X-Git-Newrev: 01e993ba4395de28f42d001151b3c73a53c75ec2
X-SW-Source: 2009-q1/txt/msg00031.txt.bz2
List-Id: <archer-commits.sourceware.org>

The branch, archer-tromey-charset has been updated
       via  01e993ba4395de28f42d001151b3c73a53c75ec2 (commit)
       via  966fe7b3b535cc42d9ba371539fce670bf6b2ce5 (commit)
      from  22fc6b9152aa37de270ed193ec2110091abcdc43 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email.

- Log -----------------------------------------------------------------
commit 01e993ba4395de28f42d001151b3c73a53c75ec2
Author: Tom Tromey <tromey@redhat.com>
Date:   Sun Jan 11 16:01:17 2009 -0700

    handle re-syncing correctly with wide characters

commit 966fe7b3b535cc42d9ba371539fce670bf6b2ce5
Author: Tom Tromey <tromey@redhat.com>
Date:   Sun Jan 11 15:23:52 2009 -0700

    partial cleanup for UCN printing

-----------------------------------------------------------------------

Summary of changes:
 gdb/c-lang.c   |   19 +++++++++++-----
 gdb/charset.c  |   65 +++++++++++++++++++++++++-------------------------------
 gdb/charset.h  |    5 +++-
 gdb/printcmd.c |    4 ++-
 gdb/utils.c    |    2 +-
 5 files changed, 50 insertions(+), 45 deletions(-)

First 500 lines of diff:
diff --git a/gdb/c-lang.c b/gdb/c-lang.c
index 73caf9e..37f2bf4 100644
--- a/gdb/c-lang.c
+++ b/gdb/c-lang.c
@@ -157,10 +157,17 @@ print_wchar (wchar_t w, mbstate_t *state, struct obstack *output,
 	  break;
 	default:
 	  {
-	    /* FIXME: this is not right.  */
+	    /* Emit a UCN.  This is most convenient since a UCN would
+	       be translated to the equivalent sequence in the target
+	       character set.  */
+#ifdef __STDC_ISO_10646__
 	    char buf[20];
-	    sprintf (buf, "\\u%04x", (int) w);
+	    /* We print all 8 digits to avoid any possible ambiguity.  */
+	    sprintf (buf, "\\U%08lx", (long) w);
 	    obstack_grow_str (output, buf);
+#else
+#error "Not yet ported where __STDC_ISO_10646__ is not defined."
+#endif
 	  }
 	  break;
 	}
@@ -194,7 +201,7 @@ c_emit_char (int c, struct type *type, struct ui_file *stream, int quoter)
   make_cleanup_obstack_free (&output);
 
   convert_between_encodings (encoding, "wchar_t",
-			     buf, TYPE_LENGTH (type),
+			     buf, TYPE_LENGTH (type), TYPE_LENGTH (type),
 			     &host_wdata, translit_wchar);
 
   memset (&state, 0, sizeof (state));
@@ -309,7 +316,7 @@ c_printstr (struct ui_file *stream, struct type *type, const gdb_byte *string,
   cleanup = make_cleanup_obstack_free (&internal);
 
   convert_between_encodings (encoding, "wchar_t", string, length * width,
-			     &internal, translit_wchar);
+			     width, &internal, translit_wchar);
 
   new_len = obstack_object_size (&internal) / sizeof (wchar_t);
   obj = (wchar_t *) obstack_base (&internal);
@@ -426,7 +433,7 @@ convert_ucn (char *p, char *limit, const char *dest_charset,
       result >>= 8;
     }
 
-  convert_between_encodings ("UCS-4BE", dest_charset, data, 4, output,
+  convert_between_encodings ("UCS-4BE", dest_charset, data, 4, 4, output,
 			     translit_none);
 
   return p;
@@ -550,7 +557,7 @@ parse_one_string (struct obstack *output, char *data, int len,
       /* If we saw a run of characters, convert them all.  */
       if (p > data)
 	convert_between_encodings (host_charset (), dest_charset,
-				   data, p - data, output, translit_none);
+				   data, p - data, 1, output, translit_none);
       /* If we saw an escape, convert it.  */
       if (p < limit)
 	p = convert_escape (type, dest_charset, p, limit, output);
diff --git a/gdb/charset.c b/gdb/charset.c
index fcf54df..74dbec6 100644
--- a/gdb/charset.c
+++ b/gdb/charset.c
@@ -318,7 +318,7 @@ cleanup_iconv (void *p)
 void
 convert_between_encodings (const char *from, const char *to,
 			   const gdb_byte *bytes, unsigned int num_bytes,
-			   struct obstack *output,
+			   int width, struct obstack *output,
 			   enum transliterations translit)
 {
   iconv_t desc;
@@ -370,43 +370,36 @@ convert_between_encodings (const char *from, const char *to,
 	    {
 	    case EILSEQ:
 	      {
+		int i;
+
 		/* Invalid input sequence.  */
-		switch (translit)
-		  {
-		  case translit_wchar:
-		    {
-		      /* We emit an escape sequence for the byte, skip
-			 it, and try again.  */
-		      char hex[5];
-		      int i;
-
-		      /* Translit is only available when converting to
-			 the host wchar_t.  */
-		      gdb_assert (!strcmp (to, "wchar_t"));
-		      sprintf (hex, "\\x%02x", *inp);
-		      for (i = 0; hex[i]; ++i)
-			{
-			  wchar_t w = btowc (hex[i]);
-			  obstack_grow (output, &w, sizeof (wchar_t));
-			}
-
-		      ++inp;
-		    }
-		    break;
-		  case translit_char:
-		    {
-		      /* We emit an escape sequence for the byte, skip
-			 it, and try again.  */
-		      char hex[5];
-
-		      sprintf (hex, "\\x%02x", *inp);
-		      obstack_grow_str (output, hex);
-
-		      ++inp;
-		    }
-		    break;
-		  case translit_none:
+		if (translit == translit_none)
 		    error ("Could not convert character");
+
+		/* We emit escape sequences for the bytes, skip them,
+		   and try again.  */
+		for (i = 0; i < width; ++i)
+		  {
+		    char hex[5];
+
+		    sprintf (hex, "\\x%02x", *inp);
+		    if (translit == translit_wchar)
+		      {
+			int j;
+
+			for (j = 0; hex[j]; ++j)
+			  {
+			    wchar_t w = btowc (hex[j]);
+			    obstack_grow (output, &w, sizeof (wchar_t));
+			  }
+		      }
+		    else
+		      {
+			/* translit_char.  */
+			obstack_grow_str (output, hex);
+		      }
+
+		    ++inp;
 		  }
 	      }
 	      break;
diff --git a/gdb/charset.h b/gdb/charset.h
index 6802753..228e4ff 100644
--- a/gdb/charset.h
+++ b/gdb/charset.h
@@ -56,13 +56,16 @@ enum transliterations
    BYTES holds the bytes to convert; this is assumed to be characters
    in the target encoding.
    NUM_BYTES is the number of bytes.
+   WIDTH is the width of a character from the FROM charset, in bytes.
+   For a variable width encoding, WIDTH should be the size of a "base
+   character".
    OUTPUT is an obstack where the converted data is written.  The
    caller is responsible for initializing the obstack, and for
    destroying the obstack should an error occur.
    TRANSLIT specifies how invalid conversions should be handled.  */
 void convert_between_encodings (const char *from, const char *to,
 				const gdb_byte *bytes, unsigned int num_bytes,
-				struct obstack *output,
+				int width, struct obstack *output,
 				enum transliterations translit);
 
 
diff --git a/gdb/printcmd.c b/gdb/printcmd.c
index 5c5db23..002aa92 100644
--- a/gdb/printcmd.c
+++ b/gdb/printcmd.c
@@ -2173,7 +2173,8 @@ printf_command (char *arg, int from_tty)
 
 	      convert_between_encodings (target_wide_charset (),
 					 host_charset (),
-					 str, j, &output, translit_char);
+					 str, j, wcwidth,
+					 &output, translit_char);
 	      obstack_grow_str0 (&output, "");
 
 	      printf_filtered (current_substring, obstack_base (&output));
@@ -2201,6 +2202,7 @@ printf_command (char *arg, int from_tty)
 	      convert_between_encodings (target_wide_charset (),
 					 host_charset (),
 					 bytes, TYPE_LENGTH (valtype),
+					 TYPE_LENGTH (valtype),
 					 &output, translit_char);
 	      obstack_grow_str0 (&output, "");
 
diff --git a/gdb/utils.c b/gdb/utils.c
index c84a007..2b3b5d4 100644
--- a/gdb/utils.c
+++ b/gdb/utils.c
@@ -1504,7 +1504,7 @@ host_char_to_target (int c, int *target_c)
   cleanups = make_cleanup_obstack_free (&host_data);
 
   convert_between_encodings (target_charset (), host_charset (),
-			     &the_char, 1, &host_data, translit_none);
+			     &the_char, 1, 1, &host_data, translit_none);
 
   if (obstack_object_size (&host_data) == 1)
     {


hooks/post-receive
--
Repository for Project Archer.