From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (qmail 15349 invoked by alias); 11 Jan 2009 23:01:57 -0000 Mailing-List: contact archer-commits-help@sourceware.org; run by ezmlm Sender: Precedence: bulk List-Post: List-Help: List-Subscribe: Received: (qmail 14856 invoked by uid 306); 11 Jan 2009 23:01:55 -0000 Date: Sun, 11 Jan 2009 23:01:00 -0000 Message-ID: <20090111230155.14796.qmail@sourceware.org> From: tromey@sourceware.org To: archer-commits@sourceware.org Subject: [SCM] archer-tromey-charset: handle re-syncing correctly with wide characters X-Git-Refname: refs/heads/archer-tromey-charset X-Git-Reftype: branch X-Git-Oldrev: 22fc6b9152aa37de270ed193ec2110091abcdc43 X-Git-Newrev: 01e993ba4395de28f42d001151b3c73a53c75ec2 X-SW-Source: 2009-q1/txt/msg00031.txt.bz2 List-Id: The branch, archer-tromey-charset has been updated via 01e993ba4395de28f42d001151b3c73a53c75ec2 (commit) via 966fe7b3b535cc42d9ba371539fce670bf6b2ce5 (commit) from 22fc6b9152aa37de270ed193ec2110091abcdc43 (commit) Those revisions listed above that are new to this repository have not appeared on any other notification email. - Log ----------------------------------------------------------------- commit 01e993ba4395de28f42d001151b3c73a53c75ec2 Author: Tom Tromey Date: Sun Jan 11 16:01:17 2009 -0700 handle re-syncing correctly with wide characters commit 966fe7b3b535cc42d9ba371539fce670bf6b2ce5 Author: Tom Tromey Date: Sun Jan 11 15:23:52 2009 -0700 partial cleanup for UCN printing ----------------------------------------------------------------------- Summary of changes: gdb/c-lang.c | 19 +++++++++++----- gdb/charset.c | 65 +++++++++++++++++++++++++------------------------------- gdb/charset.h | 5 +++- gdb/printcmd.c | 4 ++- gdb/utils.c | 2 +- 5 files changed, 50 insertions(+), 45 deletions(-) First 500 lines of diff: diff --git a/gdb/c-lang.c b/gdb/c-lang.c index 73caf9e..37f2bf4 100644 --- a/gdb/c-lang.c +++ b/gdb/c-lang.c @@ -157,10 +157,17 @@ print_wchar (wchar_t w, mbstate_t *state, struct obstack *output, break; default: { - /* FIXME: this is not right. */ + /* Emit a UCN. This is most convenient since a UCN would + be translated to the equivalent sequence in the target + character set. */ +#ifdef __STDC_ISO_10646__ char buf[20]; - sprintf (buf, "\\u%04x", (int) w); + /* We print all 8 digits to avoid any possible ambiguity. */ + sprintf (buf, "\\U%08lx", (long) w); obstack_grow_str (output, buf); +#else +#error "Not yet ported where __STDC_ISO_10646__ is not defined." +#endif } break; } @@ -194,7 +201,7 @@ c_emit_char (int c, struct type *type, struct ui_file *stream, int quoter) make_cleanup_obstack_free (&output); convert_between_encodings (encoding, "wchar_t", - buf, TYPE_LENGTH (type), + buf, TYPE_LENGTH (type), TYPE_LENGTH (type), &host_wdata, translit_wchar); memset (&state, 0, sizeof (state)); @@ -309,7 +316,7 @@ c_printstr (struct ui_file *stream, struct type *type, const gdb_byte *string, cleanup = make_cleanup_obstack_free (&internal); convert_between_encodings (encoding, "wchar_t", string, length * width, - &internal, translit_wchar); + width, &internal, translit_wchar); new_len = obstack_object_size (&internal) / sizeof (wchar_t); obj = (wchar_t *) obstack_base (&internal); @@ -426,7 +433,7 @@ convert_ucn (char *p, char *limit, const char *dest_charset, result >>= 8; } - convert_between_encodings ("UCS-4BE", dest_charset, data, 4, output, + convert_between_encodings ("UCS-4BE", dest_charset, data, 4, 4, output, translit_none); return p; @@ -550,7 +557,7 @@ parse_one_string (struct obstack *output, char *data, int len, /* If we saw a run of characters, convert them all. */ if (p > data) convert_between_encodings (host_charset (), dest_charset, - data, p - data, output, translit_none); + data, p - data, 1, output, translit_none); /* If we saw an escape, convert it. */ if (p < limit) p = convert_escape (type, dest_charset, p, limit, output); diff --git a/gdb/charset.c b/gdb/charset.c index fcf54df..74dbec6 100644 --- a/gdb/charset.c +++ b/gdb/charset.c @@ -318,7 +318,7 @@ cleanup_iconv (void *p) void convert_between_encodings (const char *from, const char *to, const gdb_byte *bytes, unsigned int num_bytes, - struct obstack *output, + int width, struct obstack *output, enum transliterations translit) { iconv_t desc; @@ -370,43 +370,36 @@ convert_between_encodings (const char *from, const char *to, { case EILSEQ: { + int i; + /* Invalid input sequence. */ - switch (translit) - { - case translit_wchar: - { - /* We emit an escape sequence for the byte, skip - it, and try again. */ - char hex[5]; - int i; - - /* Translit is only available when converting to - the host wchar_t. */ - gdb_assert (!strcmp (to, "wchar_t")); - sprintf (hex, "\\x%02x", *inp); - for (i = 0; hex[i]; ++i) - { - wchar_t w = btowc (hex[i]); - obstack_grow (output, &w, sizeof (wchar_t)); - } - - ++inp; - } - break; - case translit_char: - { - /* We emit an escape sequence for the byte, skip - it, and try again. */ - char hex[5]; - - sprintf (hex, "\\x%02x", *inp); - obstack_grow_str (output, hex); - - ++inp; - } - break; - case translit_none: + if (translit == translit_none) error ("Could not convert character"); + + /* We emit escape sequence for the bytes, skip them, + and try again. */ + for (i = 0; i < width; ++i) + { + char hex[5]; + + sprintf (hex, "\\x%02x", *inp); + if (translit == translit_wchar) + { + int j; + + for (j = 0; hex[j]; ++j) + { + wchar_t w = btowc (hex[j]); + obstack_grow (output, &w, sizeof (wchar_t)); + } + } + else + { + /* translit_char. */ + obstack_grow_str (output, hex); + } + + ++inp; } } break; diff --git a/gdb/charset.h b/gdb/charset.h index 6802753..228e4ff 100644 --- a/gdb/charset.h +++ b/gdb/charset.h @@ -56,13 +56,16 @@ enum transliterations BYTES holds the bytes to convert; this is assumed to be characters in the target encoding. NUM_BYTES is the number of bytes. + WIDTH is the width of a character from the FROM charset, in bytes. + For a variable width encoding, WIDTH should be the size of a "base + character". OUTPUT is an obstack where the converted data is written. The caller is responsible for initializing the obstack, and for destroying the obstack should an error occur. TRANSLIT specifies how invalid conversions should be handled. */ void convert_between_encodings (const char *from, const char *to, const gdb_byte *bytes, unsigned int num_bytes, - struct obstack *output, + int width, struct obstack *output, enum transliterations translit); diff --git a/gdb/printcmd.c b/gdb/printcmd.c index 5c5db23..002aa92 100644 --- a/gdb/printcmd.c +++ b/gdb/printcmd.c @@ -2173,7 +2173,8 @@ printf_command (char *arg, int from_tty) convert_between_encodings (target_wide_charset (), host_charset (), - str, j, &output, translit_char); + str, j, wcwidth, + &output, translit_char); obstack_grow_str0 (&output, ""); printf_filtered (current_substring, obstack_base (&output)); @@ -2201,6 +2202,7 @@ printf_command (char *arg, int from_tty) convert_between_encodings (target_wide_charset (), host_charset (), bytes, TYPE_LENGTH (valtype), + TYPE_LENGTH (valtype), &output, translit_char); obstack_grow_str0 (&output, ""); diff --git a/gdb/utils.c b/gdb/utils.c index c84a007..2b3b5d4 100644 --- a/gdb/utils.c +++ b/gdb/utils.c @@ -1504,7 +1504,7 @@ host_char_to_target (int c, int *target_c) cleanups = make_cleanup_obstack_free (&host_data); convert_between_encodings (target_charset (), host_charset (), - &the_char, 1, &host_data, translit_none); + &the_char, 1, 1, &host_data, translit_none); if (obstack_object_size (&host_data) == 1) { hooks/post-receive -- Repository for Project Archer.