public inbox for archer-commits@sourceware.org
help / color / mirror / Atom feed
* [SCM] archer-tromey-charset: handle re-syncing correctly with wide characters
@ 2009-01-11 23:01 tromey
0 siblings, 0 replies; only message in thread
From: tromey @ 2009-01-11 23:01 UTC (permalink / raw)
To: archer-commits
The branch, archer-tromey-charset has been updated
via 01e993ba4395de28f42d001151b3c73a53c75ec2 (commit)
via 966fe7b3b535cc42d9ba371539fce670bf6b2ce5 (commit)
from 22fc6b9152aa37de270ed193ec2110091abcdc43 (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email.
- Log -----------------------------------------------------------------
commit 01e993ba4395de28f42d001151b3c73a53c75ec2
Author: Tom Tromey <tromey@redhat.com>
Date: Sun Jan 11 16:01:17 2009 -0700
handle re-syncing correctly with wide characters
commit 966fe7b3b535cc42d9ba371539fce670bf6b2ce5
Author: Tom Tromey <tromey@redhat.com>
Date: Sun Jan 11 15:23:52 2009 -0700
partial cleanup for UCN printing
-----------------------------------------------------------------------
Summary of changes:
gdb/c-lang.c | 19 +++++++++++-----
gdb/charset.c | 65 +++++++++++++++++++++++++-------------------------------
gdb/charset.h | 5 +++-
gdb/printcmd.c | 4 ++-
gdb/utils.c | 2 +-
5 files changed, 50 insertions(+), 45 deletions(-)
First 500 lines of diff:
diff --git a/gdb/c-lang.c b/gdb/c-lang.c
index 73caf9e..37f2bf4 100644
--- a/gdb/c-lang.c
+++ b/gdb/c-lang.c
@@ -157,10 +157,17 @@ print_wchar (wchar_t w, mbstate_t *state, struct obstack *output,
break;
default:
{
- /* FIXME: this is not right. */
+ /* Emit a UCN. This is most convenient since a UCN would
+ be translated to the equivalent sequence in the target
+ character set. */
+#ifdef __STDC_ISO_10646__
char buf[20];
- sprintf (buf, "\\u%04x", (int) w);
+ /* We print all 8 digits to avoid any possible ambiguity. */
+ sprintf (buf, "\\U%08lx", (long) w);
obstack_grow_str (output, buf);
+#else
+#error "Not yet ported where __STDC_ISO_10646__ is not defined."
+#endif
}
break;
}
@@ -194,7 +201,7 @@ c_emit_char (int c, struct type *type, struct ui_file *stream, int quoter)
make_cleanup_obstack_free (&output);
convert_between_encodings (encoding, "wchar_t",
- buf, TYPE_LENGTH (type),
+ buf, TYPE_LENGTH (type), TYPE_LENGTH (type),
&host_wdata, translit_wchar);
memset (&state, 0, sizeof (state));
@@ -309,7 +316,7 @@ c_printstr (struct ui_file *stream, struct type *type, const gdb_byte *string,
cleanup = make_cleanup_obstack_free (&internal);
convert_between_encodings (encoding, "wchar_t", string, length * width,
- &internal, translit_wchar);
+ width, &internal, translit_wchar);
new_len = obstack_object_size (&internal) / sizeof (wchar_t);
obj = (wchar_t *) obstack_base (&internal);
@@ -426,7 +433,7 @@ convert_ucn (char *p, char *limit, const char *dest_charset,
result >>= 8;
}
- convert_between_encodings ("UCS-4BE", dest_charset, data, 4, output,
+ convert_between_encodings ("UCS-4BE", dest_charset, data, 4, 4, output,
translit_none);
return p;
@@ -550,7 +557,7 @@ parse_one_string (struct obstack *output, char *data, int len,
/* If we saw a run of characters, convert them all. */
if (p > data)
convert_between_encodings (host_charset (), dest_charset,
- data, p - data, output, translit_none);
+ data, p - data, 1, output, translit_none);
/* If we saw an escape, convert it. */
if (p < limit)
p = convert_escape (type, dest_charset, p, limit, output);
diff --git a/gdb/charset.c b/gdb/charset.c
index fcf54df..74dbec6 100644
--- a/gdb/charset.c
+++ b/gdb/charset.c
@@ -318,7 +318,7 @@ cleanup_iconv (void *p)
void
convert_between_encodings (const char *from, const char *to,
const gdb_byte *bytes, unsigned int num_bytes,
- struct obstack *output,
+ int width, struct obstack *output,
enum transliterations translit)
{
iconv_t desc;
@@ -370,43 +370,36 @@ convert_between_encodings (const char *from, const char *to,
{
case EILSEQ:
{
+ int i;
+
/* Invalid input sequence. */
- switch (translit)
- {
- case translit_wchar:
- {
- /* We emit an escape sequence for the byte, skip
- it, and try again. */
- char hex[5];
- int i;
-
- /* Translit is only available when converting to
- the host wchar_t. */
- gdb_assert (!strcmp (to, "wchar_t"));
- sprintf (hex, "\\x%02x", *inp);
- for (i = 0; hex[i]; ++i)
- {
- wchar_t w = btowc (hex[i]);
- obstack_grow (output, &w, sizeof (wchar_t));
- }
-
- ++inp;
- }
- break;
- case translit_char:
- {
- /* We emit an escape sequence for the byte, skip
- it, and try again. */
- char hex[5];
-
- sprintf (hex, "\\x%02x", *inp);
- obstack_grow_str (output, hex);
-
- ++inp;
- }
- break;
- case translit_none:
+ if (translit == translit_none)
error ("Could not convert character");
+
+ /* We emit escape sequences for the bytes, skip them,
+ and try again. */
+ for (i = 0; i < width; ++i)
+ {
+ char hex[5];
+
+ sprintf (hex, "\\x%02x", *inp);
+ if (translit == translit_wchar)
+ {
+ int j;
+
+ for (j = 0; hex[j]; ++j)
+ {
+ wchar_t w = btowc (hex[j]);
+ obstack_grow (output, &w, sizeof (wchar_t));
+ }
+ }
+ else
+ {
+ /* translit_char. */
+ obstack_grow_str (output, hex);
+ }
+
+ ++inp;
}
}
break;
diff --git a/gdb/charset.h b/gdb/charset.h
index 6802753..228e4ff 100644
--- a/gdb/charset.h
+++ b/gdb/charset.h
@@ -56,13 +56,16 @@ enum transliterations
BYTES holds the bytes to convert; this is assumed to be characters
in the target encoding.
NUM_BYTES is the number of bytes.
+ WIDTH is the width of a character from the FROM charset, in bytes.
+ For a variable width encoding, WIDTH should be the size of a "base
+ character".
OUTPUT is an obstack where the converted data is written. The
caller is responsible for initializing the obstack, and for
destroying the obstack should an error occur.
TRANSLIT specifies how invalid conversions should be handled. */
void convert_between_encodings (const char *from, const char *to,
const gdb_byte *bytes, unsigned int num_bytes,
- struct obstack *output,
+ int width, struct obstack *output,
enum transliterations translit);
\f
diff --git a/gdb/printcmd.c b/gdb/printcmd.c
index 5c5db23..002aa92 100644
--- a/gdb/printcmd.c
+++ b/gdb/printcmd.c
@@ -2173,7 +2173,8 @@ printf_command (char *arg, int from_tty)
convert_between_encodings (target_wide_charset (),
host_charset (),
- str, j, &output, translit_char);
+ str, j, wcwidth,
+ &output, translit_char);
obstack_grow_str0 (&output, "");
printf_filtered (current_substring, obstack_base (&output));
@@ -2201,6 +2202,7 @@ printf_command (char *arg, int from_tty)
convert_between_encodings (target_wide_charset (),
host_charset (),
bytes, TYPE_LENGTH (valtype),
+ TYPE_LENGTH (valtype),
&output, translit_char);
obstack_grow_str0 (&output, "");
diff --git a/gdb/utils.c b/gdb/utils.c
index c84a007..2b3b5d4 100644
--- a/gdb/utils.c
+++ b/gdb/utils.c
@@ -1504,7 +1504,7 @@ host_char_to_target (int c, int *target_c)
cleanups = make_cleanup_obstack_free (&host_data);
convert_between_encodings (target_charset (), host_charset (),
- &the_char, 1, &host_data, translit_none);
+ &the_char, 1, 1, &host_data, translit_none);
if (obstack_object_size (&host_data) == 1)
{
hooks/post-receive
--
Repository for Project Archer.
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2009-01-11 23:01 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-01-11 23:01 [SCM] archer-tromey-charset: handle re-syncing correctly with wide characters tromey
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).