From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (qmail 18137 invoked by alias); 12 Jan 2009 21:54:56 -0000 Mailing-List: contact archer-commits-help@sourceware.org; run by ezmlm Sender: Precedence: bulk List-Post: List-Help: List-Subscribe: Received: (qmail 18098 invoked by uid 306); 12 Jan 2009 21:54:55 -0000 Date: Mon, 12 Jan 2009 21:54:00 -0000 Message-ID: <20090112215455.18083.qmail@sourceware.org> From: tromey@sourceware.org To: archer-commits@sourceware.org Subject: [SCM] archer-tromey-charset: fix some escape sequence bugs. X-Git-Refname: refs/heads/archer-tromey-charset X-Git-Reftype: branch X-Git-Oldrev: 53cfd74cde0b5ce17d1c0dd0183858dcf6099710 X-Git-Newrev: 9b2cc12d114246c76e7f288ca2b9e5b9eaaed27a X-SW-Source: 2009-q1/txt/msg00035.txt.bz2 List-Id: The branch, archer-tromey-charset has been updated via 9b2cc12d114246c76e7f288ca2b9e5b9eaaed27a (commit) via 3c849c091d5a7f0f9434067ba7cd83b68fb2e0b6 (commit) from 53cfd74cde0b5ce17d1c0dd0183858dcf6099710 (commit) Those revisions listed above that are new to this repository have not appeared on any other notification email. - Log ----------------------------------------------------------------- commit 9b2cc12d114246c76e7f288ca2b9e5b9eaaed27a Author: Tom Tromey Date: Mon Jan 12 14:53:28 2009 -0700 fix some escape sequence bugs. remove undocumented \^ extension commit 3c849c091d5a7f0f9434067ba7cd83b68fb2e0b6 Author: Tom Tromey Date: Mon Jan 12 13:34:55 2009 -0700 attempt to fix print/s with wide characters ----------------------------------------------------------------------- Summary of changes: gdb/ChangeLog | 8 ++- gdb/c-exp.y | 305 +++++++++++++++++++++++++++++++------------------------- gdb/c-lang.h | 8 +- gdb/charset.h | 3 + gdb/macroexp.c | 13 ++- gdb/printcmd.c | 13 ++- gdb/utils.c | 56 +---------- 7 files changed, 205 insertions(+), 201 deletions(-) First 500 lines of diff: diff --git a/gdb/ChangeLog b/gdb/ChangeLog index 9e9927e..2835327 100644 --- a/gdb/ChangeLog +++ b/gdb/ChangeLog @@ -10,6 +10,8 @@ (make_cleanup_obstack_free): Likewise. (host_char_to_target): New function. (parse_escape): Use host_char_to_target, host_hex_value. Update. + Remove '^' case. + (no_control_char_error): Remove. * typeprint.c (print_type_scalar): Update. * scm-valprint.c (scm_scmval_print): Update. * scm-lang.h (scm_printchar, scm_printstr): Update. @@ -35,7 +37,9 @@ (objc_printchar): Likewise. (objc_printstr): Likewise. * macroexp.c (get_character_constant): Handle unicode characters. - (get_string_literal): Handle unicode strings. + Use c_parse_escape. + (get_string_literal): Handle unicode strings. Use + c_parse_escape. * m2-valprint.c (print_unpacked_pointer): Update. (m2_print_array_contents): Update. (m2_val_print): Update. @@ -66,6 +70,7 @@ (enum transliterations): New type. (convert_between_encodings): Declare. (HOST_ESCAPE_CHAR): New define. + (HOST_DELETE_CHAR): Likewise. (host_letter_to_control_character, host_hex_value): Declare. * charset-list.h: New file. * c-valprint.c (textual_name): New function. @@ -106,6 +111,7 @@ : Remove. Handle 'u', 'U', and 'L' prefixes. Call parse_string_or_char. + (c_parse_escape): New function. * auxv.c (fprint_target_auxv): Update. * ada-valprint.c (ada_emit_char): Add type argument. (ada_printchar): Likewise. diff --git a/gdb/c-exp.y b/gdb/c-exp.y index 62d36a0..1873980 100644 --- a/gdb/c-exp.y +++ b/gdb/c-exp.y @@ -1394,6 +1394,166 @@ parse_number (p, len, parsed_float, putithere) static struct obstack tempbuf; static int tempbuf_init; +/* Parse a C escape sequence. The initial backslash of the sequence + is at (*PTR)[-1]. *PTR will be updated to point to just after the + last character of the sequence. If OUTPUT is not NULL, the + translated form of the escape sequence will be written there. If + OUTPUT is NULL, no output is written and the call will only affect + *PTR. If an escape sequence is expressed in target bytes, then the + entire sequence will simply be copied to OUTPUT. Return 1 if any + character was emitted, 0 otherwise. */ + +int +c_parse_escape (char **ptr, struct obstack *output) +{ + char *tokptr = *ptr; + int result = 1; + + /* Some escape sequences undergo character set conversion. Those we + translate here. */ + switch (*tokptr) + { + /* Hex escapes do not undergo character set conversion, so keep + the escape sequence for later. */ + case 'x': + if (output) + obstack_grow_str (output, "\\x"); + ++tokptr; + if (!isxdigit (*tokptr)) + error (_("\\x escape without a following hex digit")); + while (isxdigit (*tokptr)) + { + if (output) + obstack_1grow (output, *tokptr); + ++tokptr; + } + break; + + /* Octal escapes do not undergo character set + conversion, so keep the escape sequence for + later. */ + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + if (output) + obstack_grow_str (output, "\\"); + while (isdigit (*tokptr) && *tokptr != '8' && *tokptr != '9') + { + if (output) + obstack_1grow (output, *tokptr); + ++tokptr; + } + break; + + /* We handle UCNs later. We could handle them here, but that + would mean a spurious error in the case where the UCN could + be converted to the target charset but not the host + charset. */ + case 'u': + case 'U': + { + char c = *tokptr; + int i, len = c == 'U' ? 8 : 4; + if (output) + { + obstack_1grow (output, '\\'); + obstack_1grow (output, *tokptr); + } + ++tokptr; + if (!isxdigit (*tokptr)) + error (_("\\%c escape without a following hex digit"), c); + for (i = 0; i < len && isxdigit (*tokptr); ++i) + { + if (output) + obstack_1grow (output, *tokptr); + ++tokptr; + } + } + break; + + /* We must pass backslash through so that it does not + cause quoting during the second expansion. */ + case '\\': + if (output) + obstack_grow_str (output, "\\\\"); + ++tokptr; + break; + + /* Escapes which undergo conversion. */ + case 'a': + if (output) + obstack_1grow (output, '\a'); + ++tokptr; + break; + case 'b': + if (output) + obstack_1grow (output, '\b'); + ++tokptr; + break; + case 'f': + if (output) + obstack_1grow (output, '\f'); + ++tokptr; + break; + case 'n': + if (output) + obstack_1grow (output, '\n'); + ++tokptr; + break; + case 'r': + if (output) + obstack_1grow (output, '\r'); + ++tokptr; + break; + case 't': + if (output) + obstack_1grow (output, '\t'); + ++tokptr; + break; + case 'v': + if (output) + obstack_1grow (output, '\v'); + ++tokptr; + break; + + /* GCC extension. */ + case 'e': + if (output) + obstack_1grow (output, HOST_ESCAPE_CHAR); + ++tokptr; + break; + + /* Backslash-newline expands to nothing at all. */ + case '\n': + ++tokptr; + result = 0; + break; + + /* A few escapes just expand to the character itself. */ + case '\'': + case '\"': + case '?': + /* GCC extensions. */ + case '(': + case '{': + case '[': + case '%': + /* Unrecognized escapes turn into the character itself. */ + default: + if (output) + obstack_1grow (output, *tokptr); + ++tokptr; + break; + } + *ptr = tokptr; + return result; +} + /* Parse a string or character literal from TOKPTR. The string or character may be wide or unicode. *OUTPTR is set to just after the end of the literal in the input string. The resulting token is @@ -1404,7 +1564,7 @@ static int parse_string_or_char (char *tokptr, char **outptr, struct typed_stoken *value, int *host_chars) { - int quoted, quote, i; + int quote, i; enum c_string_type type; /* Build the gdb internal form of the input string in tempbuf. Note @@ -1447,151 +1607,24 @@ parse_string_or_char (char *tokptr, char **outptr, struct typed_stoken *value, type |= C_CHAR; ++tokptr; - quoted = 0; *host_chars = 0; - for (; *tokptr; ++tokptr, ++*host_chars) + while (*tokptr) { char c = *tokptr; - if (quoted) + if (c == '\\') { - char new_c = 0; - quoted = 0; - /* Some escape sequences undergo character set conversion. - Those we translate here. */ - switch (c) - { - /* Hex escapes do not undergo character set conversion, - so keep the escape sequence for later. */ - case 'x': - obstack_1grow (&tempbuf, 'x'); - /* We look at TOKPTR[1] so that the following "continue" - will do the right thing. */ - while (isxdigit (tokptr[1])) - { - obstack_1grow (&tempbuf, tokptr[1]); - ++tokptr; - } - continue; - - /* Octal escapes do not undergo character set - conversion, so keep the escape sequence for - later. */ - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - obstack_1grow (&tempbuf, *tokptr); - /* We look at TOKPTR[1] so that the following "continue" - will do the right thing. */ - while (isdigit (tokptr[1]) - && tokptr[1] != '8' - && tokptr[1] != '9') - { - obstack_1grow (&tempbuf, tokptr[1]); - ++tokptr; - } - continue; - - /* We handle UCNs later. We could handle them here, but - that would mean a spurious error in the case where - the UCN could be converted to the target charset but - not the host charset. */ - case 'u': - case 'U': - { - int len = c == 'U' ? 8 : 4; - obstack_1grow (&tempbuf, c); - for (i = 0; i < len; ++i) - { - /* We look at TOKPTR[1] so that the following - "continue" will do the right thing. */ - if (!isxdigit (tokptr[1])) - break; - obstack_1grow (&tempbuf, tokptr[1]); - ++tokptr; - } - } - continue; - - /* We must pass backslash through so that it does not - cause quoting during the second expansion. */ - case '\\': - break; - - /* Escapes which undergo conversion. */ - case 'a': - new_c = '\a'; - break; - case 'b': - new_c = '\b'; - break; - case 'f': - new_c = '\f'; - break; - case 'n': - new_c = '\n'; - break; - case 'r': - new_c = '\r'; - break; - case 't': - new_c = '\t'; - break; - case 'v': - new_c = '\v'; - break; - - /* GCC extension. */ - case 'e': - new_c = HOST_ESCAPE_CHAR; - break; - - /* Backslash-newline expands to nothing at all. */ - case '\n': - obstack_blank (&tempbuf, -1); - continue; - - /* GDB extension. */ - case '^': - /* FIXME: needs a recursive call &c. */ - break; - - /* A few escapes just expand to the character itself. */ - case '\'': - case '\"': - case '?': - /* GCC extensions. */ - case '(': - case '{': - case '[': - case '%': - /* Unrecognized escapes turn into the character itself. */ - default: - new_c = c; - break; - } - if (new_c) - { - /* If we have a replacement character, delete the - backslash before inserting it. */ - obstack_blank (&tempbuf, -1); - c = new_c; - } + ++tokptr; + *host_chars += c_parse_escape (&tokptr, &tempbuf); } else if (c == quote) break; - else if (c == '\\') + else { - /* Don't count the quote character. */ - --*host_chars; - quoted = 1; + obstack_1grow (&tempbuf, c); + ++tokptr; + ++*host_chars; } - obstack_1grow (&tempbuf, c); } if (*tokptr != quote) diff --git a/gdb/c-lang.h b/gdb/c-lang.h index 157013d..ba9d996 100644 --- a/gdb/c-lang.h +++ b/gdb/c-lang.h @@ -54,9 +54,13 @@ enum c_string_type C_CHAR_32 = 7 }; -extern int c_parse (void); /* Defined in c-exp.y */ +/* Defined in c-exp.y. */ -extern void c_error (char *); /* Defined in c-exp.y */ +extern int c_parse (void); + +extern void c_error (char *); + +extern int c_parse_escape (char **, struct obstack *); /* Defined in c-typeprint.c */ extern void c_print_type (struct type *, char *, struct ui_file *, int, diff --git a/gdb/charset.h b/gdb/charset.h index 228e4ff..db017f5 100644 --- a/gdb/charset.h +++ b/gdb/charset.h @@ -76,6 +76,9 @@ void convert_between_encodings (const char *from, const char *to, /* The escape character. */ #define HOST_ESCAPE_CHAR 27 +/* The DEL character. */ +#define HOST_DELETE_CHAR 0177 + /* Convert a letter, like 'c', to its corresponding control character. */ char host_letter_to_control_character (char c); diff --git a/gdb/macroexp.c b/gdb/macroexp.c index a84b58b..752a939 100644 --- a/gdb/macroexp.c +++ b/gdb/macroexp.c @@ -23,6 +23,7 @@ #include "macrotab.h" #include "macroexp.h" #include "gdb_assert.h" +#include "c-lang.h" @@ -326,6 +327,7 @@ get_character_constant (struct macro_buffer *tok, char *p, char *end) { char *tok_start = p; char *body_start; + int char_count = 0; if (*p == '\'') p++; @@ -341,7 +343,7 @@ get_character_constant (struct macro_buffer *tok, char *p, char *end) error (_("Unmatched single quote.")); else if (*p == '\'') { - if (p == body_start) + if (!char_count) error (_("A character constant must contain at least one " "character.")); p++; @@ -350,10 +352,13 @@ get_character_constant (struct macro_buffer *tok, char *p, char *end) else if (*p == '\\') { p++; - parse_escape (&p); + char_count += c_parse_escape (&p, NULL); } else - p++; + { + p++; + char_count++; + } } set_token (tok, tok_start, p); @@ -401,7 +406,7 @@ get_string_literal (struct macro_buffer *tok, char *p, char *end) else if (*p == '\\') { p++; - parse_escape (&p); + c_parse_escape (&p, NULL); } else p++; diff --git a/gdb/printcmd.c b/gdb/printcmd.c index 002aa92..a475867 100644 --- a/gdb/printcmd.c +++ b/gdb/printcmd.c @@ -270,12 +270,13 @@ print_formatted (struct value *val, int size, switch (options->format) { case 's': - /* FIXME: Need to handle wchar_t's here... */ - next_address = VALUE_ADDRESS (val) - /* FIXME: wrong char type here */ - + val_print_string (builtin_type (current_gdbarch)->builtin_char, - VALUE_ADDRESS (val), -1, stream, - options); + { + struct type *elttype = TYPE_TARGET_TYPE (type); + next_address = (VALUE_ADDRESS (val) + + val_print_string (elttype, + VALUE_ADDRESS (val), -1, + stream, options)); + } return; case 'i': diff --git a/gdb/utils.c b/gdb/utils.c index 2b3b5d4..49f9984 100644 --- a/gdb/utils.c +++ b/gdb/utils.c @@ -1475,22 +1475,10 @@ query (const char *ctlstr, ...) va_end (args); } hooks/post-receive -- Repository for Project Archer.