From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (qmail 13866 invoked by alias); 9 Jan 2009 04:24:07 -0000 Mailing-List: contact archer-commits-help@sourceware.org; run by ezmlm Sender: Precedence: bulk List-Post: List-Help: List-Subscribe: Received: (qmail 13831 invoked by uid 306); 9 Jan 2009 04:24:05 -0000 Date: Fri, 09 Jan 2009 04:24:00 -0000 Message-ID: <20090109042404.13814.qmail@sourceware.org> From: tromey@sourceware.org To: archer-commits@sourceware.org Subject: [SCM] archer-tromey-charset: merged X-Git-Refname: refs/heads/archer-tromey-charset X-Git-Reftype: branch X-Git-Oldrev: 26af839fdd5cd7143867acff0012473537ecc667 X-Git-Newrev: 3db781d3884e7b03393020d1851a58985fbe82ab X-SW-Source: 2009-q1/txt/msg00021.txt.bz2 List-Id: The branch, archer-tromey-charset has been updated via 3db781d3884e7b03393020d1851a58985fbe82ab (commit) via c80cea24f00fbecf63ac8d9430d9b3b5be0acf3d (commit) via d1314bebf1724c8eca4cb6f41741460bb11eedd3 (commit) via 58e614ec7d3d04d2df44a7f117c3d4a4cb05242e (commit) via 775b290a7715c0cbbb0dc1c4b9ba46076431a8f1 (commit) from 26af839fdd5cd7143867acff0012473537ecc667 (commit) Those revisions listed above that are new to this repository have not appeared on any other notification email. - Log ----------------------------------------------------------------- commit 3db781d3884e7b03393020d1851a58985fbe82ab Merge: c80cea24f00fbecf63ac8d9430d9b3b5be0acf3d 26af839fdd5cd7143867acff0012473537ecc667 Author: Tom Tromey Date: Thu Jan 8 21:21:22 2009 -0700 merged commit c80cea24f00fbecf63ac8d9430d9b3b5be0acf3d Author: Tom Tromey Date: Thu Jan 8 18:35:36 2009 -0700 fix wide string printing. auto-select big- or little-endian target wide charset. list available charset names for completion. commit d1314bebf1724c8eca4cb6f41741460bb11eedd3 Author: Tom Tromey Date: Thu Jan 8 15:11:07 2009 -0700 fixed wide-char lexer buglet commit 58e614ec7d3d04d2df44a7f117c3d4a4cb05242e Author: Tom Tromey Date: Sun Dec 28 16:21:52 2008 -0700 macro tokenization bug fix commit 775b290a7715c0cbbb0dc1c4b9ba46076431a8f1 Author: Tom Tromey Date: Sun Dec 28 02:22:00 2008 -0700 initial charset rewrite ----------------------------------------------------------------------- Summary of changes: gdb/ChangeLog | 9 + gdb/acinclude.m4 | 3 + gdb/ada-lang.h | 4 +- gdb/ada-valprint.c | 7 +- gdb/auxv.c | 3 +- gdb/c-exp.y | 436 ++++++++++++------ gdb/c-lang.c | 533 ++++++++++++++++++++-- gdb/c-lang.h | 29 ++- gdb/c-valprint.c | 58 ++- gdb/charset-list.h | 1178 +++++++++++++++++++++++++++++++++++++++++++++++ gdb/charset.c | 1259 ++++++++++----------------------------------------- gdb/charset.h | 110 ++--- gdb/config.in | 3 + gdb/configure | 64 +++ gdb/configure.ac | 1 + gdb/defs.h | 3 + gdb/doc/gdb.texinfo | 4 - gdb/expprint.c | 12 +- gdb/f-lang.c | 5 +- gdb/f-valprint.c | 5 +- gdb/gdb_locale.h | 4 + gdb/jv-valprint.c | 2 +- gdb/language.c | 5 +- gdb/language.h | 8 +- gdb/m2-lang.c | 5 +- gdb/m2-valprint.c | 8 +- gdb/macroexp.c | 16 +- gdb/objc-lang.c | 6 +- gdb/p-lang.c | 17 +- gdb/p-lang.h | 7 +- gdb/p-valprint.c | 15 +- gdb/parse.c | 46 ++ gdb/parser-defs.h | 18 + gdb/printcmd.c | 4 +- gdb/scm-lang.c | 4 +- gdb/utils.c | 98 ++++- gdb/valops.c | 18 + gdb/valprint.c | 6 +- gdb/value.h | 4 +- 39 files changed, 2648 insertions(+), 1369 deletions(-) create mode 100644 gdb/charset-list.h First 500 lines of diff: diff --git a/gdb/ChangeLog b/gdb/ChangeLog index 8c93b4a..f37ec17 100644 --- a/gdb/ChangeLog +++ b/gdb/ChangeLog @@ -1,3 +1,12 @@ +2008-12-23 Tom Tromey + + * gdb_locale.h: Include langinfo.h. + * charset.c (_initialize_charset): Set default host charset from + the locale. + * aclocal.m4, config.in, configure: Rebuild. + * configure.ac: Call AM_LANGINFO_CODESET. + * acinclude.m4: Include codeset.m4. + 2009-01-06 Jim Blandy Check return values of functions declared with warn_unused_result diff --git a/gdb/acinclude.m4 b/gdb/acinclude.m4 index 81b5d47..09a2eb7 100644 --- a/gdb/acinclude.m4 +++ b/gdb/acinclude.m4 @@ -29,6 +29,9 @@ sinclude([../config/depstand.m4]) dnl For AM_LC_MESSAGES sinclude([../config/lcmessage.m4]) +dnl For AM_LANGINFO_CODESET. +sinclude([../config/codeset.m4]) + # # Sometimes the native compiler is a bogus stub for gcc or /usr/ucb/cc. This # makes configure think it's cross compiling. If --target wasn't used, then diff --git a/gdb/ada-lang.h b/gdb/ada-lang.h index c7cc62a..b3e6454 100644 --- a/gdb/ada-lang.h +++ b/gdb/ada-lang.h @@ -261,8 +261,8 @@ extern void ada_emit_char (int, struct ui_file *, int, int); extern void ada_printchar (int, struct ui_file *); -extern void ada_printstr (struct ui_file *, const gdb_byte *, - unsigned int, int, int, +extern void ada_printstr (struct ui_file *, struct type *, const gdb_byte *, + unsigned int, int, const struct value_print_options *); struct value *ada_convert_actual (struct value *actual, diff --git a/gdb/ada-valprint.c b/gdb/ada-valprint.c index 9647971..6aeaba5 100644 --- a/gdb/ada-valprint.c +++ b/gdb/ada-valprint.c @@ -544,11 +544,12 @@ printstr (struct ui_file *stream, const gdb_byte *string, } void -ada_printstr (struct ui_file *stream, const gdb_byte *string, - unsigned int length, int width, int force_ellipses, +ada_printstr (struct ui_file *stream, struct type *type, const gdb_byte *string, + unsigned int length, int force_ellipses, const struct value_print_options *options) { - printstr (stream, string, length, force_ellipses, width, options); + printstr (stream, string, length, force_ellipses, TYPE_LENGTH (type), + options); } diff --git a/gdb/auxv.c b/gdb/auxv.c index 2c42529..52aa29e 100644 --- a/gdb/auxv.c +++ b/gdb/auxv.c @@ -247,7 +247,8 @@ fprint_target_auxv (struct ui_file *file, struct target_ops *ops) get_user_print_options (&opts); if (opts.addressprint) fprintf_filtered (file, "0x%s", paddr_nz (val)); - val_print_string (val, -1, 1, file, &opts); + val_print_string (builtin_type (target_gdbarch)->builtin_char, + val, -1, file, &opts); fprintf_filtered (file, "\n"); } break; diff --git a/gdb/c-exp.y b/gdb/c-exp.y index d4bbbcc..085c4dd 100644 --- a/gdb/c-exp.y +++ b/gdb/c-exp.y @@ -143,6 +143,7 @@ void yyerror (char *); struct symbol *sym; struct type *tval; struct stoken sval; + struct typed_stoken tsval; struct ttype tsym; struct symtoken ssym; int voidval; @@ -150,6 +151,7 @@ void yyerror (char *); enum exp_opcode opcode; struct internalvar *ivar; + struct stoken_vector svec; struct type **tvec; int *ivec; } @@ -182,11 +184,13 @@ static int parse_number (char *, int, int, YYSTYPE *); Contexts where this distinction is not important can use the nonterminal "name", which matches either NAME or TYPENAME. */ -%token STRING +%token STRING +%token CHAR %token NAME /* BLOCKNAME defined below to give it higher precedence. */ %token COMPLETE %token TYPENAME -%type name string_exp +%type name +%type string_exp %type name_not_typename %type typename @@ -524,6 +528,15 @@ exp : INT write_exp_elt_opcode (OP_LONG); } ; +exp : CHAR + { + struct stoken_vector vec; + vec.len = 1; + vec.tokens = &$1; + write_exp_string_vector ($1.type, &vec); + } + ; + exp : NAME_OR_INT { YYSTYPE val; parse_number ($1.stoken.ptr, $1.stoken.length, 0, &val); @@ -572,48 +585,64 @@ string_exp: string. Note that we follow the NUL-termination convention of the lexer. */ - $$.length = $1.length; - $$.ptr = malloc ($1.length + 1); - memcpy ($$.ptr, $1.ptr, $1.length + 1); + struct typed_stoken *vec = XNEW (struct typed_stoken); + $$.len = 1; + $$.tokens = vec; + + vec->type = $1.type; + vec->length = $1.length; + vec->ptr = malloc ($1.length + 1); + memcpy (vec->ptr, $1.ptr, $1.length + 1); } | string_exp STRING { /* Note that we NUL-terminate here, but just for convenience. */ - struct stoken t; - t.length = $1.length + $2.length; - t.ptr = malloc (t.length + 1); - memcpy (t.ptr, $1.ptr, $1.length); - memcpy (t.ptr + $1.length, $2.ptr, $2.length + 1); - free ($1.ptr); - $$ = t; + char *p; + ++$$.len; + $$.tokens = realloc ($$.tokens, + $$.len * sizeof (struct typed_stoken)); + + p = malloc ($2.length + 1); + memcpy (p, $2.ptr, $2.length + 1); + + $$.tokens[$$.len - 1].type = $2.type; + $$.tokens[$$.len - 1].length = $2.length; + $$.tokens[$$.len - 1].ptr = p; } ; exp : string_exp - { /* C strings are converted into array constants with - an explicit null byte added at the end. Thus - the array upper bound is the string length. - There is no such thing in C as a completely empty - string. */ - char *sp = $1.ptr; int count = $1.length; - while (count-- > 0) + { + int i; + enum c_string_type type = C_STRING; + + for (i = 0; i < $1.len; ++i) { - write_exp_elt_opcode (OP_LONG); - write_exp_elt_type (parse_type->builtin_char); - write_exp_elt_longcst ((LONGEST)(*sp++)); - write_exp_elt_opcode (OP_LONG); + switch ($1.tokens[i].type) + { + case C_STRING: + break; + case C_WIDE_STRING: + case C_STRING_16: + case C_STRING_32: + if (type != C_STRING + && type != $1.tokens[i].type) + error ("undefined string concatenation"); + type = $1.tokens[i].type; + break; + default: + /* internal error */ + internal_error (__FILE__, __LINE__, + "unrecognized type in string concatenation"); + } } - write_exp_elt_opcode (OP_LONG); - write_exp_elt_type (parse_type->builtin_char); - write_exp_elt_longcst ((LONGEST)'\0'); - write_exp_elt_opcode (OP_LONG); - write_exp_elt_opcode (OP_ARRAY); - write_exp_elt_longcst ((LONGEST) 0); - write_exp_elt_longcst ((LONGEST) ($1.length)); - write_exp_elt_opcode (OP_ARRAY); - free ($1.ptr); + + write_exp_string_vector (type, &$1); + for (i = 0; i < $1.len; ++i) + free ($1.tokens[i].ptr); + free ($1.tokens); } ; @@ -1361,6 +1390,222 @@ parse_number (p, len, parsed_float, putithere) return INT; } +/* Temporary obstack used for holding strings. */ +static struct obstack tempbuf; +static int tempbuf_init; + +static int +parse_string_or_char (char *tokptr, char **outptr, struct typed_stoken *value, + int *host_chars) +{ + int quoted, quote, i; + enum c_string_type type; + + /* Build the gdb internal form of the input string in tempbuf. Note + that the buffer is null byte terminated *only* for the + convenience of debugging gdb itself and printing the buffer + contents when the buffer contains no embedded nulls. Gdb does + not depend upon the buffer being null byte terminated, it uses + the length string instead. This allows gdb to handle C strings + (as well as strings in other languages) with embedded null + bytes */ + + if (!tempbuf_init) + tempbuf_init = 1; + else + obstack_free (&tempbuf, NULL); + obstack_init (&tempbuf); + + /* Record the string type. */ + if (*tokptr == 'L') + { + type = C_WIDE_STRING; + ++tokptr; + } + else if (*tokptr == 'u') + { + type = C_STRING_16; + ++tokptr; + } + else if (*tokptr == 'U') + { + type = C_STRING_32; + ++tokptr; + } + else + type = C_STRING; + + /* Skip the quote. */ + quote = *tokptr; + if (quote == '\'') + type |= C_CHAR; + ++tokptr; + + quoted = 0; + *host_chars = 0; + + for (; *tokptr; ++tokptr, ++*host_chars) + { + char c = *tokptr; + if (quoted) + { + char new_c = 0; + quoted = 0; + /* Some escape sequences undergo character set conversion. + Those we translate here. */ + switch (c) + { + /* Hex escapes do not undergo character set conversion, + so keep the escape sequence for later. */ + case 'x': + obstack_1grow (&tempbuf, 'x'); + /* We look at TOKPTR[1] so that the following "continue" + will do the right thing. */ + while (isxdigit (tokptr[1])) + { + obstack_1grow (&tempbuf, tokptr[1]); + ++tokptr; + } + continue; + + /* Octal escapes do not undergo character set + conversion, so keep the escape sequence for + later. */ + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + obstack_1grow (&tempbuf, *tokptr); + /* We look at TOKPTR[1] so that the following "continue" + will do the right thing. */ + while (isdigit (tokptr[1]) + && tokptr[1] != '8' + && tokptr[1] != '9') + { + obstack_1grow (&tempbuf, tokptr[1]); + ++tokptr; + } + continue; + + /* We handle UCNs later. We could handle them here, but + that would mean a spurious error in the case where + the UCN could be converted to the target charset but + not the host charset. */ + case 'u': + case 'U': + { + int len = c == 'U' ? 8 : 4; + obstack_1grow (&tempbuf, c); + for (i = 0; i < len; ++i) + { + /* We look at TOKPTR[1] so that the following + "continue" will do the right thing. */ + if (!isxdigit (tokptr[1])) + error ("Invalid UCN."); + obstack_1grow (&tempbuf, tokptr[1]); + ++tokptr; + } + } + continue; + + /* We must pass backslash through so that it does not + cause quoting during the second expansion. */ + case '\\': + break; + + /* Escapes which undergo conversion. */ + case 'a': + new_c = '\a'; + break; + case 'b': + new_c = '\b'; + break; + case 'f': + new_c = '\f'; + break; + case 'n': + new_c = '\n'; + break; + case 'r': + new_c = '\r'; + break; + case 't': + new_c = '\t'; + break; + case 'v': + new_c = '\v'; + break; + + /* GCC extension. */ + case 'e': + new_c = HOST_ESCAPE_CHAR; + break; + + /* Backslash-newline expands to nothing at all. */ + case '\n': + obstack_blank (&tempbuf, -1); + continue; + + /* GDB extension. */ + case '^': + /* FIXME: needs a recursive call &c. */ + break; + + /* A few escapes just expand to the character itself. */ + case '\'': + case '\"': + case '?': + /* GCC extensions. */ + case '(': + case '{': + case '[': + case '%': + /* Unrecognized escapes turn into the character itself. */ + default: + new_c = c; + break; + } + if (new_c) + { + /* If we have a replacement character, delete the + backslash before inserting it. */ + obstack_blank (&tempbuf, -1); + c = new_c; + } + } + else if (c == quote) + break; + else if (c == '\\') + { + /* Don't count the quote character. */ + --*host_chars; + quoted = 1; + } + obstack_1grow (&tempbuf, c); + } + + if (*tokptr != quote) + { + if (quote == '"') + error ("Unterminated string in expression."); + else + error ("Unmatched single quote."); + } + ++tokptr; + + value->type = type; + value->ptr = obstack_base (&tempbuf); + value->length = obstack_object_size (&tempbuf); + + *outptr = tokptr; + + return quote == '"' ? STRING : CHAR; +} + struct token { char *operator; @@ -1530,12 +1775,6 @@ yylex () int namelen; unsigned int i; char *tokstart; - char *tokptr; - int tempbufindex; - static char *tempbuf; - static int tempbufsize; - char * token_string = NULL; - int class_prefix = 0; int saw_structop = last_was_structop; char *copy; @@ -1607,46 +1846,6 @@ yylex () lexptr++; goto retry; - case '\'': - /* We either have a character constant ('0' or '\177' for example) - or we have a quoted symbol reference ('foo(int,int)' in C++ - for example). */ - lexptr++; - c = *lexptr++; - if (c == '\\') - c = parse_escape (&lexptr); - else if (c == '\'') - error ("Empty character constant."); - else if (! host_char_to_target (c, &c)) - { - int toklen = lexptr - tokstart + 1; - char *tok = alloca (toklen + 1); - memcpy (tok, tokstart, toklen); - tok[toklen] = '\0'; - error ("There is no character corresponding to %s in the target " - "character set `%s'.", tok, target_charset ()); - } - - yylval.typed_val_int.val = c; - yylval.typed_val_int.type = parse_type->builtin_char; - - c = *lexptr++; - if (c != '\'') - { - namelen = skip_quoted (tokstart) - tokstart; - if (namelen > 2) - { - lexptr = tokstart + namelen; - if (lexptr[-1] != '\'') - error ("Unmatched single quote."); - namelen -= 2; hooks/post-receive -- Repository for Project Archer.