From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <archer-commits-return-271-listarch-archer-commits=sourceware.org@sourceware.org>
Received: (qmail 13866 invoked by alias); 9 Jan 2009 04:24:07 -0000
Mailing-List: contact archer-commits-help@sourceware.org; run by ezmlm
Sender: <archer-commits@sourceware.org>
Precedence: bulk
List-Post: <mailto:archer-commits@sourceware.org>
List-Help: <mailto:archer-commits-help@sourceware.org>
List-Subscribe: <mailto:archer-commits-subscribe@sourceware.org>
Received: (qmail 13831 invoked by uid 306); 9 Jan 2009 04:24:05 -0000
Date: Fri, 09 Jan 2009 04:24:00 -0000
Message-ID: <20090109042404.13814.qmail@sourceware.org>
From: tromey@sourceware.org
To: archer-commits@sourceware.org
Subject: [SCM]  archer-tromey-charset: merged
X-Git-Refname: refs/heads/archer-tromey-charset
X-Git-Reftype: branch
X-Git-Oldrev: 26af839fdd5cd7143867acff0012473537ecc667
X-Git-Newrev: 3db781d3884e7b03393020d1851a58985fbe82ab
X-SW-Source: 2009-q1/txt/msg00021.txt.bz2
List-Id: <archer-commits.sourceware.org>

The branch, archer-tromey-charset has been updated
       via  3db781d3884e7b03393020d1851a58985fbe82ab (commit)
       via  c80cea24f00fbecf63ac8d9430d9b3b5be0acf3d (commit)
       via  d1314bebf1724c8eca4cb6f41741460bb11eedd3 (commit)
       via  58e614ec7d3d04d2df44a7f117c3d4a4cb05242e (commit)
       via  775b290a7715c0cbbb0dc1c4b9ba46076431a8f1 (commit)
      from  26af839fdd5cd7143867acff0012473537ecc667 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email.

- Log -----------------------------------------------------------------
commit 3db781d3884e7b03393020d1851a58985fbe82ab
Merge: c80cea24f00fbecf63ac8d9430d9b3b5be0acf3d 26af839fdd5cd7143867acff0012473537ecc667
Author: Tom Tromey <tromey@redhat.com>
Date:   Thu Jan 8 21:21:22 2009 -0700

    merged

commit c80cea24f00fbecf63ac8d9430d9b3b5be0acf3d
Author: Tom Tromey <tromey@redhat.com>
Date:   Thu Jan 8 18:35:36 2009 -0700

    fix wide string printing.
    auto-select big- or little-endian target wide charset.
    list available charset names for completion.

commit d1314bebf1724c8eca4cb6f41741460bb11eedd3
Author: Tom Tromey <tromey@redhat.com>
Date:   Thu Jan 8 15:11:07 2009 -0700

    fixed wide-char lexer buglet

commit 58e614ec7d3d04d2df44a7f117c3d4a4cb05242e
Author: Tom Tromey <tromey@redhat.com>
Date:   Sun Dec 28 16:21:52 2008 -0700

    macro tokenization bug fix

commit 775b290a7715c0cbbb0dc1c4b9ba46076431a8f1
Author: Tom Tromey <tromey@redhat.com>
Date:   Sun Dec 28 02:22:00 2008 -0700

    initial charset rewrite

-----------------------------------------------------------------------

Summary of changes:
 gdb/ChangeLog       |    9 +
 gdb/acinclude.m4    |    3 +
 gdb/ada-lang.h      |    4 +-
 gdb/ada-valprint.c  |    7 +-
 gdb/auxv.c          |    3 +-
 gdb/c-exp.y         |  436 ++++++++++++------
 gdb/c-lang.c        |  533 ++++++++++++++++++++--
 gdb/c-lang.h        |   29 ++-
 gdb/c-valprint.c    |   58 ++-
 gdb/charset-list.h  | 1178 +++++++++++++++++++++++++++++++++++++++++++++++
 gdb/charset.c       | 1259 ++++++++++-----------------------------------------
 gdb/charset.h       |  110 ++---
 gdb/config.in       |    3 +
 gdb/configure       |   64 +++
 gdb/configure.ac    |    1 +
 gdb/defs.h          |    3 +
 gdb/doc/gdb.texinfo |    4 -
 gdb/expprint.c      |   12 +-
 gdb/f-lang.c        |    5 +-
 gdb/f-valprint.c    |    5 +-
 gdb/gdb_locale.h    |    4 +
 gdb/jv-valprint.c   |    2 +-
 gdb/language.c      |    5 +-
 gdb/language.h      |    8 +-
 gdb/m2-lang.c       |    5 +-
 gdb/m2-valprint.c   |    8 +-
 gdb/macroexp.c      |   16 +-
 gdb/objc-lang.c     |    6 +-
 gdb/p-lang.c        |   17 +-
 gdb/p-lang.h        |    7 +-
 gdb/p-valprint.c    |   15 +-
 gdb/parse.c         |   46 ++
 gdb/parser-defs.h   |   18 +
 gdb/printcmd.c      |    4 +-
 gdb/scm-lang.c      |    4 +-
 gdb/utils.c         |   98 ++++-
 gdb/valops.c        |   18 +
 gdb/valprint.c      |    6 +-
 gdb/value.h         |    4 +-
 39 files changed, 2648 insertions(+), 1369 deletions(-)
 create mode 100644 gdb/charset-list.h

First 500 lines of diff:
diff --git a/gdb/ChangeLog b/gdb/ChangeLog
index 8c93b4a..f37ec17 100644
--- a/gdb/ChangeLog
+++ b/gdb/ChangeLog
@@ -1,3 +1,12 @@
+2008-12-23  Tom Tromey  <tromey@redhat.com>
+
+	* gdb_locale.h: Include langinfo.h.
+	* charset.c (_initialize_charset): Set default host charset from
+	the locale.
+	* aclocal.m4, config.in, configure: Rebuild.
+	* configure.ac: Call AM_LANGINFO_CODESET.
+	* acinclude.m4: Include codeset.m4.
+
 2009-01-06  Jim Blandy  <jimb@red-bean.com>
 
 	Check return values of functions declared with warn_unused_result
diff --git a/gdb/acinclude.m4 b/gdb/acinclude.m4
index 81b5d47..09a2eb7 100644
--- a/gdb/acinclude.m4
+++ b/gdb/acinclude.m4
@@ -29,6 +29,9 @@ sinclude([../config/depstand.m4])
 dnl For AM_LC_MESSAGES
 sinclude([../config/lcmessage.m4])
 
+dnl For AM_LANGINFO_CODESET.
+sinclude([../config/codeset.m4])
+
 #
 # Sometimes the native compiler is a bogus stub for gcc or /usr/ucb/cc. This
 # makes configure think it's cross compiling. If --target wasn't used, then
diff --git a/gdb/ada-lang.h b/gdb/ada-lang.h
index c7cc62a..b3e6454 100644
--- a/gdb/ada-lang.h
+++ b/gdb/ada-lang.h
@@ -261,8 +261,8 @@ extern void ada_emit_char (int, struct ui_file *, int, int);
 
 extern void ada_printchar (int, struct ui_file *);
 
-extern void ada_printstr (struct ui_file *, const gdb_byte *,
-			  unsigned int, int, int,
+extern void ada_printstr (struct ui_file *, struct type *, const gdb_byte *,
+			  unsigned int, int,
 			  const struct value_print_options *);
 
 struct value *ada_convert_actual (struct value *actual,
diff --git a/gdb/ada-valprint.c b/gdb/ada-valprint.c
index 9647971..6aeaba5 100644
--- a/gdb/ada-valprint.c
+++ b/gdb/ada-valprint.c
@@ -544,11 +544,12 @@ printstr (struct ui_file *stream, const gdb_byte *string,
 }
 
 void
-ada_printstr (struct ui_file *stream, const gdb_byte *string,
-	      unsigned int length, int width, int force_ellipses,
+ada_printstr (struct ui_file *stream, struct type *type, const gdb_byte *string,
+	      unsigned int length, int force_ellipses,
 	      const struct value_print_options *options)
 {
-  printstr (stream, string, length, force_ellipses, width, options);
+  printstr (stream, string, length, force_ellipses, TYPE_LENGTH (type),
+	    options);
 }
 
 
diff --git a/gdb/auxv.c b/gdb/auxv.c
index 2c42529..52aa29e 100644
--- a/gdb/auxv.c
+++ b/gdb/auxv.c
@@ -247,7 +247,8 @@ fprint_target_auxv (struct ui_file *file, struct target_ops *ops)
 	    get_user_print_options (&opts);
 	    if (opts.addressprint)
 	      fprintf_filtered (file, "0x%s", paddr_nz (val));
-	    val_print_string (val, -1, 1, file, &opts);
+	    val_print_string (builtin_type (target_gdbarch)->builtin_char,
+			      val, -1, file, &opts);
 	    fprintf_filtered (file, "\n");
 	  }
 	  break;
diff --git a/gdb/c-exp.y b/gdb/c-exp.y
index d4bbbcc..085c4dd 100644
--- a/gdb/c-exp.y
+++ b/gdb/c-exp.y
@@ -143,6 +143,7 @@ void yyerror (char *);
     struct symbol *sym;
     struct type *tval;
     struct stoken sval;
+    struct typed_stoken tsval;
     struct ttype tsym;
     struct symtoken ssym;
     int voidval;
@@ -150,6 +151,7 @@ void yyerror (char *);
     enum exp_opcode opcode;
     struct internalvar *ivar;
 
+    struct stoken_vector svec;
     struct type **tvec;
     int *ivec;
   }
@@ -182,11 +184,13 @@ static int parse_number (char *, int, int, YYSTYPE *);
    Contexts where this distinction is not important can use the
    nonterminal "name", which matches either NAME or TYPENAME.  */
 
-%token <sval> STRING
+%token <tsval> STRING
+%token <tsval> CHAR
 %token <ssym> NAME /* BLOCKNAME defined below to give it higher precedence. */
 %token <voidval> COMPLETE
 %token <tsym> TYPENAME
-%type <sval> name string_exp
+%type <sval> name
+%type <svec> string_exp
 %type <ssym> name_not_typename
 %type <tsym> typename
 
@@ -524,6 +528,15 @@ exp	:	INT
 			  write_exp_elt_opcode (OP_LONG); }
 	;
 
+exp	:	CHAR
+			{
+			  struct stoken_vector vec;
+			  vec.len = 1;
+			  vec.tokens = &$1;
+			  write_exp_string_vector ($1.type, &vec);
+			}
+	;
+
 exp	:	NAME_OR_INT
 			{ YYSTYPE val;
 			  parse_number ($1.stoken.ptr, $1.stoken.length, 0, &val);
@@ -572,48 +585,64 @@ string_exp:
 			     string.  Note that we follow the
 			     NUL-termination convention of the
 			     lexer.  */
-			  $$.length = $1.length;
-			  $$.ptr = malloc ($1.length + 1);
-			  memcpy ($$.ptr, $1.ptr, $1.length + 1);
+			  struct typed_stoken *vec = XNEW (struct typed_stoken);
+			  $$.len = 1;
+			  $$.tokens = vec;
+
+			  vec->type = $1.type;
+			  vec->length = $1.length;
+			  vec->ptr = malloc ($1.length + 1);
+			  memcpy (vec->ptr, $1.ptr, $1.length + 1);
 			}
 
 	|	string_exp STRING
 			{
 			  /* Note that we NUL-terminate here, but just
 			     for convenience.  */
-			  struct stoken t;
-			  t.length = $1.length + $2.length;
-			  t.ptr = malloc (t.length + 1);
-			  memcpy (t.ptr, $1.ptr, $1.length);
-			  memcpy (t.ptr + $1.length, $2.ptr, $2.length + 1);
-			  free ($1.ptr);
-			  $$ = t;
+			  char *p;
+			  ++$$.len;
+			  $$.tokens = realloc ($$.tokens,
+					       $$.len * sizeof (struct typed_stoken));
+
+			  p = malloc ($2.length + 1);
+			  memcpy (p, $2.ptr, $2.length + 1);
+
+			  $$.tokens[$$.len - 1].type = $2.type;
+			  $$.tokens[$$.len - 1].length = $2.length;
+			  $$.tokens[$$.len - 1].ptr = p;
 			}
 		;
 
 exp	:	string_exp
-			{ /* C strings are converted into array constants with
-			     an explicit null byte added at the end.  Thus
-			     the array upper bound is the string length.
-			     There is no such thing in C as a completely empty
-			     string. */
-			  char *sp = $1.ptr; int count = $1.length;
-			  while (count-- > 0)
+			{
+			  int i;
+			  enum c_string_type type = C_STRING;
+
+			  for (i = 0; i < $1.len; ++i)
 			    {
-			      write_exp_elt_opcode (OP_LONG);
-			      write_exp_elt_type (parse_type->builtin_char);
-			      write_exp_elt_longcst ((LONGEST)(*sp++));
-			      write_exp_elt_opcode (OP_LONG);
+			      switch ($1.tokens[i].type)
+				{
+				case C_STRING:
+				  break;
+				case C_WIDE_STRING:
+				case C_STRING_16:
+				case C_STRING_32:
+				  if (type != C_STRING
+				      && type != $1.tokens[i].type)
+				    error ("undefined string concatenation");
+				  type = $1.tokens[i].type;
+				  break;
+				default:
+				  /* internal error */
+				  internal_error (__FILE__, __LINE__,
+						  "unrecognized type in string concatenation");
+				}
 			    }
-			  write_exp_elt_opcode (OP_LONG);
-			  write_exp_elt_type (parse_type->builtin_char);
-			  write_exp_elt_longcst ((LONGEST)'\0');
-			  write_exp_elt_opcode (OP_LONG);
-			  write_exp_elt_opcode (OP_ARRAY);
-			  write_exp_elt_longcst ((LONGEST) 0);
-			  write_exp_elt_longcst ((LONGEST) ($1.length));
-			  write_exp_elt_opcode (OP_ARRAY);
-			  free ($1.ptr);
+
+			  write_exp_string_vector (type, &$1);
+			  for (i = 0; i < $1.len; ++i)
+			    free ($1.tokens[i].ptr);
+			  free ($1.tokens);
 			}
 	;
 
@@ -1361,6 +1390,222 @@ parse_number (p, len, parsed_float, putithere)
    return INT;
 }
 
+/* Temporary obstack used for holding strings.  */
+static struct obstack tempbuf;
+static int tempbuf_init;
+
+static int
+parse_string_or_char (char *tokptr, char **outptr, struct typed_stoken *value,
+		      int *host_chars)
+{
+  int quoted, quote, i;
+  enum c_string_type type;
+
+  /* Build the gdb internal form of the input string in tempbuf.  Note
+     that the buffer is null byte terminated *only* for the
+     convenience of debugging gdb itself and printing the buffer
+     contents when the buffer contains no embedded nulls.  Gdb does
+     not depend upon the buffer being null byte terminated; it uses
+     the string length instead.  This allows gdb to handle C strings
+     (as well as strings in other languages) with embedded null
+     bytes.  */
+
+  if (!tempbuf_init)
+    tempbuf_init = 1;
+  else
+    obstack_free (&tempbuf, NULL);
+  obstack_init (&tempbuf);
+
+  /* Record the string type.  */
+  if (*tokptr == 'L')
+    {
+      type = C_WIDE_STRING;
+      ++tokptr;
+    }
+  else if (*tokptr == 'u')
+    {
+      type = C_STRING_16;
+      ++tokptr;
+    }
+  else if (*tokptr == 'U')
+    {
+      type = C_STRING_32;
+      ++tokptr;
+    }
+  else
+    type = C_STRING;
+
+  /* Skip the quote.  */
+  quote = *tokptr;
+  if (quote == '\'')
+    type |= C_CHAR;
+  ++tokptr;
+
+  quoted = 0;
+  *host_chars = 0;
+
+  for (; *tokptr; ++tokptr, ++*host_chars)
+    {
+      char c = *tokptr;
+      if (quoted)
+	{
+	  char new_c = 0;
+	  quoted = 0;
+	  /* Some escape sequences undergo character set conversion.
+	     Those we translate here.  */
+	  switch (c)
+	    {
+	      /* Hex escapes do not undergo character set conversion,
+		 so keep the escape sequence for later.  */
+	    case 'x':
+	      obstack_1grow (&tempbuf, 'x');
+	      /* We look at TOKPTR[1] so that the following "continue"
+		 will do the right thing.  */
+	      while (isxdigit (tokptr[1]))
+		{
+		  obstack_1grow (&tempbuf, tokptr[1]);
+		  ++tokptr;
+		}
+	      continue;
+
+	      /* Octal escapes do not undergo character set
+		 conversion, so keep the escape sequence for
+		 later.  */
+	    case '0':
+	    case '1':
+	    case '2':
+	    case '3':
+	    case '4':
+	    case '5':
+	    case '6':
+	    case '7':
+	      obstack_1grow (&tempbuf, *tokptr);
+	      /* We look at TOKPTR[1] so that the following "continue"
+		 will do the right thing.  */
+	      while (isdigit (tokptr[1])
+		     && tokptr[1] != '8'
+		     && tokptr[1] != '9')
+		{
+		  obstack_1grow (&tempbuf, tokptr[1]);
+		  ++tokptr;
+		}
+	      continue;
+
+	      /* We handle UCNs later.  We could handle them here, but
+		 that would mean a spurious error in the case where
+		 the UCN could be converted to the target charset but
+		 not the host charset.  */
+	    case 'u':
+	    case 'U':
+	      {
+		int len = c == 'U' ? 8 : 4;
+		obstack_1grow (&tempbuf, c);
+		for (i = 0; i < len; ++i)
+		  {
+		    /* We look at TOKPTR[1] so that the following
+		       "continue" will do the right thing.  */
+		    if (!isxdigit (tokptr[1]))
+		      error ("Invalid UCN.");
+		    obstack_1grow (&tempbuf, tokptr[1]);
+		    ++tokptr;
+		  }
+	      }
+	      continue;
+
+	      /* We must pass backslash through so that it does not
+		 cause quoting during the second expansion.  */
+	    case '\\':
+	      break;
+
+	      /* Escapes which undergo conversion.  */
+	    case 'a':
+	      new_c = '\a';
+	      break;
+	    case 'b':
+	      new_c = '\b';
+	      break;
+	    case 'f':
+	      new_c = '\f';
+	      break;
+	    case 'n':
+	      new_c = '\n';
+	      break;
+	    case 'r':
+	      new_c = '\r';
+	      break;
+	    case 't':
+	      new_c = '\t';
+	      break;
+	    case 'v':
+	      new_c = '\v';
+	      break;
+
+	      /* GCC extension.  */
+	    case 'e':
+	      new_c = HOST_ESCAPE_CHAR;
+	      break;
+
+	      /* Backslash-newline expands to nothing at all.  */
+	    case '\n':
+	      obstack_blank (&tempbuf, -1);
+	      continue;
+
+	      /* GDB extension.  */
+	    case '^':
+	      /* FIXME: needs a recursive call &c.  */
+	      break;
+
+	      /* A few escapes just expand to the character itself.  */
+	    case '\'':
+	    case '\"':
+	    case '?':
+	      /* GCC extensions.  */
+	    case '(':
+	    case '{':
+	    case '[':
+	    case '%':
+	      /* Unrecognized escapes turn into the character itself.  */
+	    default:
+	      new_c = c;
+	      break;
+	    }
+	  if (new_c)
+	    {
+	      /* If we have a replacement character, delete the
+		 backslash before inserting it.  */
+	      obstack_blank (&tempbuf, -1);
+	      c = new_c;
+	    }
+	}
+      else if (c == quote)
+	break;
+      else if (c == '\\')
+	{
+	  /* Don't count the backslash that introduces an escape.  */
+	  --*host_chars;
+	  quoted = 1;
+	}
+      obstack_1grow (&tempbuf, c);
+    }
+
+  if (*tokptr != quote)
+    {
+      if (quote == '"')
+	error ("Unterminated string in expression.");
+      else
+	error ("Unmatched single quote.");
+    }
+  ++tokptr;
+
+  value->type = type;
+  value->ptr = obstack_base (&tempbuf);
+  value->length = obstack_object_size (&tempbuf);
+
+  *outptr = tokptr;
+
+  return quote == '"' ? STRING : CHAR;
+}
+
 struct token
 {
   char *operator;
@@ -1530,12 +1775,6 @@ yylex ()
   int namelen;
   unsigned int i;
   char *tokstart;
-  char *tokptr;
-  int tempbufindex;
-  static char *tempbuf;
-  static int tempbufsize;
-  char * token_string = NULL;
-  int class_prefix = 0;
   int saw_structop = last_was_structop;
   char *copy;
 
@@ -1607,46 +1846,6 @@ yylex ()
       lexptr++;
       goto retry;
 
-    case '\'':
-      /* We either have a character constant ('0' or '\177' for example)
-	 or we have a quoted symbol reference ('foo(int,int)' in C++
-	 for example). */
-      lexptr++;
-      c = *lexptr++;
-      if (c == '\\')
-	c = parse_escape (&lexptr);
-      else if (c == '\'')
-	error ("Empty character constant.");
-      else if (! host_char_to_target (c, &c))
-        {
-          int toklen = lexptr - tokstart + 1;
-          char *tok = alloca (toklen + 1);
-          memcpy (tok, tokstart, toklen);
-          tok[toklen] = '\0';
-          error ("There is no character corresponding to %s in the target "
-                 "character set `%s'.", tok, target_charset ());
-        }
-
-      yylval.typed_val_int.val = c;
-      yylval.typed_val_int.type = parse_type->builtin_char;
-
-      c = *lexptr++;
-      if (c != '\'')
-	{
-	  namelen = skip_quoted (tokstart) - tokstart;
-	  if (namelen > 2)
-	    {
-	      lexptr = tokstart + namelen;
-	      if (lexptr[-1] != '\'')
-		error ("Unmatched single quote.");
-	      namelen -= 2;


hooks/post-receive
--
Repository for Project Archer.