[SCM] archer-tromey-charset: fix some escape sequence bugs.

public inbox for archer-commits@sourceware.org
help / color / mirror / Atom feed

* [SCM]  archer-tromey-charset: fix some escape sequence bugs.
@ 2009-01-12 21:54 tromey
  0 siblings, 0 replies; only message in thread
From: tromey @ 2009-01-12 21:54 UTC (permalink / raw)
  To: archer-commits

The branch, archer-tromey-charset has been updated
       via  9b2cc12d114246c76e7f288ca2b9e5b9eaaed27a (commit)
       via  3c849c091d5a7f0f9434067ba7cd83b68fb2e0b6 (commit)
      from  53cfd74cde0b5ce17d1c0dd0183858dcf6099710 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email.

- Log -----------------------------------------------------------------
commit 9b2cc12d114246c76e7f288ca2b9e5b9eaaed27a
Author: Tom Tromey <tromey@redhat.com>
Date:   Mon Jan 12 14:53:28 2009 -0700

    fix some escape sequence bugs.
    remove undocumented \^ extension

commit 3c849c091d5a7f0f9434067ba7cd83b68fb2e0b6
Author: Tom Tromey <tromey@redhat.com>
Date:   Mon Jan 12 13:34:55 2009 -0700

    attempt to fix print/s with wide characters

-----------------------------------------------------------------------

Summary of changes:
 gdb/ChangeLog  |    8 ++-
 gdb/c-exp.y    |  305 +++++++++++++++++++++++++++++++-------------------------
 gdb/c-lang.h   |    8 +-
 gdb/charset.h  |    3 +
 gdb/macroexp.c |   13 ++-
 gdb/printcmd.c |   13 ++-
 gdb/utils.c    |   56 +----------
 7 files changed, 205 insertions(+), 201 deletions(-)

First 500 lines of diff:
diff --git a/gdb/ChangeLog b/gdb/ChangeLog
index 9e9927e..2835327 100644
--- a/gdb/ChangeLog
+++ b/gdb/ChangeLog
@@ -10,6 +10,8 @@
 	(make_cleanup_obstack_free): Likewise.
 	(host_char_to_target): New function.
 	(parse_escape): Use host_char_to_target, host_hex_value.  Update.
+	Remove '^' case.
+	(no_control_char_error): Remove.
 	* typeprint.c (print_type_scalar): Update.
 	* scm-valprint.c (scm_scmval_print): Update.
 	* scm-lang.h (scm_printchar, scm_printstr): Update.
@@ -35,7 +37,9 @@
 	(objc_printchar): Likewise.
 	(objc_printstr): Likewise.
 	* macroexp.c (get_character_constant): Handle unicode characters.
-	(get_string_literal): Handle unicode strings.
+	Use c_parse_escape.
+	(get_string_literal): Handle unicode strings.  Use
+	c_parse_escape.
 	* m2-valprint.c (print_unpacked_pointer): Update.
 	(m2_print_array_contents): Update.
 	(m2_val_print): Update.
@@ -66,6 +70,7 @@
 	(enum transliterations): New type.
 	(convert_between_encodings): Declare.
 	(HOST_ESCAPE_CHAR): New define.
+	(HOST_DELETE_CHAR): Likewise.
 	(host_letter_to_control_character, host_hex_value): Declare.
 	* charset-list.h: New file.
 	* c-valprint.c (textual_name): New function.
@@ -106,6 +111,7 @@
 	<tokptr, tempbufindex, tempbufsize, token_string, class_prefix>:
 	Remove.
 	Handle 'u', 'U', and 'L' prefixes.  Call parse_string_or_char.
+	(c_parse_escape): New function.
 	* auxv.c (fprint_target_auxv): Update.
 	* ada-valprint.c (ada_emit_char): Add type argument.
 	(ada_printchar): Likewise.
diff --git a/gdb/c-exp.y b/gdb/c-exp.y
index 62d36a0..1873980 100644
--- a/gdb/c-exp.y
+++ b/gdb/c-exp.y
@@ -1394,6 +1394,166 @@ parse_number (p, len, parsed_float, putithere)
 static struct obstack tempbuf;
 static int tempbuf_init;
 
+/* Parse a C escape sequence.  The initial backslash of the sequence
+   is at (*PTR)[-1].  *PTR will be updated to point to just after the
+   last character of the sequence.  If OUTPUT is not NULL, the
+   translated form of the escape sequence will be written there.  If
+   OUTPUT is NULL, no output is written and the call will only affect
+   *PTR.  If an escape sequence is expressed in target bytes, then the
+   entire sequence will simply be copied to OUTPUT.  Return 1 if any
+   character was emitted, 0 otherwise.  */
+
+int
+c_parse_escape (char **ptr, struct obstack *output)
+{
+  char *tokptr = *ptr;
+  int result = 1;
+
+  /* Some escape sequences undergo character set conversion.  Those we
+     translate here.  */
+  switch (*tokptr)
+    {
+      /* Hex escapes do not undergo character set conversion, so keep
+	 the escape sequence for later.  */
+    case 'x':
+      if (output)
+	obstack_grow_str (output, "\\x");
+      ++tokptr;
+      if (!isxdigit (*tokptr))
+	error (_("\\x escape without a following hex digit"));
+      while (isxdigit (*tokptr))
+	{
+	  if (output)
+	    obstack_1grow (output, *tokptr);
+	  ++tokptr;
+	}
+      break;
+
+      /* Octal escapes do not undergo character set
+	 conversion, so keep the escape sequence for
+	 later.  */
+    case '0':
+    case '1':
+    case '2':
+    case '3':
+    case '4':
+    case '5':
+    case '6':
+    case '7':
+      if (output)
+	obstack_grow_str (output, "\\");
+      while (isdigit (*tokptr) && *tokptr != '8' && *tokptr != '9')
+	{
+	  if (output)
+	    obstack_1grow (output, *tokptr);
+	  ++tokptr;
+	}
+      break;
+
+      /* We handle UCNs later.  We could handle them here, but that
+	 would mean a spurious error in the case where the UCN could
+	 be converted to the target charset but not the host
+	 charset.  */
+    case 'u':
+    case 'U':
+      {
+	char c = *tokptr;
+	int i, len = c == 'U' ? 8 : 4;
+	if (output)
+	  {
+	    obstack_1grow (output, '\\');
+	    obstack_1grow (output, *tokptr);
+	  }
+	++tokptr;
+	if (!isxdigit (*tokptr))
+	  error (_("\\%c escape without a following hex digit"), c);
+	for (i = 0; i < len && isxdigit (*tokptr); ++i)
+	  {
+	    if (output)
+	      obstack_1grow (output, *tokptr);
+	    ++tokptr;
+	  }
+      }
+      break;
+
+      /* We must pass backslash through so that it does not
+	 cause quoting during the second expansion.  */
+    case '\\':
+      if (output)
+	obstack_grow_str (output, "\\\\");
+      ++tokptr;
+      break;
+
+      /* Escapes which undergo conversion.  */
+    case 'a':
+      if (output)
+	obstack_1grow (output, '\a');
+      ++tokptr;
+      break;
+    case 'b':
+      if (output)
+	obstack_1grow (output, '\b');
+      ++tokptr;
+      break;
+    case 'f':
+      if (output)
+	obstack_1grow (output, '\f');
+      ++tokptr;
+      break;
+    case 'n':
+      if (output)
+	obstack_1grow (output, '\n');
+      ++tokptr;
+      break;
+    case 'r':
+      if (output)
+	obstack_1grow (output, '\r');
+      ++tokptr;
+      break;
+    case 't':
+      if (output)
+	obstack_1grow (output, '\t');
+      ++tokptr;
+      break;
+    case 'v':
+      if (output)
+	obstack_1grow (output, '\v');
+      ++tokptr;
+      break;
+
+      /* GCC extension.  */
+    case 'e':
+      if (output)
+	obstack_1grow (output, HOST_ESCAPE_CHAR);
+      ++tokptr;
+      break;
+
+      /* Backslash-newline expands to nothing at all.  */
+    case '\n':
+      ++tokptr;
+      result = 0;
+      break;
+
+      /* A few escapes just expand to the character itself.  */
+    case '\'':
+    case '\"':
+    case '?':
+      /* GCC extensions.  */
+    case '(':
+    case '{':
+    case '[':
+    case '%':
+      /* Unrecognized escapes turn into the character itself.  */
+    default:
+      if (output)
+	obstack_1grow (output, *tokptr);
+      ++tokptr;
+      break;
+    }
+  *ptr = tokptr;
+  return result;
+}
+
 /* Parse a string or character literal from TOKPTR.  The string or
    character may be wide or unicode.  *OUTPTR is set to just after the
    end of the literal in the input string.  The resulting token is
@@ -1404,7 +1564,7 @@ static int
 parse_string_or_char (char *tokptr, char **outptr, struct typed_stoken *value,
 		      int *host_chars)
 {
-  int quoted, quote, i;
+  int quote, i;
   enum c_string_type type;
 
   /* Build the gdb internal form of the input string in tempbuf.  Note
@@ -1447,151 +1607,24 @@ parse_string_or_char (char *tokptr, char **outptr, struct typed_stoken *value,
     type |= C_CHAR;
   ++tokptr;
 
-  quoted = 0;
   *host_chars = 0;
 
-  for (; *tokptr; ++tokptr, ++*host_chars)
+  while (*tokptr)
     {
       char c = *tokptr;
-      if (quoted)
+      if (c == '\\')
 	{
-	  char new_c = 0;
-	  quoted = 0;
-	  /* Some escape sequences undergo character set conversion.
-	     Those we translate here.  */
-	  switch (c)
-	    {
-	      /* Hex escapes do not undergo character set conversion,
-		 so keep the escape sequence for later.  */
-	    case 'x':
-	      obstack_1grow (&tempbuf, 'x');
-	      /* We look at TOKPTR[1] so that the following "continue"
-		 will do the right thing.  */
-	      while (isxdigit (tokptr[1]))
-		{
-		  obstack_1grow (&tempbuf, tokptr[1]);
-		  ++tokptr;
-		}
-	      continue;
-
-	      /* Octal escapes do not undergo character set
-		 conversion, so keep the escape sequence for
-		 later.  */
-	    case '0':
-	    case '1':
-	    case '2':
-	    case '3':
-	    case '4':
-	    case '5':
-	    case '6':
-	    case '7':
-	      obstack_1grow (&tempbuf, *tokptr);
-	      /* We look at TOKPTR[1] so that the following "continue"
-		 will do the right thing.  */
-	      while (isdigit (tokptr[1])
-		     && tokptr[1] != '8'
-		     && tokptr[1] != '9')
-		{
-		  obstack_1grow (&tempbuf, tokptr[1]);
-		  ++tokptr;
-		}
-	      continue;
-
-	      /* We handle UCNs later.  We could handle them here, but
-		 that would mean a spurious error in the case where
-		 the UCN could be converted to the target charset but
-		 not the host charset.  */
-	    case 'u':
-	    case 'U':
-	      {
-		int len = c == 'U' ? 8 : 4;
-		obstack_1grow (&tempbuf, c);
-		for (i = 0; i < len; ++i)
-		  {
-		    /* We look at TOKPTR[1] so that the following
-		       "continue" will do the right thing.  */
-		    if (!isxdigit (tokptr[1]))
-		      break;
-		    obstack_1grow (&tempbuf, tokptr[1]);
-		    ++tokptr;
-		  }
-	      }
-	      continue;
-
-	      /* We must pass backslash through so that it does not
-		 cause quoting during the second expansion.  */
-	    case '\\':
-	      break;
-
-	      /* Escapes which undergo conversion.  */
-	    case 'a':
-	      new_c = '\a';
-	      break;
-	    case 'b':
-	      new_c = '\b';
-	      break;
-	    case 'f':
-	      new_c = '\f';
-	      break;
-	    case 'n':
-	      new_c = '\n';
-	      break;
-	    case 'r':
-	      new_c = '\r';
-	      break;
-	    case 't':
-	      new_c = '\t';
-	      break;
-	    case 'v':
-	      new_c = '\v';
-	      break;
-
-	      /* GCC extension.  */
-	    case 'e':
-	      new_c = HOST_ESCAPE_CHAR;
-	      break;
-
-	      /* Backslash-newline expands to nothing at all.  */
-	    case '\n':
-	      obstack_blank (&tempbuf, -1);
-	      continue;
-
-	      /* GDB extension.  */
-	    case '^':
-	      /* FIXME: needs a recursive call &c.  */
-	      break;
-
-	      /* A few escapes just expand to the character itself.  */
-	    case '\'':
-	    case '\"':
-	    case '?':
-	      /* GCC extensions.  */
-	    case '(':
-	    case '{':
-	    case '[':
-	    case '%':
-	      /* Unrecognized escapes turn into the character itself.  */
-	    default:
-	      new_c = c;
-	      break;
-	    }
-	  if (new_c)
-	    {
-	      /* If we have a replacement character, delete the
-		 backslash before inserting it.  */
-	      obstack_blank (&tempbuf, -1);
-	      c = new_c;
-	    }
+	  ++tokptr;
+	  *host_chars += c_parse_escape (&tokptr, &tempbuf);
 	}
       else if (c == quote)
 	break;
-      else if (c == '\\')
+      else
 	{
-	  /* Don't count the quote character.  */
-	  --*host_chars;
-	  quoted = 1;
+	  obstack_1grow (&tempbuf, c);
+	  ++tokptr;
+	  ++*host_chars;
 	}
-      obstack_1grow (&tempbuf, c);
     }
 
   if (*tokptr != quote)
diff --git a/gdb/c-lang.h b/gdb/c-lang.h
index 157013d..ba9d996 100644
--- a/gdb/c-lang.h
+++ b/gdb/c-lang.h
@@ -54,9 +54,13 @@ enum c_string_type
     C_CHAR_32 = 7
   };
 
-extern int c_parse (void);	/* Defined in c-exp.y */
+/* Defined in c-exp.y.  */
 
-extern void c_error (char *);	/* Defined in c-exp.y */
+extern int c_parse (void);
+
+extern void c_error (char *);
+
+extern int c_parse_escape (char **, struct obstack *);
 
 /* Defined in c-typeprint.c */
 extern void c_print_type (struct type *, char *, struct ui_file *, int,
diff --git a/gdb/charset.h b/gdb/charset.h
index 228e4ff..db017f5 100644
--- a/gdb/charset.h
+++ b/gdb/charset.h
@@ -76,6 +76,9 @@ void convert_between_encodings (const char *from, const char *to,
 /* The escape character.  */
 #define HOST_ESCAPE_CHAR 27
 
+/* The DEL character.  */
+#define HOST_DELETE_CHAR 0177
+
 /* Convert a letter, like 'c', to its corresponding control
    character.  */
 char host_letter_to_control_character (char c);
diff --git a/gdb/macroexp.c b/gdb/macroexp.c
index a84b58b..752a939 100644
--- a/gdb/macroexp.c
+++ b/gdb/macroexp.c
@@ -23,6 +23,7 @@
 #include "macrotab.h"
 #include "macroexp.h"
 #include "gdb_assert.h"
+#include "c-lang.h"
 
 
 \f
@@ -326,6 +327,7 @@ get_character_constant (struct macro_buffer *tok, char *p, char *end)
     {
       char *tok_start = p;
       char *body_start;
+      int char_count = 0;
 
       if (*p == '\'')
         p++;
@@ -341,7 +343,7 @@ get_character_constant (struct macro_buffer *tok, char *p, char *end)
             error (_("Unmatched single quote."));
           else if (*p == '\'')
             {
-              if (p == body_start)
+              if (!char_count)
                 error (_("A character constant must contain at least one "
                        "character."));
               p++;
@@ -350,10 +352,13 @@ get_character_constant (struct macro_buffer *tok, char *p, char *end)
           else if (*p == '\\')
             {
               p++;
-              parse_escape (&p);
+	      char_count += c_parse_escape (&p, NULL);
             }
           else
-            p++;
+	    {
+	      p++;
+	      char_count++;
+	    }
         }
 
       set_token (tok, tok_start, p);
@@ -401,7 +406,7 @@ get_string_literal (struct macro_buffer *tok, char *p, char *end)
           else if (*p == '\\')
             {
               p++;
-              parse_escape (&p);
+              c_parse_escape (&p, NULL);
             }
           else
             p++;
diff --git a/gdb/printcmd.c b/gdb/printcmd.c
index 002aa92..a475867 100644
--- a/gdb/printcmd.c
+++ b/gdb/printcmd.c
@@ -270,12 +270,13 @@ print_formatted (struct value *val, int size,
       switch (options->format)
 	{
 	case 's':
-	  /* FIXME: Need to handle wchar_t's here... */
-	  next_address = VALUE_ADDRESS (val)
-	    /* FIXME: wrong char type here */
-	    + val_print_string (builtin_type (current_gdbarch)->builtin_char,
-				VALUE_ADDRESS (val), -1, stream,
-				options);
+	  {
+	    struct type *elttype = TYPE_TARGET_TYPE (type);
+	    next_address = (VALUE_ADDRESS (val)
+			    + val_print_string (elttype,
+						VALUE_ADDRESS (val), -1,
+						stream, options));
+	  }
 	  return;
 
 	case 'i':
diff --git a/gdb/utils.c b/gdb/utils.c
index 2b3b5d4..49f9984 100644
--- a/gdb/utils.c
+++ b/gdb/utils.c
@@ -1475,22 +1475,10 @@ query (const char *ctlstr, ...)
   va_end (args);
 }
 


hooks/post-receive
--
Repository for Project Archer.


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2009-01-12 21:54 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-01-12 21:54 [SCM] archer-tromey-charset: fix some escape sequence bugs tromey

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).