public inbox for gdb-patches@sourceware.org
 help / color / mirror / Atom feed
* [pushed 0/2] Two fixes for Ada character enum literals
@ 2022-02-28 17:48 Tom Tromey
  2022-02-28 17:48 ` [pushed 1/2] Handle 'QWW' encoding case in Ada enums Tom Tromey
  2022-02-28 17:48 ` [pushed 2/2] Handle multi-byte bracket sequences in Ada lexer Tom Tromey
  0 siblings, 2 replies; 3+ messages in thread
From: Tom Tromey @ 2022-02-28 17:48 UTC (permalink / raw)
  To: gdb-patches

Ada has an unusual feature whereby a character literal can appear in
an enum.  gdb didn't correctly handle these when wide-wide characters
were in use.  This short series fixes the problems here.

Because these are Ada-specific and have already been approved
internally, I am checking them in.

Tom



^ permalink raw reply	[flat|nested] 3+ messages in thread

* [pushed 1/2] Handle 'QWW' encoding case in Ada enums
  2022-02-28 17:48 [pushed 0/2] Two fixes for Ada character enum literals Tom Tromey
@ 2022-02-28 17:48 ` Tom Tromey
  2022-02-28 17:48 ` [pushed 2/2] Handle multi-byte bracket sequences in Ada lexer Tom Tromey
  1 sibling, 0 replies; 3+ messages in thread
From: Tom Tromey @ 2022-02-28 17:48 UTC (permalink / raw)
  To: gdb-patches; +Cc: Tom Tromey

In Ada, an enum can contain character literals.  GNAT encodes these
values in a special way.  For example, the Unicode character U+0178
would be represented as 'QW0178' in the DWARF:

 <3><112f>: Abbrev Number: 2 (DW_TAG_enumerator)
    <1130>   DW_AT_name        : (indirect string, offset: 0x19ff): QW0178
    <1134>   DW_AT_const_value : 2

gdb handles this reasonably well, but fails to handle the 'QWW'
encoding, which is used for characters outside the Basic Multilingual
Plane (that is, code points above U+FFFF).

Also, while working on this, I noticed that gdb will print the decimal
value for an enum character constant:

    (gdb) print Char_X
    $2 = 1 'x'

This is a nice feature, IMO, because in this situation the 'x' enum
constant does not have its usual decimal value -- it has the value
that's assigned based on the enumeration type.

However, gdb did not do this when it decided to print the constant
using the bracket notation:

    (gdb) print Char_Thorn
    $3 = ["de"]

This patch changes gdb to print the decimal value here as well, and to
put the bracket notation in single quotes -- otherwise gdb will be
printing something that it can't then read.  Now it looks like:

    (gdb) print Char_Thorn
    $3 = 4 '["de"]'

Note that gdb can't read longer bracket notations, like the other ones
printed in this test case:

    (gdb) print Char_King
    $4 = 3 '["01fa00"]'

I think this is a bug, but I plan to fix it separately.

Finally, the copyright dates in the new test case start at 2011
(rather than 2022) because the test began as a copy of an existing one.
---
 gdb/ada-lang.c                                | 14 ++++--
 gdb/testsuite/gdb.ada/char_enum_unicode.exp   | 43 +++++++++++++++++++
 .../gdb.ada/char_enum_unicode/foo.adb         | 30 +++++++++++++
 .../gdb.ada/char_enum_unicode/pck.adb         | 21 +++++++++
 .../gdb.ada/char_enum_unicode/pck.ads         | 20 +++++++++
 5 files changed, 125 insertions(+), 3 deletions(-)
 create mode 100644 gdb/testsuite/gdb.ada/char_enum_unicode.exp
 create mode 100644 gdb/testsuite/gdb.ada/char_enum_unicode/foo.adb
 create mode 100644 gdb/testsuite/gdb.ada/char_enum_unicode/pck.adb
 create mode 100644 gdb/testsuite/gdb.ada/char_enum_unicode/pck.ads

diff --git a/gdb/ada-lang.c b/gdb/ada-lang.c
index d2f620cbb04..f1d59d2aadb 100644
--- a/gdb/ada-lang.c
+++ b/gdb/ada-lang.c
@@ -8786,7 +8786,13 @@ ada_enum_name (const char *name)
 
       if (name[1] == 'U' || name[1] == 'W')
 	{
-	  if (sscanf (name + 2, "%x", &v) != 1)
+	  int offset = 2;
+	  if (name[1] == 'W' && name[2] == 'W')
+	    {
+	      /* Also handle the QWW case.  */
+	      ++offset;
+	    }
+	  if (sscanf (name + offset, "%x", &v) != 1)
 	    return name;
 	}
       else if (((name[1] >= '0' && name[1] <= '9')
@@ -8802,9 +8808,11 @@ ada_enum_name (const char *name)
       if (isascii (v) && isprint (v))
 	storage = string_printf ("'%c'", v);
       else if (name[1] == 'U')
-	storage = string_printf ("[\"%02x\"]", v);
+	storage = string_printf ("'[\"%02x\"]'", v);
+      else if (name[2] != 'W')
+	storage = string_printf ("'[\"%04x\"]'", v);
       else
-	storage = string_printf ("[\"%04x\"]", v);
+	storage = string_printf ("'[\"%06x\"]'", v);
 
       return storage.c_str ();
     }
diff --git a/gdb/testsuite/gdb.ada/char_enum_unicode.exp b/gdb/testsuite/gdb.ada/char_enum_unicode.exp
new file mode 100644
index 00000000000..aa8136054e7
--- /dev/null
+++ b/gdb/testsuite/gdb.ada/char_enum_unicode.exp
@@ -0,0 +1,43 @@
+# Copyright 2011-2022 Free Software Foundation, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+load_lib "ada.exp"
+
+if { [skip_ada_tests] } { return -1 }
+
+standard_ada_testfile foo
+
+set flags [list debug additional_flags=-gnatW8]
+if {[gdb_compile_ada "${srcfile}" "${binfile}" executable $flags] != "" } {
+  return -1
+}
+
+clean_restart ${testfile}
+
+set bp_location [gdb_get_line_number "STOP" ${testdir}/foo.adb]
+runto "foo.adb:$bp_location"
+
+set y "'\\\[\"0178\"\\\]'"
+set king "'\\\[\"01fa00\"\\\]'"
+set thorn "'\\\[\"de\"\\\]'"
+
+gdb_test "ptype Char_Enum_Type" "type = \\(alpha, 'x', $y, $king, $thorn\\)"
+gdb_test "print Char_Alpha" " = alpha"
+gdb_test "print Char_X" " = 1 'x'"
+gdb_test "print Char_Y" " = 2 $y"
+gdb_test "print Char_King" " = 3 $king"
+gdb_test "print Char_Thorn" " = 4 $thorn"
+gdb_test "print Char_Enum_Type'('x')" " = 1 'x'"
+gdb_test "print Char_Enum_Type'('\[\"de\"\]')" " = 4 $thorn"
diff --git a/gdb/testsuite/gdb.ada/char_enum_unicode/foo.adb b/gdb/testsuite/gdb.ada/char_enum_unicode/foo.adb
new file mode 100644
index 00000000000..09756870320
--- /dev/null
+++ b/gdb/testsuite/gdb.ada/char_enum_unicode/foo.adb
@@ -0,0 +1,30 @@
+--  Copyright 2011-2022 Free Software Foundation, Inc.
+--
+--  This program is free software; you can redistribute it and/or modify
+--  it under the terms of the GNU General Public License as published by
+--  the Free Software Foundation; either version 3 of the License, or
+--  (at your option) any later version.
+--
+--  This program is distributed in the hope that it will be useful,
+--  but WITHOUT ANY WARRANTY; without even the implied warranty of
+--  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+--  GNU General Public License for more details.
+--
+--  You should have received a copy of the GNU General Public License
+--  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+with Pck; use Pck;
+
+procedure Foo is
+   type Char_Enum_Type is (alpha, 'x', 'Ÿ', '🨀', 'Þ');
+   Char_Alpha : Char_Enum_Type := alpha;
+   Char_X : Char_Enum_Type := 'x';
+   Char_Thorn : Char_Enum_Type := 'Þ';
+   Char_Y : Char_Enum_Type := 'Ÿ';
+   Char_King : Char_Enum_Type := '🨀';
+begin
+   Do_Nothing (Char_Alpha'Address);  -- STOP
+   Do_Nothing (Char_X'Address);
+   Do_Nothing (Char_Y'Address);
+   Do_Nothing (Char_King'Address);
+end Foo;
diff --git a/gdb/testsuite/gdb.ada/char_enum_unicode/pck.adb b/gdb/testsuite/gdb.ada/char_enum_unicode/pck.adb
new file mode 100644
index 00000000000..d9308b6c04d
--- /dev/null
+++ b/gdb/testsuite/gdb.ada/char_enum_unicode/pck.adb
@@ -0,0 +1,21 @@
+--  Copyright 2011-2022 Free Software Foundation, Inc.
+--
+--  This program is free software; you can redistribute it and/or modify
+--  it under the terms of the GNU General Public License as published by
+--  the Free Software Foundation; either version 3 of the License, or
+--  (at your option) any later version.
+--
+--  This program is distributed in the hope that it will be useful,
+--  but WITHOUT ANY WARRANTY; without even the implied warranty of
+--  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+--  GNU General Public License for more details.
+--
+--  You should have received a copy of the GNU General Public License
+--  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+package body Pck is
+   procedure Do_Nothing (A : System.Address) is
+   begin
+      null;
+   end Do_Nothing;
+end Pck;
diff --git a/gdb/testsuite/gdb.ada/char_enum_unicode/pck.ads b/gdb/testsuite/gdb.ada/char_enum_unicode/pck.ads
new file mode 100644
index 00000000000..4e937b8a5a2
--- /dev/null
+++ b/gdb/testsuite/gdb.ada/char_enum_unicode/pck.ads
@@ -0,0 +1,20 @@
+--  Copyright 2011-2022 Free Software Foundation, Inc.
+--
+--  This program is free software; you can redistribute it and/or modify
+--  it under the terms of the GNU General Public License as published by
+--  the Free Software Foundation; either version 3 of the License, or
+--  (at your option) any later version.
+--
+--  This program is distributed in the hope that it will be useful,
+--  but WITHOUT ANY WARRANTY; without even the implied warranty of
+--  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+--  GNU General Public License for more details.
+--
+--  You should have received a copy of the GNU General Public License
+--  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+with System;
+
+package Pck is
+   procedure Do_Nothing (A : System.Address);
+end Pck;
-- 
2.31.1


^ permalink raw reply	[flat|nested] 3+ messages in thread

* [pushed 2/2] Handle multi-byte bracket sequences in Ada lexer
  2022-02-28 17:48 [pushed 0/2] Two fixes for Ada character enum literals Tom Tromey
  2022-02-28 17:48 ` [pushed 1/2] Handle 'QWW' encoding case in Ada enums Tom Tromey
@ 2022-02-28 17:48 ` Tom Tromey
  1 sibling, 0 replies; 3+ messages in thread
From: Tom Tromey @ 2022-02-28 17:48 UTC (permalink / raw)
  To: gdb-patches; +Cc: Tom Tromey

As noted in an earlier patch, the Ada lexer does not handle multi-byte
bracket sequences.  This patch adds support for these for character
literals.  gdb does not generally seem to handle the Ada wide string
types, so for the time being these continue to be excluded -- but an
explicit error is added to make this clearer.
---
 gdb/ada-exp.y                               | 16 ++++++++++----
 gdb/ada-lang.c                              | 14 ++++++++----
 gdb/ada-lex.l                               | 24 ++++++++++++---------
 gdb/ada-valprint.c                          |  6 +++++-
 gdb/testsuite/gdb.ada/char_enum_unicode.exp |  9 ++++++++
 gdb/testsuite/gdb.ada/widewide.exp          |  4 ++--
 6 files changed, 52 insertions(+), 21 deletions(-)

diff --git a/gdb/ada-exp.y b/gdb/ada-exp.y
index 916b8ef94fe..d3fce8d05e3 100644
--- a/gdb/ada-exp.y
+++ b/gdb/ada-exp.y
@@ -98,7 +98,7 @@ static struct type *type_long_long (struct parser_state *);
 
 static struct type *type_long_double (struct parser_state *);
 
-static struct type *type_char (struct parser_state *);
+static struct type *type_for_char (struct parser_state *, ULONGEST);
 
 static struct type *type_boolean (struct parser_state *);
 
@@ -1727,10 +1727,18 @@ type_long_double (struct parser_state *par_state)
 }
 
 static struct type *
-type_char (struct parser_state *par_state)
+type_for_char (struct parser_state *par_state, ULONGEST value)
 {
-  return language_string_char_type (par_state->language (),
-				    par_state->gdbarch ());
+  if (value <= 0xff)
+    return language_string_char_type (par_state->language (),
+				      par_state->gdbarch ());
+  else if (value <= 0xffff)
+    return language_lookup_primitive_type (par_state->language (),
+					   par_state->gdbarch (),
+					   "wide_character");
+  return language_lookup_primitive_type (par_state->language (),
+					 par_state->gdbarch (),
+					 "wide_wide_character");
 }
 
 static struct type *
diff --git a/gdb/ada-lang.c b/gdb/ada-lang.c
index f1d59d2aadb..d44b0906e6d 100644
--- a/gdb/ada-lang.c
+++ b/gdb/ada-lang.c
@@ -10187,7 +10187,7 @@ ada_resolvable::replace (operation_up &&owner,
   return std::move (owner);
 }
 
-/* Convert the character literal whose ASCII value would be VAL to the
+/* Convert the character literal whose value would be VAL to the
    appropriate value of type TYPE, if there is a translation.
    Otherwise return VAL.  Hence, in an enumeration type ('A', 'B'),
    the literal 'A' (VAL == 65), returns 0.  */
@@ -10195,7 +10195,7 @@ ada_resolvable::replace (operation_up &&owner,
 static LONGEST
 convert_char_literal (struct type *type, LONGEST val)
 {
-  char name[7];
+  char name[12];
   int f;
 
   if (type == NULL)
@@ -10206,8 +10206,12 @@ convert_char_literal (struct type *type, LONGEST val)
 
   if ((val >= 'a' && val <= 'z') || (val >= '0' && val <= '9'))
     xsnprintf (name, sizeof (name), "Q%c", (int) val);
+  else if (val >= 0 && val < 256)
+    xsnprintf (name, sizeof (name), "QU%02x", (unsigned) val);
+  else if (val >= 0 && val < 0x10000)
+    xsnprintf (name, sizeof (name), "QW%04x", (unsigned) val);
   else
-    xsnprintf (name, sizeof (name), "QU%02x", (int) val);
+    xsnprintf (name, sizeof (name), "QWW%08lx", (unsigned long) val);
   size_t len = strlen (name);
   for (f = 0; f < type->num_fields (); f += 1)
     {
@@ -13005,9 +13009,11 @@ class ada_language : public language_defn
     add (arch_integer_type (gdbarch, gdbarch_short_bit (gdbarch),
 			    0, "short_integer"));
     struct type *char_type = arch_character_type (gdbarch, TARGET_CHAR_BIT,
-						  0, "character");
+						  1, "character");
     lai->set_string_char_type (char_type);
     add (char_type);
+    add (arch_character_type (gdbarch, 16, 1, "wide_character"));
+    add (arch_character_type (gdbarch, 32, 1, "wide_wide_character"));
     add (arch_float_type (gdbarch, gdbarch_float_bit (gdbarch),
 			  "float", gdbarch_float_format (gdbarch)));
     add (arch_float_type (gdbarch, gdbarch_double_bit (gdbarch),
diff --git a/gdb/ada-lex.l b/gdb/ada-lex.l
index d64496a3775..f61efba81a9 100644
--- a/gdb/ada-lex.l
+++ b/gdb/ada-lex.l
@@ -1,4 +1,4 @@
-/* FLEX lexer for Ada expressions, for GDB.
+/* FLEX lexer for Ada expressions, for GDB. -*- c++ -*-
    Copyright (C) 1994-2022 Free Software Foundation, Inc.
 
    This file is part of GDB.
@@ -150,20 +150,22 @@ static int paren_depth;
 		}
 
 <INITIAL>"'"({GRAPHIC}|\")"'" {
-		   yylval.typed_val.type = type_char (pstate);
 		   yylval.typed_val.val = yytext[1];
+		   yylval.typed_val.type = type_for_char (pstate, yytext[1]);
 		   return CHARLIT;
 		}
 
-<INITIAL>"'[\""{HEXDIG}{2}"\"]'"   {
-                   int v;
-                   yylval.typed_val.type = type_char (pstate);
-		   sscanf (yytext+3, "%2x", &v);
+<INITIAL>"'[\""{HEXDIG}{2,}"\"]'"   {
+                   ULONGEST v = strtoulst (yytext+3, nullptr, 16);
 		   yylval.typed_val.val = v;
+                   yylval.typed_val.type = type_for_char (pstate, v);
 		   return CHARLIT;
 		}
 
-\"({GRAPHIC}|"[\""({HEXDIG}{2}|\")"\"]")*\"   {
+	/* Bracket sequences of more than 2 digits are matched here, but
+	   processString rejects any value above 0xff.  Currently
+	   there's no support for wide or wide-wide strings.  */
+\"({GRAPHIC}|"[\""({HEXDIG}{2,}|\")"\"]")*\"   {
 	           yylval.sval = processString (yytext+1, yyleng-2);
 		   return STRING;
 		}
@@ -513,10 +515,12 @@ processString (const char *text, int len)
 	     }
            else
 	     {
-               int chr;
-	       sscanf (p+2, "%2x", &chr);
+	       const char *end;
+	       ULONGEST chr = strtoulst (p + 2, &end, 16);
+	       if (chr > 0xff)
+		 error (_("wide strings are not yet supported"));
 	       *q = (char) chr;
-	       p += 5;
+	       p = end + 1;
 	     }
          }
        else
diff --git a/gdb/ada-valprint.c b/gdb/ada-valprint.c
index a59c392bef4..bf95719f040 100644
--- a/gdb/ada-valprint.c
+++ b/gdb/ada-valprint.c
@@ -277,7 +277,11 @@ ada_emit_char (int c, struct type *type, struct ui_file *stream,
 	fprintf_filtered (stream, "%c", c);
     }
   else
-    fprintf_filtered (stream, "[\"%0*x\"]", type_len * 2, c);
+    {
+      /* Follow GNAT's lead here and only use 6 digits for
+	 wide_wide_character.  */
+      fprintf_filtered (stream, "[\"%0*x\"]", std::min (6, type_len * 2), c);
+    }
 }
 
 /* Character #I of STRING, given that TYPE_LEN is the size in bytes
diff --git a/gdb/testsuite/gdb.ada/char_enum_unicode.exp b/gdb/testsuite/gdb.ada/char_enum_unicode.exp
index aa8136054e7..fad239983a3 100644
--- a/gdb/testsuite/gdb.ada/char_enum_unicode.exp
+++ b/gdb/testsuite/gdb.ada/char_enum_unicode.exp
@@ -40,4 +40,13 @@ gdb_test "print Char_Y" " = 2 $y"
 gdb_test "print Char_King" " = 3 $king"
 gdb_test "print Char_Thorn" " = 4 $thorn"
 gdb_test "print Char_Enum_Type'('x')" " = 1 'x'"
+gdb_test "print Char_Enum_Type'('\[\"0178\"\]')" " = 2 $y"
+gdb_test "print Char_Enum_Type'('\[\"1fa00\"\]')" " = 3 $king"
 gdb_test "print Char_Enum_Type'('\[\"de\"\]')" " = 4 $thorn"
+
+gdb_test "print '\[\"0178\"\]'" " = 376 $y"
+gdb_test "print '\[\"01fa00\"\]'" " = 129536 $king"
+gdb_test "print '\[\"de\"\]'" " = 222 $thorn"
+
+gdb_test "print \"\[\"0178\"\]\"" "wide strings are not yet supported"
+gdb_test "print \"\[\"de\"\]\"" " = \"\\\[\"de\"\\\]\""
diff --git a/gdb/testsuite/gdb.ada/widewide.exp b/gdb/testsuite/gdb.ada/widewide.exp
index 6fabb5bb08c..c0268f9c99b 100644
--- a/gdb/testsuite/gdb.ada/widewide.exp
+++ b/gdb/testsuite/gdb.ada/widewide.exp
@@ -33,9 +33,9 @@ if ![runto "foo.adb:$bp_location" ] then {
 
 gdb_test "print some_easy" "= 74 'J'"
 
-gdb_test "print some_larger" "= 48879 '\\\[\"0000beef\"\\\]'"
+gdb_test "print some_larger" "= 48879 '\\\[\"00beef\"\\\]'"
 
-gdb_test "print some_big" "= 14335727 '\\\[\"00dabeef\"\\\]'"
+gdb_test "print some_big" "= 14335727 '\\\[\"dabeef\"\\\]'"
 
 gdb_test "print my_wws" "= \" helo\""
 
-- 
2.31.1


^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2022-02-28 17:49 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-02-28 17:48 [pushed 0/2] Two fixes for Ada character enum literals Tom Tromey
2022-02-28 17:48 ` [pushed 1/2] Handle 'QWW' encoding case in Ada enums Tom Tromey
2022-02-28 17:48 ` [pushed 2/2] Handle multi-byte bracket sequences in Ada lexer Tom Tromey

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).