From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <vries@sourceware.org>
Received: by sourceware.org (Postfix, from userid 2205)
 id B749F3838024; Sat,  4 Jun 2022 11:17:43 +0000 (GMT)
DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org B749F3838024
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: quoted-printable
From: Tom de Vries <vries@sourceware.org>
To: gdb-cvs@sourceware.org
Subject: [binutils-gdb] [gdb/c] Fix type of 2147483648 and literal truncation
X-Act-Checkin: binutils-gdb
X-Git-Author: Tom de Vries <tdevries@suse.de>
X-Git-Refname: refs/heads/master
X-Git-Oldrev: 1b4633f812b329863a523c4d798c2b55417b5144
X-Git-Newrev: 1d8c0dfae79a5183e9e3311fb867afd679bc8e84
Message-Id: <20220604111743.B749F3838024@sourceware.org>
Date: Sat,  4 Jun 2022 11:17:43 +0000 (GMT)
X-BeenThere: gdb-cvs@sourceware.org
X-Mailman-Version: 2.1.29
Precedence: list
List-Id: Gdb-cvs mailing list <gdb-cvs.sourceware.org>
List-Unsubscribe: <https://sourceware.org/mailman/options/gdb-cvs>,
 <mailto:gdb-cvs-request@sourceware.org?subject=unsubscribe>
List-Archive: <https://sourceware.org/pipermail/gdb-cvs/>
List-Help: <mailto:gdb-cvs-request@sourceware.org?subject=help>
List-Subscribe: <https://sourceware.org/mailman/listinfo/gdb-cvs>,
 <mailto:gdb-cvs-request@sourceware.org?subject=subscribe>
X-List-Received-Date: Sat, 04 Jun 2022 11:17:43 -0000

https://sourceware.org/git/gitweb.cgi?p=3Dbinutils-gdb.git;h=3D1d8c0dfae79a=
5183e9e3311fb867afd679bc8e84

commit 1d8c0dfae79a5183e9e3311fb867afd679bc8e84
Author: Tom de Vries <tdevries@suse.de>
Date:   Sat Jun 4 13:17:33 2022 +0200

    [gdb/c] Fix type of 2147483648 and literal truncation
   =20
    [ Assuming arch i386:x86-64, sizeof (int) =3D=3D 4,
    sizeof (long) =3D=3D sizeof (long long) =3D=3D 8. ]
   =20
    Currently we have (decimal for 0x80000000):
    ...
    (gdb) ptype 2147483648
    type =3D unsigned int
    ...
   =20
    According to C language rules, unsigned types cannot be used for decimal
    constants, so the type should be long instead (reported in PR16377).
   =20
    Fix this by making sure the type of 2147483648 is long.
   =20
    The next interesting case is (decimal for 0x8000000000000000):
    ...
    (gdb) ptype 9223372036854775808
    type =3D unsigned long
    ...
   =20
    According to the same rules, unsigned long is incorrect.
   =20
    Current gcc uses __int128 as type, which is allowed, but we don't have =
that
    available in gdb, so the strict response here would be erroring out with
    overflow.
   =20
    Older gcc without __int128 support, as well as clang use an unsigned ty=
pe, but with
    a warning.  Interestingly, clang uses "unsigned long long" while gcc us=
es
    "unsigned long", which seems the better choice.
   =20
    Given that the compilers allow this as a convience, do the same in gdb
    and keep type "unsigned long", and make this explicit in parser and tes=
t-case.
   =20
    Furthermore, make sure we error out on overflow instead of truncating i=
n all
    cases.
   =20
    Tested on x86_64-linux with --enable-targets=3Dall.
   =20
    Bug: https://sourceware.org/bugzilla/show_bug.cgi?id=3D16377

Diff:
---
 gdb/c-exp.y                             | 111 +++++++++++++---------------=
----
 gdb/parse.c                             |  37 +++++++++++
 gdb/parser-defs.h                       |   2 +
 gdb/testsuite/gdb.base/parse_number.exp |  56 +++++++++++++---
 4 files changed, 130 insertions(+), 76 deletions(-)

diff --git a/gdb/c-exp.y b/gdb/c-exp.y
index 72f8dd32d93..61a61fcba09 100644
--- a/gdb/c-exp.y
+++ b/gdb/c-exp.y
@@ -1929,7 +1929,6 @@ parse_number (struct parser_state *par_state,
 {
   ULONGEST n =3D 0;
   ULONGEST prevn =3D 0;
-  ULONGEST un;
=20
   int i =3D 0;
   int c;
@@ -1945,9 +1944,6 @@ parse_number (struct parser_state *par_state,
   /* We have found a "L" or "U" (or "i") suffix.  */
   int found_suffix =3D 0;
=20
-  ULONGEST high_bit;
-  struct type *signed_type;
-  struct type *unsigned_type;
   char *p;
=20
   p =3D (char *) alloca (len);
@@ -2095,18 +2091,12 @@ parse_number (struct parser_state *par_state,
       if (i >=3D base)
 	return ERROR;		/* Invalid digit in this base */
=20
-      /* Portably test for overflow (only works for nonzero values, so make
-	 a second check for zero).  FIXME: Can't we just make n and prevn
-	 unsigned and avoid this?  */
-      if (c !=3D 'l' && c !=3D 'u' && c !=3D 'i' && (prevn >=3D n) && n !=
=3D 0)
-	unsigned_p =3D 1;		/* Try something unsigned */
-
-      /* Portably test for unsigned overflow.
-	 FIXME: This check is wrong; for example it doesn't find overflow
-	 on 0x123456789 when LONGEST is 32 bits.  */
-      if (c !=3D 'l' && c !=3D 'u' && c !=3D 'i' && n !=3D 0)
-	{=09
-	  if (unsigned_p && prevn >=3D n)
+      if (c !=3D 'l' && c !=3D 'u' && c !=3D 'i')
+	{
+	  /* Test for overflow.  */
+	  if (prevn =3D=3D 0 && n =3D=3D 0)
+	    ;
+	  else if (prevn >=3D n)
 	    error (_("Numeric constant too large."));
 	}
       prevn =3D n;
@@ -2123,58 +2113,45 @@ parse_number (struct parser_state *par_state,
      or gdbarch_long_bit will be that big, sometimes not.  To deal with
      the case where it is we just always shift the value more than
      once, with fewer bits each time.  */
-
-  un =3D n >> 2;
-  if (long_p =3D=3D 0
-      && (un >> (gdbarch_int_bit (par_state->gdbarch ()) - 2)) =3D=3D 0)
-    {
-      high_bit
-	=3D ((ULONGEST)1) << (gdbarch_int_bit (par_state->gdbarch ()) - 1);
-
-      /* A large decimal (not hex or octal) constant (between INT_MAX
-	 and UINT_MAX) is a long or unsigned long, according to ANSI,
-	 never an unsigned int, but this code treats it as unsigned
-	 int.  This probably should be fixed.  GCC gives a warning on
-	 such constants.  */
-
-      unsigned_type =3D parse_type (par_state)->builtin_unsigned_int;
-      signed_type =3D parse_type (par_state)->builtin_int;
-    }
-  else if (long_p <=3D 1
-	   && (un >> (gdbarch_long_bit (par_state->gdbarch ()) - 2)) =3D=3D 0)
-    {
-      high_bit
-	=3D ((ULONGEST)1) << (gdbarch_long_bit (par_state->gdbarch ()) - 1);
-      unsigned_type =3D parse_type (par_state)->builtin_unsigned_long;
-      signed_type =3D parse_type (par_state)->builtin_long;
-    }
+  int int_bits =3D gdbarch_int_bit (par_state->gdbarch ());
+  int long_bits =3D gdbarch_long_bit (par_state->gdbarch ());
+  int long_long_bits =3D gdbarch_long_long_bit (par_state->gdbarch ());
+  bool have_signed
+    /* No 'u' suffix.  */
+    =3D !unsigned_p;
+  bool have_unsigned
+    =3D ((/* 'u' suffix.  */
+	unsigned_p)
+       || (/* Not a decimal.  */
+	   base !=3D 10)
+       || (/* Allowed as a convenience, in case decimal doesn't fit in lar=
gest
+	      signed type.  */
+	   !fits_in_type (1, n, long_long_bits, true)));
+  bool have_int
+    /* No 'l' or 'll' suffix.  */
+    =3D long_p =3D=3D 0;
+  bool have_long
+    /* No 'll' suffix.  */
+    =3D long_p <=3D 1;
+  if (have_int && have_signed && fits_in_type (1, n, int_bits, true))
+    putithere->typed_val_int.type =3D parse_type (par_state)->builtin_int;
+  else if (have_int && have_unsigned && fits_in_type (1, n, int_bits, fals=
e))
+    putithere->typed_val_int.type
+      =3D parse_type (par_state)->builtin_unsigned_int;
+  else if (have_long && have_signed && fits_in_type (1, n, long_bits, true=
))
+    putithere->typed_val_int.type =3D parse_type (par_state)->builtin_long;
+  else if (have_long && have_unsigned && fits_in_type (1, n, long_bits, fa=
lse))
+    putithere->typed_val_int.type
+      =3D parse_type (par_state)->builtin_unsigned_long;
+  else if (have_signed && fits_in_type (1, n, long_long_bits, true))
+    putithere->typed_val_int.type
+      =3D parse_type (par_state)->builtin_long_long;
+  else if (have_unsigned && fits_in_type (1, n, long_long_bits, false))
+    putithere->typed_val_int.type
+      =3D parse_type (par_state)->builtin_unsigned_long_long;
   else
-    {
-      int shift;
-      if (sizeof (ULONGEST) * HOST_CHAR_BIT
-	  < gdbarch_long_long_bit (par_state->gdbarch ()))
-	/* A long long does not fit in a LONGEST.  */
-	shift =3D (sizeof (ULONGEST) * HOST_CHAR_BIT - 1);
-      else
-	shift =3D (gdbarch_long_long_bit (par_state->gdbarch ()) - 1);
-      high_bit =3D (ULONGEST) 1 << shift;
-      unsigned_type =3D parse_type (par_state)->builtin_unsigned_long_long;
-      signed_type =3D parse_type (par_state)->builtin_long_long;
-    }
-
-   putithere->typed_val_int.val =3D n;
-
-   /* If the high bit of the worked out type is set then this number
-      has to be unsigned. */
-
-   if (unsigned_p || (n & high_bit))
-     {
-       putithere->typed_val_int.type =3D unsigned_type;
-     }
-   else
-     {
-       putithere->typed_val_int.type =3D signed_type;
-     }
+    error (_("Numeric constant too large."));
+  putithere->typed_val_int.val =3D n;
=20
    if (imaginary_p)
      putithere->typed_val_int.type
diff --git a/gdb/parse.c b/gdb/parse.c
index fb308be0d7c..aa12f6fe012 100644
--- a/gdb/parse.c
+++ b/gdb/parse.c
@@ -611,6 +611,43 @@ parse_float (const char *p, int len,
 {
   return target_float_from_string (data, type, std::string (p, len));
 }
+
+/* Return true if the number N_SIGN * N fits in a type with TYPE_BITS and
+   TYPE_SIGNED_P.  N_SIGNED is either 1 or -1.  */
+
+bool
+fits_in_type (int n_sign, ULONGEST n, int type_bits, bool type_signed_p)
+{
+  /* Normalize -0.  */
+  if (n =3D=3D 0 && n_sign =3D=3D -1)
+    n_sign =3D 1;
+
+  if (n_sign =3D=3D -1 && !type_signed_p)
+    /* Can't fit a negative number in an unsigned type.  */
+    return false;
+
+  if (type_bits > sizeof (ULONGEST) * 8)
+    return true;
+
+  ULONGEST smax =3D (ULONGEST)1 << (type_bits - 1);
+  if (n_sign =3D=3D -1)
+    {
+      /* Negative number, signed type.  */
+      return (n <=3D smax);
+    }
+  else if (n_sign =3D=3D 1 && type_signed_p)
+    {
+      /* Positive number, signed type.  */
+      return (n < smax);
+    }
+  else if (n_sign =3D=3D 1 && !type_signed_p)
+    {
+      /* Positive number, unsigned type.  */
+      return ((n >> 1) >> (type_bits - 1)) =3D=3D 0;
+    }
+  else
+    gdb_assert_not_reached ("");
+}
 =0C
 /* This function avoids direct calls to fprintf=20
    in the parser generated debug code.  */
diff --git a/gdb/parser-defs.h b/gdb/parser-defs.h
index 3be7d6c839f..ade3039ebb0 100644
--- a/gdb/parser-defs.h
+++ b/gdb/parser-defs.h
@@ -412,6 +412,8 @@ extern std::string copy_name (struct stoken);
=20
 extern bool parse_float (const char *p, int len,
 			 const struct type *type, gdb_byte *data);
+extern bool fits_in_type (int n_sign, ULONGEST n, int type_bits,
+			  bool type_signed_p);
 =0C
=20
 /* Function used to avoid direct calls to fprintf
diff --git a/gdb/testsuite/gdb.base/parse_number.exp b/gdb/testsuite/gdb.ba=
se/parse_number.exp
index 7c259e0a8a0..638ea342384 100644
--- a/gdb/testsuite/gdb.base/parse_number.exp
+++ b/gdb/testsuite/gdb.base/parse_number.exp
@@ -68,6 +68,22 @@ proc fits_in_type { n type_bits type_signedness } {
     }
 }
=20
+# Return 1 if LANG is a c-like language, in the sense that it uses the same
+# parser.
+
+proc c_like { lang } {
+    set res 0
+    switch $lang {
+	c
+	- c++
+	- asm
+	- objective-c
+	- opencl
+	- minimal {set res 1}
+    }
+    return $res
+}
+
 # Parse number N for LANG, and return a list of expected type and value.
=20
 proc parse_number { lang n } {
@@ -165,26 +181,48 @@ proc parse_number { lang n } {
 			($any|$re_overflow)]
 	}
     } else {
-	# This is wrong for c-like languages.  For the decimal case, we
-	# shouldn't use unsigned.
-	# See PR 16377.
+	if { [c_like $lang] } {
+	    if { $hex_p } {
+		# C Hex.
+		set have_unsigned 1
+	    } else {
+		# C Decimal.  Unsigned not allowed according.
+		if { [fits_in_type $n $long_long_bits s] } {
+		    # Fits in largest signed type.
+		    set have_unsigned 0
+		} else {
+		    # Doesn't fit in largest signed type, so ill-formed, but
+		    # allow unsigned as a convenience, as compilers do (though
+		    # with a warning).
+		    set have_unsigned 1
+		}
+	    }
+	} else {
+	    # Non-C.
+	    set have_unsigned 1
+	}
+
 	if { [fits_in_type $n $int_bits s] } {
 	    return [list int $n]
-	} elseif { [fits_in_type $n $int_bits u] } {
+	} elseif { $have_unsigned && [fits_in_type $n $int_bits u] } {
 	    return [list "unsigned int" $n]
 	} elseif { [fits_in_type $n $long_bits s] } {
 	    return [list long $n]
-	} elseif { [fits_in_type $n $long_bits u] } {
+	} elseif { $have_unsigned && [fits_in_type $n $long_bits u] } {
 	    return [list "unsigned long" $n]
 	} elseif { [fits_in_type $n $long_long_bits s] } {
 	    return [list "long long" $n]
-	} elseif { [fits_in_type $n $long_long_bits u] } {
+	} elseif { $have_unsigned && [fits_in_type $n $long_long_bits u] } {
 	    return [list "unsigned long long" $n]
 	} else {
 	    # Overflow.
-	    # Some truncated value or re_overflow, should be re_overflow.
-	    return [list "((unsigned )?(int|long)|$re_overflow)" \
-			($any|$re_overflow)]
+	    if { [c_like $lang] } {
+		return [list $re_overflow $re_overflow]
+	    } else {
+		# Some truncated value or re_overflow, should be re_overflow.
+		return [list "((unsigned )?(int|long)|$re_overflow)" \
+			    ($any|$re_overflow)]
+	    }
 	}
     }