From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <tdevries@suse.de>
Received: from smtp-out2.suse.de (smtp-out2.suse.de [195.135.220.29])
 by sourceware.org (Postfix) with ESMTPS id 8CC90384F010
 for <gdb-patches@sourceware.org>; Mon, 23 May 2022 11:05:19 +0000 (GMT)
DMARC-Filter: OpenDMARC Filter v1.4.1 sourceware.org 8CC90384F010
Received: from imap2.suse-dmz.suse.de (imap2.suse-dmz.suse.de [192.168.254.74])
 (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits)
 key-exchange X25519 server-signature ECDSA (P-521) server-digest SHA512)
 (No client certificate requested)
 by smtp-out2.suse.de (Postfix) with ESMTPS id C0D671F901
 for <gdb-patches@sourceware.org>; Mon, 23 May 2022 11:05:18 +0000 (UTC)
Received: from imap2.suse-dmz.suse.de (imap2.suse-dmz.suse.de [192.168.254.74])
 (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits)
 key-exchange X25519 server-signature ECDSA (P-521) server-digest SHA512)
 (No client certificate requested)
 by imap2.suse-dmz.suse.de (Postfix) with ESMTPS id A7B1D13B2A
 for <gdb-patches@sourceware.org>; Mon, 23 May 2022 11:05:18 +0000 (UTC)
Received: from dovecot-director2.suse.de ([192.168.254.65])
 by imap2.suse-dmz.suse.de with ESMTPSA id ECrLJ25qi2JxFQAAMHmgww
 (envelope-from <tdevries@suse.de>)
 for <gdb-patches@sourceware.org>; Mon, 23 May 2022 11:05:18 +0000
From: Tom de Vries <tdevries@suse.de>
To: gdb-patches@sourceware.org
Subject: [PATCH 2/8] [gdb/c] Fix type of 2147483648 and literal truncation
Date: Mon, 23 May 2022 13:05:12 +0200
Message-Id: <20220523110518.2447-2-tdevries@suse.de>
X-Mailer: git-send-email 2.35.3
In-Reply-To: <20220523110518.2447-1-tdevries@suse.de>
References: <20220523110518.2447-1-tdevries@suse.de>
MIME-Version: 1.0
Content-Transfer-Encoding: 8bit
X-Spam-Status: No, score=-11.3 required=5.0 tests=BAYES_00, DKIM_INVALID,
 DKIM_SIGNED, GIT_PATCH_0, KAM_DMARC_STATUS, SPF_HELO_NONE, SPF_PASS, TXREP,
 T_SCC_BODY_TEXT_LINE autolearn=ham autolearn_force=no version=3.4.6
X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on
 server2.sourceware.org
X-BeenThere: gdb-patches@sourceware.org
X-Mailman-Version: 2.1.29
Precedence: list
List-Id: Gdb-patches mailing list <gdb-patches.sourceware.org>
List-Unsubscribe: <https://sourceware.org/mailman/options/gdb-patches>,
 <mailto:gdb-patches-request@sourceware.org?subject=unsubscribe>
List-Archive: <https://sourceware.org/pipermail/gdb-patches/>
List-Post: <mailto:gdb-patches@sourceware.org>
List-Help: <mailto:gdb-patches-request@sourceware.org?subject=help>
List-Subscribe: <https://sourceware.org/mailman/listinfo/gdb-patches>,
 <mailto:gdb-patches-request@sourceware.org?subject=subscribe>
X-List-Received-Date: Mon, 23 May 2022 11:05:22 -0000

[ Assuming arch i386:x86-64, sizeof (int) == 4,
sizeof (long) == sizeof (long long) == 8. ]

Currently we have (decimal for 0x80000000):
...
(gdb) ptype 2147483648
type = unsigned int
...

According to C language rules, unsigned types cannot be used for decimal
constants, so the type should be long instead (reported in PR16377).

Fix this by making sure the type of 2147483648 is long.

The next interesting case is (decimal for 0x8000000000000000):
...
(gdb) ptype 9223372036854775808
type = unsigned long
...

According to the same rules, unsigned long is incorrect.

Current gcc uses __int128 as type, which is allowed, but we don't have that
available in gdb, so the strict response here would be erroring out with
overflow.

Older gcc without __int128 support, as well as clang use an unsigned type, but with
a warning.  Interestingly, clang uses "unsigned long long" while gcc uses
"unsigned long", which seems the better choice.

Given that the compilers allow this as a convience, do the same in gdb
and keep type "unsigned long", and make this explicit in parser and test-case.

Furthermore, make sure we error out on overflow instead of truncating in all
cases.

Tested on x86_64-linux with --enable-targets=all.

Bug: https://sourceware.org/bugzilla/show_bug.cgi?id=16377
---
 gdb/c-exp.y                             | 111 ++++++++++--------------
 gdb/parse.c                             |  37 ++++++++
 gdb/parser-defs.h                       |   2 +
 gdb/testsuite/gdb.base/parse_number.exp |  56 ++++++++++--
 4 files changed, 130 insertions(+), 76 deletions(-)

diff --git a/gdb/c-exp.y b/gdb/c-exp.y
index 72f8dd32d93..61a61fcba09 100644
--- a/gdb/c-exp.y
+++ b/gdb/c-exp.y
@@ -1929,7 +1929,6 @@ parse_number (struct parser_state *par_state,
 {
   ULONGEST n = 0;
   ULONGEST prevn = 0;
-  ULONGEST un;
 
   int i = 0;
   int c;
@@ -1945,9 +1944,6 @@ parse_number (struct parser_state *par_state,
   /* We have found a "L" or "U" (or "i") suffix.  */
   int found_suffix = 0;
 
-  ULONGEST high_bit;
-  struct type *signed_type;
-  struct type *unsigned_type;
   char *p;
 
   p = (char *) alloca (len);
@@ -2095,18 +2091,12 @@ parse_number (struct parser_state *par_state,
       if (i >= base)
 	return ERROR;		/* Invalid digit in this base */
 
-      /* Portably test for overflow (only works for nonzero values, so make
-	 a second check for zero).  FIXME: Can't we just make n and prevn
-	 unsigned and avoid this?  */
-      if (c != 'l' && c != 'u' && c != 'i' && (prevn >= n) && n != 0)
-	unsigned_p = 1;		/* Try something unsigned */
-
-      /* Portably test for unsigned overflow.
-	 FIXME: This check is wrong; for example it doesn't find overflow
-	 on 0x123456789 when LONGEST is 32 bits.  */
-      if (c != 'l' && c != 'u' && c != 'i' && n != 0)
-	{	
-	  if (unsigned_p && prevn >= n)
+      if (c != 'l' && c != 'u' && c != 'i')
+	{
+	  /* Test for overflow.  */
+	  if (prevn == 0 && n == 0)
+	    ;
+	  else if (prevn >= n)
 	    error (_("Numeric constant too large."));
 	}
       prevn = n;
@@ -2123,58 +2113,45 @@ parse_number (struct parser_state *par_state,
      or gdbarch_long_bit will be that big, sometimes not.  To deal with
      the case where it is we just always shift the value more than
      once, with fewer bits each time.  */
-
-  un = n >> 2;
-  if (long_p == 0
-      && (un >> (gdbarch_int_bit (par_state->gdbarch ()) - 2)) == 0)
-    {
-      high_bit
-	= ((ULONGEST)1) << (gdbarch_int_bit (par_state->gdbarch ()) - 1);
-
-      /* A large decimal (not hex or octal) constant (between INT_MAX
-	 and UINT_MAX) is a long or unsigned long, according to ANSI,
-	 never an unsigned int, but this code treats it as unsigned
-	 int.  This probably should be fixed.  GCC gives a warning on
-	 such constants.  */
-
-      unsigned_type = parse_type (par_state)->builtin_unsigned_int;
-      signed_type = parse_type (par_state)->builtin_int;
-    }
-  else if (long_p <= 1
-	   && (un >> (gdbarch_long_bit (par_state->gdbarch ()) - 2)) == 0)
-    {
-      high_bit
-	= ((ULONGEST)1) << (gdbarch_long_bit (par_state->gdbarch ()) - 1);
-      unsigned_type = parse_type (par_state)->builtin_unsigned_long;
-      signed_type = parse_type (par_state)->builtin_long;
-    }
+  int int_bits = gdbarch_int_bit (par_state->gdbarch ());
+  int long_bits = gdbarch_long_bit (par_state->gdbarch ());
+  int long_long_bits = gdbarch_long_long_bit (par_state->gdbarch ());
+  bool have_signed
+    /* No 'u' suffix.  */
+    = !unsigned_p;
+  bool have_unsigned
+    = ((/* 'u' suffix.  */
+	unsigned_p)
+       || (/* Not a decimal.  */
+	   base != 10)
+       || (/* Allowed as a convenience, in case decimal doesn't fit in largest
+	      signed type.  */
+	   !fits_in_type (1, n, long_long_bits, true)));
+  bool have_int
+    /* No 'l' or 'll' suffix.  */
+    = long_p == 0;
+  bool have_long
+    /* No 'll' suffix.  */
+    = long_p <= 1;
+  if (have_int && have_signed && fits_in_type (1, n, int_bits, true))
+    putithere->typed_val_int.type = parse_type (par_state)->builtin_int;
+  else if (have_int && have_unsigned && fits_in_type (1, n, int_bits, false))
+    putithere->typed_val_int.type
+      = parse_type (par_state)->builtin_unsigned_int;
+  else if (have_long && have_signed && fits_in_type (1, n, long_bits, true))
+    putithere->typed_val_int.type = parse_type (par_state)->builtin_long;
+  else if (have_long && have_unsigned && fits_in_type (1, n, long_bits, false))
+    putithere->typed_val_int.type
+      = parse_type (par_state)->builtin_unsigned_long;
+  else if (have_signed && fits_in_type (1, n, long_long_bits, true))
+    putithere->typed_val_int.type
+      = parse_type (par_state)->builtin_long_long;
+  else if (have_unsigned && fits_in_type (1, n, long_long_bits, false))
+    putithere->typed_val_int.type
+      = parse_type (par_state)->builtin_unsigned_long_long;
   else
-    {
-      int shift;
-      if (sizeof (ULONGEST) * HOST_CHAR_BIT
-	  < gdbarch_long_long_bit (par_state->gdbarch ()))
-	/* A long long does not fit in a LONGEST.  */
-	shift = (sizeof (ULONGEST) * HOST_CHAR_BIT - 1);
-      else
-	shift = (gdbarch_long_long_bit (par_state->gdbarch ()) - 1);
-      high_bit = (ULONGEST) 1 << shift;
-      unsigned_type = parse_type (par_state)->builtin_unsigned_long_long;
-      signed_type = parse_type (par_state)->builtin_long_long;
-    }
-
-   putithere->typed_val_int.val = n;
-
-   /* If the high bit of the worked out type is set then this number
-      has to be unsigned. */
-
-   if (unsigned_p || (n & high_bit))
-     {
-       putithere->typed_val_int.type = unsigned_type;
-     }
-   else
-     {
-       putithere->typed_val_int.type = signed_type;
-     }
+    error (_("Numeric constant too large."));
+  putithere->typed_val_int.val = n;
 
    if (imaginary_p)
      putithere->typed_val_int.type
diff --git a/gdb/parse.c b/gdb/parse.c
index fb308be0d7c..aa12f6fe012 100644
--- a/gdb/parse.c
+++ b/gdb/parse.c
@@ -611,6 +611,43 @@ parse_float (const char *p, int len,
 {
   return target_float_from_string (data, type, std::string (p, len));
 }
+
+/* Return true if the number N_SIGN * N fits in a type with TYPE_BITS and
+   TYPE_SIGNED_P.  N_SIGNED is either 1 or -1.  */
+
+bool
+fits_in_type (int n_sign, ULONGEST n, int type_bits, bool type_signed_p)
+{
+  /* Normalize -0.  */
+  if (n == 0 && n_sign == -1)
+    n_sign = 1;
+
+  if (n_sign == -1 && !type_signed_p)
+    /* Can't fit a negative number in an unsigned type.  */
+    return false;
+
+  if (type_bits > sizeof (ULONGEST) * 8)
+    return true;
+
+  ULONGEST smax = (ULONGEST)1 << (type_bits - 1);
+  if (n_sign == -1)
+    {
+      /* Negative number, signed type.  */
+      return (n <= smax);
+    }
+  else if (n_sign == 1 && type_signed_p)
+    {
+      /* Positive number, signed type.  */
+      return (n < smax);
+    }
+  else if (n_sign == 1 && !type_signed_p)
+    {
+      /* Positive number, unsigned type.  */
+      return ((n >> 1) >> (type_bits - 1)) == 0;
+    }
+  else
+    gdb_assert_not_reached ("");
+}
 
 /* This function avoids direct calls to fprintf 
    in the parser generated debug code.  */
diff --git a/gdb/parser-defs.h b/gdb/parser-defs.h
index 3be7d6c839f..ade3039ebb0 100644
--- a/gdb/parser-defs.h
+++ b/gdb/parser-defs.h
@@ -412,6 +412,8 @@ extern std::string copy_name (struct stoken);
 
 extern bool parse_float (const char *p, int len,
 			 const struct type *type, gdb_byte *data);
+extern bool fits_in_type (int n_sign, ULONGEST n, int type_bits,
+			  bool type_signed_p);
 
 
 /* Function used to avoid direct calls to fprintf
diff --git a/gdb/testsuite/gdb.base/parse_number.exp b/gdb/testsuite/gdb.base/parse_number.exp
index 7c259e0a8a0..638ea342384 100644
--- a/gdb/testsuite/gdb.base/parse_number.exp
+++ b/gdb/testsuite/gdb.base/parse_number.exp
@@ -68,6 +68,22 @@ proc fits_in_type { n type_bits type_signedness } {
     }
 }
 
+# Return 1 if LANG is a c-like language, in the sense that it uses the same
+# parser.
+
+proc c_like { lang } {
+    set res 0
+    switch $lang {
+	c
+	- c++
+	- asm
+	- objective-c
+	- opencl
+	- minimal {set res 1}
+    }
+    return $res
+}
+
 # Parse number N for LANG, and return a list of expected type and value.
 
 proc parse_number { lang n } {
@@ -165,26 +181,48 @@ proc parse_number { lang n } {
 			($any|$re_overflow)]
 	}
     } else {
-	# This is wrong for c-like languages.  For the decimal case, we
-	# shouldn't use unsigned.
-	# See PR 16377.
+	if { [c_like $lang] } {
+	    if { $hex_p } {
+		# C Hex.
+		set have_unsigned 1
+	    } else {
+		# C Decimal.  Unsigned not allowed according.
+		if { [fits_in_type $n $long_long_bits s] } {
+		    # Fits in largest signed type.
+		    set have_unsigned 0
+		} else {
+		    # Doesn't fit in largest signed type, so ill-formed, but
+		    # allow unsigned as a convenience, as compilers do (though
+		    # with a warning).
+		    set have_unsigned 1
+		}
+	    }
+	} else {
+	    # Non-C.
+	    set have_unsigned 1
+	}
+
 	if { [fits_in_type $n $int_bits s] } {
 	    return [list int $n]
-	} elseif { [fits_in_type $n $int_bits u] } {
+	} elseif { $have_unsigned && [fits_in_type $n $int_bits u] } {
 	    return [list "unsigned int" $n]
 	} elseif { [fits_in_type $n $long_bits s] } {
 	    return [list long $n]
-	} elseif { [fits_in_type $n $long_bits u] } {
+	} elseif { $have_unsigned && [fits_in_type $n $long_bits u] } {
 	    return [list "unsigned long" $n]
 	} elseif { [fits_in_type $n $long_long_bits s] } {
 	    return [list "long long" $n]
-	} elseif { [fits_in_type $n $long_long_bits u] } {
+	} elseif { $have_unsigned && [fits_in_type $n $long_long_bits u] } {
 	    return [list "unsigned long long" $n]
 	} else {
 	    # Overflow.
-	    # Some truncated value or re_overflow, should be re_overflow.
-	    return [list "((unsigned )?(int|long)|$re_overflow)" \
-			($any|$re_overflow)]
+	    if { [c_like $lang] } {
+		return [list $re_overflow $re_overflow]
+	    } else {
+		# Some truncated value or re_overflow, should be re_overflow.
+		return [list "((unsigned )?(int|long)|$re_overflow)" \
+			    ($any|$re_overflow)]
+	    }
 	}
     }
 
-- 
2.35.3