public inbox for gdb-patches@sourceware.org
 help / color / mirror / Atom feed
* [PATCH 2/9] Use unsigned as base type for some enums
  2018-08-27 14:57 [PATCH 0/9] Add UBSan to the build Tom Tromey
  2018-08-27 14:57 ` [PATCH 1/9] Do not pass NULL to memcpy Tom Tromey
  2018-08-27 14:57 ` [PATCH 5/9] Avoid undefined behavior in parse_number Tom Tromey
@ 2018-08-27 14:57 ` Tom Tromey
  2018-08-27 19:22   ` Simon Marchi
  2018-08-27 14:57 ` [PATCH 3/9] Avoid undefined behavior in extract_integer Tom Tromey
                   ` (6 subsequent siblings)
  9 siblings, 1 reply; 27+ messages in thread
From: Tom Tromey @ 2018-08-27 14:57 UTC (permalink / raw)
  To: gdb-patches; +Cc: Tom Tromey

-fsanitize=undefined complains about using operator~ on various enum
types that are used with DEF_ENUM_FLAGS_TYPE.  This patch fixes these
problems by explicitly setting the base type for these enums to
unsigned.

ChangeLog
2018-08-27  Tom Tromey  <tom@tromey.com>

	* symfile-add-flags.h (enum symfile_add_flag): Use unsigned as
	base type.
	* objfile-flags.h (enum objfile_flag): Use unsigned as base type.
	* gdbtypes.h (enum type_instance_flag_value): Use unsigned as base
	type.
	* c-lang.h (enum c_string_type_values): Use unsigned as base
	type.
	* btrace.h (enum btrace_thread_flag): Use unsigned as base type.
---
 gdb/ChangeLog           | 11 +++++++++++
 gdb/btrace.h            |  2 +-
 gdb/c-lang.h            |  2 +-
 gdb/gdbtypes.h          |  2 +-
 gdb/objfile-flags.h     |  2 +-
 gdb/symfile-add-flags.h |  2 +-
 6 files changed, 16 insertions(+), 5 deletions(-)

diff --git a/gdb/btrace.h b/gdb/btrace.h
index bfb0b9f2787..0448bd6d498 100644
--- a/gdb/btrace.h
+++ b/gdb/btrace.h
@@ -228,7 +228,7 @@ struct btrace_call_history
 };
 
 /* Branch trace thread flags.  */
-enum btrace_thread_flag
+enum btrace_thread_flag : unsigned
 {
   /* The thread is to be stepped forwards.  */
   BTHR_STEP = (1 << 0),
diff --git a/gdb/c-lang.h b/gdb/c-lang.h
index b7bf08992b9..a6d5753a287 100644
--- a/gdb/c-lang.h
+++ b/gdb/c-lang.h
@@ -35,7 +35,7 @@ struct parser_state;
 /* The various kinds of C string and character.  Note that these
    values are chosen so that they may be or'd together in certain
    ways.  */
-enum c_string_type_values
+enum c_string_type_values : unsigned
   {
     /* An ordinary string: "value".  */
     C_STRING = 0,
diff --git a/gdb/gdbtypes.h b/gdb/gdbtypes.h
index 14059ab3dc5..f7ea2ac6df5 100644
--- a/gdb/gdbtypes.h
+++ b/gdb/gdbtypes.h
@@ -192,7 +192,7 @@ enum type_code
 /* * Some bits for the type's instance_flags word.  See the macros
    below for documentation on each bit.  */
 
-enum type_instance_flag_value
+enum type_instance_flag_value : unsigned
 {
   TYPE_INSTANCE_FLAG_CONST = (1 << 0),
   TYPE_INSTANCE_FLAG_VOLATILE = (1 << 1),
diff --git a/gdb/objfile-flags.h b/gdb/objfile-flags.h
index aeaa8fbc04d..6f5760d021d 100644
--- a/gdb/objfile-flags.h
+++ b/gdb/objfile-flags.h
@@ -25,7 +25,7 @@
 /* Defines for the objfile flags field.  Defined in a separate file to
    break circular header dependencies.  */
 
-enum objfile_flag
+enum objfile_flag : unsigned
   {
     /* When an object file has its functions reordered (currently
        Irix-5.2 shared libraries exhibit this behaviour), we will need
diff --git a/gdb/symfile-add-flags.h b/gdb/symfile-add-flags.h
index 3c07513e395..35241fe4a04 100644
--- a/gdb/symfile-add-flags.h
+++ b/gdb/symfile-add-flags.h
@@ -26,7 +26,7 @@
    symbol_file_add, etc.  Defined in a separate file to break circular
    header dependencies.  */
 
-enum symfile_add_flag
+enum symfile_add_flag : unsigned
   {
     /* Be chatty about what you are doing.  */
     SYMFILE_VERBOSE = 1 << 1,
-- 
2.13.6

^ permalink raw reply	[flat|nested] 27+ messages in thread

* [PATCH 1/9] Do not pass NULL to memcpy
  2018-08-27 14:57 [PATCH 0/9] Add UBSan to the build Tom Tromey
@ 2018-08-27 14:57 ` Tom Tromey
  2018-08-27 19:12   ` Simon Marchi
  2018-08-27 14:57 ` [PATCH 5/9] Avoid undefined behavior in parse_number Tom Tromey
                   ` (8 subsequent siblings)
  9 siblings, 1 reply; 27+ messages in thread
From: Tom Tromey @ 2018-08-27 14:57 UTC (permalink / raw)
  To: gdb-patches; +Cc: Tom Tromey

-fsanitize=undefined pointed out a couple of spots that pass NULL to
memcpy, which is undefined behavior according to the C standard.

ChangeLog
2018-08-27  Tom Tromey  <tom@tromey.com>

	* namespace.c (add_using_directive): Don't pass NULL to memcpy.
	* dwarf2-frame.h (struct dwarf2_frame_state_reg_info): Don't pass
	NULL to memcpy.
---
 gdb/ChangeLog      | 6 ++++++
 gdb/dwarf2-frame.h | 3 ++-
 gdb/namespace.c    | 5 +++--
 3 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/gdb/dwarf2-frame.h b/gdb/dwarf2-frame.h
index 52316e5e168..6844010c8df 100644
--- a/gdb/dwarf2-frame.h
+++ b/gdb/dwarf2-frame.h
@@ -110,7 +110,8 @@ struct dwarf2_frame_state_reg_info
     size_t size = src.num_regs * sizeof (struct dwarf2_frame_state_reg);
 
     reg = (struct dwarf2_frame_state_reg *) xmalloc (size);
-    memcpy (reg, src.reg, size);
+    if (size > 0)
+      memcpy (reg, src.reg, size);
   }
 
   /* Assignment operator for both move-assignment and copy-assignment.  */
diff --git a/gdb/namespace.c b/gdb/namespace.c
index be998d9d491..85c0c4b14d7 100644
--- a/gdb/namespace.c
+++ b/gdb/namespace.c
@@ -111,8 +111,9 @@ add_using_directive (struct using_direct **using_directives,
   else
     newobj->declaration = declaration;
 
-  memcpy (newobj->excludes, excludes.data (),
-	  excludes.size () * sizeof (*newobj->excludes));
+  if (!excludes.empty ())
+    memcpy (newobj->excludes, excludes.data (),
+	    excludes.size () * sizeof (*newobj->excludes));
   newobj->excludes[excludes.size ()] = NULL;
 
   newobj->next = *using_directives;
-- 
2.13.6

^ permalink raw reply	[flat|nested] 27+ messages in thread

* [PATCH 6/9] Avoid undefined behavior in read_signed_leb128
  2018-08-27 14:57 [PATCH 0/9] Add UBSan to the build Tom Tromey
                   ` (3 preceding siblings ...)
  2018-08-27 14:57 ` [PATCH 3/9] Avoid undefined behavior in extract_integer Tom Tromey
@ 2018-08-27 14:57 ` Tom Tromey
  2018-08-27 14:57 ` [PATCH 7/9] Avoid undefined behavior in ada_operator_length Tom Tromey
                   ` (4 subsequent siblings)
  9 siblings, 0 replies; 27+ messages in thread
From: Tom Tromey @ 2018-08-27 14:57 UTC (permalink / raw)
  To: gdb-patches; +Cc: Tom Tromey

-fsanitize=undefined pointed out that read_signed_leb128 had an
undefined left-shift when processing the final byte of a 64-bit leb:

    runtime error: left shift of 127 by 63 places cannot be represented in type 'long int'

and an undefined negation:

    runtime error: negation of -9223372036854775808 cannot be represented in type 'long int'; cast to an unsigned type to negate this value to itself

Both of these problems are readily avoided by havinng
read_signed_leb128 work in an unsigned type, and then casting to the
signed type at the return.

ChangeLog
2018-08-27  Tom Tromey  <tom@tromey.com>

	* dwarf2read.c (read_signed_leb128): Work in ULONGEST.
---
 gdb/ChangeLog    | 4 ++++
 gdb/dwarf2read.c | 6 +++---
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/gdb/dwarf2read.c b/gdb/dwarf2read.c
index 86ef1c4040b..e61d6e04cb4 100644
--- a/gdb/dwarf2read.c
+++ b/gdb/dwarf2read.c
@@ -19600,7 +19600,7 @@ static LONGEST
 read_signed_leb128 (bfd *abfd, const gdb_byte *buf,
 		    unsigned int *bytes_read_ptr)
 {
-  LONGEST result;
+  ULONGEST result;
   int shift, num_read;
   unsigned char byte;
 
@@ -19612,7 +19612,7 @@ read_signed_leb128 (bfd *abfd, const gdb_byte *buf,
       byte = bfd_get_8 (abfd, buf);
       buf++;
       num_read++;
-      result |= ((LONGEST) (byte & 127) << shift);
+      result |= ((ULONGEST) (byte & 127) << shift);
       shift += 7;
       if ((byte & 128) == 0)
 	{
@@ -19620,7 +19620,7 @@ read_signed_leb128 (bfd *abfd, const gdb_byte *buf,
 	}
     }
   if ((shift < 8 * sizeof (result)) && (byte & 0x40))
-    result |= -(((LONGEST) 1) << shift);
+    result |= -(((ULONGEST) 1) << shift);
   *bytes_read_ptr = num_read;
   return result;
 }
-- 
2.13.6

^ permalink raw reply	[flat|nested] 27+ messages in thread

* [PATCH 5/9] Avoid undefined behavior in parse_number
  2018-08-27 14:57 [PATCH 0/9] Add UBSan to the build Tom Tromey
  2018-08-27 14:57 ` [PATCH 1/9] Do not pass NULL to memcpy Tom Tromey
@ 2018-08-27 14:57 ` Tom Tromey
  2018-08-27 14:57 ` [PATCH 2/9] Use unsigned as base type for some enums Tom Tromey
                   ` (7 subsequent siblings)
  9 siblings, 0 replies; 27+ messages in thread
From: Tom Tromey @ 2018-08-27 14:57 UTC (permalink / raw)
  To: gdb-patches; +Cc: Tom Tromey

-fsanitize=undefined pointed out that c-exp.y relied on undefined
behavior here:

      if (c != 'l' && c != 'u')
	n *= base;

...when a large hex constant "just fit" into a LONGEST, causing the
high bit to be set.

This fixes the problem by having the function work in an unsigned
type.

ChangeLog
2018-08-27  Tom Tromey  <tom@tromey.com>

	* c-exp.y (parse_number): Work in unsigned.  Remove casts.
---
 gdb/ChangeLog |  4 ++++
 gdb/c-exp.y   | 10 ++++------
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/gdb/c-exp.y b/gdb/c-exp.y
index a9ccbdcb65e..accee5d5451 100644
--- a/gdb/c-exp.y
+++ b/gdb/c-exp.y
@@ -1760,10 +1760,8 @@ static int
 parse_number (struct parser_state *par_state,
 	      const char *buf, int len, int parsed_float, YYSTYPE *putithere)
 {
-  /* FIXME: Shouldn't these be unsigned?  We don't deal with negative values
-     here, and we do kind of silly things like cast to unsigned.  */
-  LONGEST n = 0;
-  LONGEST prevn = 0;
+  ULONGEST n = 0;
+  ULONGEST prevn = 0;
   ULONGEST un;
 
   int i = 0;
@@ -1922,7 +1920,7 @@ parse_number (struct parser_state *par_state,
 	 on 0x123456789 when LONGEST is 32 bits.  */
       if (c != 'l' && c != 'u' && n != 0)
 	{	
-	  if ((unsigned_p && (ULONGEST) prevn >= (ULONGEST) n))
+	  if (unsigned_p && prevn >= n)
 	    error (_("Numeric constant too large."));
 	}
       prevn = n;
@@ -1940,7 +1938,7 @@ parse_number (struct parser_state *par_state,
      the case where it is we just always shift the value more than
      once, with fewer bits each time.  */
 
-  un = (ULONGEST)n >> 2;
+  un = n >> 2;
   if (long_p == 0
       && (un >> (gdbarch_int_bit (parse_gdbarch (par_state)) - 2)) == 0)
     {
-- 
2.13.6

^ permalink raw reply	[flat|nested] 27+ messages in thread

* [PATCH 7/9] Avoid undefined behavior in ada_operator_length
  2018-08-27 14:57 [PATCH 0/9] Add UBSan to the build Tom Tromey
                   ` (4 preceding siblings ...)
  2018-08-27 14:57 ` [PATCH 6/9] Avoid undefined behavior in read_signed_leb128 Tom Tromey
@ 2018-08-27 14:57 ` Tom Tromey
  2018-08-27 14:59 ` [PATCH 8/9] Avoid undefined behavior in expression dumping Tom Tromey
                   ` (3 subsequent siblings)
  9 siblings, 0 replies; 27+ messages in thread
From: Tom Tromey @ 2018-08-27 14:57 UTC (permalink / raw)
  To: gdb-patches; +Cc: Tom Tromey

-fsanitize=undefined pointed out this error:

    runtime error: load of value 2887952, which is not a valid value for type 'exp_opcode'

This happens in gdb.ada/complete.exp when processing "complete p
my_glob".  This does not parse, so the Ada parser throws an exception;
but then the code in parse_exp_in_context_1 accepts the expression
anyway.  However, as no elements have been written to the expression,
undefined behavior results.

The fix is to notice this case in parse_exp_in_context_1.  This patch
also adds an assertion to prefixify_expression to enforce this
pre-existing constraint.

ChangeLog
2018-08-27  Tom Tromey  <tom@tromey.com>

	* parse.c (prefixify_expression): Add assert.
	(parse_exp_in_context_1): Throw exception if the expression is
	empty.
---
 gdb/ChangeLog | 8 +++++++-
 gdb/parse.c   | 6 +++++-
 2 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/gdb/parse.c b/gdb/parse.c
index 1b58073a529..a9b802d0e6e 100644
--- a/gdb/parse.c
+++ b/gdb/parse.c
@@ -792,6 +792,7 @@ copy_name (struct stoken token)
 int
 prefixify_expression (struct expression *expr)
 {
+  gdb_assert (expr->nelts > 0);
   int len = sizeof (struct expression) + EXP_ELEM_TO_BYTES (expr->nelts);
   struct expression *temp;
   int inpos = expr->nelts, outpos = 0;
@@ -1205,7 +1206,10 @@ parse_exp_in_context_1 (const char **stringptr, CORE_ADDR pc,
     }
   CATCH (except, RETURN_MASK_ALL)
     {
-      if (! parse_completion)
+      /* If parsing for completion, allow this to succeed; but if no
+	 expression elements have been written, then there's nothing
+	 to do, so fail.  */
+      if (! parse_completion || ps.expout_ptr == 0)
 	throw_exception (except);
     }
   END_CATCH
-- 
2.13.6

^ permalink raw reply	[flat|nested] 27+ messages in thread

* [PATCH 3/9] Avoid undefined behavior in extract_integer
  2018-08-27 14:57 [PATCH 0/9] Add UBSan to the build Tom Tromey
                   ` (2 preceding siblings ...)
  2018-08-27 14:57 ` [PATCH 2/9] Use unsigned as base type for some enums Tom Tromey
@ 2018-08-27 14:57 ` Tom Tromey
  2018-08-28 18:39   ` Pedro Alves
  2018-08-27 14:57 ` [PATCH 6/9] Avoid undefined behavior in read_signed_leb128 Tom Tromey
                   ` (5 subsequent siblings)
  9 siblings, 1 reply; 27+ messages in thread
From: Tom Tromey @ 2018-08-27 14:57 UTC (permalink / raw)
  To: gdb-patches; +Cc: Tom Tromey

-fsanitize=undefined showed that extract_integer could left-shift a
negative value, which is undefined.  This patch fixes the problem by
doing all the work in an unsigned type, and then using a static_cast
at the end of the function.  This relies on implementation-defined
behavior, but I tend to think we are on safe ground there.  (Also, if
need be, violations of this could probably be detected, either by
configure or by a static_assert.)

ChangeLog
2018-08-27  Tom Tromey  <tom@tromey.com>

	* findvar.c (extract_integer): Do work in an unsigned type and
	cast at the end.
---
 gdb/ChangeLog | 5 +++++
 gdb/findvar.c | 4 ++--
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/gdb/findvar.c b/gdb/findvar.c
index 9256833ab60..f2b84db82a1 100644
--- a/gdb/findvar.c
+++ b/gdb/findvar.c
@@ -50,7 +50,7 @@ template<typename T, typename>
 T
 extract_integer (const gdb_byte *addr, int len, enum bfd_endian byte_order)
 {
-  T retval = 0;
+  typename std::make_unsigned<T>::type retval = 0;
   const unsigned char *p;
   const unsigned char *startaddr = addr;
   const unsigned char *endaddr = startaddr + len;
@@ -86,7 +86,7 @@ That operation is not available on integers of more than %d bytes."),
       for (; p >= startaddr; --p)
 	retval = (retval << 8) | *p;
     }
-  return retval;
+  return static_cast<T> (retval);
 }
 
 /* Explicit instantiations.  */
-- 
2.13.6

^ permalink raw reply	[flat|nested] 27+ messages in thread

* [PATCH 0/9] Add UBSan to the build
@ 2018-08-27 14:57 Tom Tromey
  2018-08-27 14:57 ` [PATCH 1/9] Do not pass NULL to memcpy Tom Tromey
                   ` (9 more replies)
  0 siblings, 10 replies; 27+ messages in thread
From: Tom Tromey @ 2018-08-27 14:57 UTC (permalink / raw)
  To: gdb-patches

This series adds undefined behavior checking to the build and fixes
all the cases of UB that are found by the test suite.  See the final
patch for details, but basically UBsan is enabled by default only in
development mode.

None of the current cases of UB seem to cause any bugs; but of course
compilers have a history of exploiting UB for optimizations, so it's
possible that UB will result in bugs in the future.

This series could be improved in a couple of ways.

First, it does not check for UB in any of the libraries used by gdb.

Second, most of the builders do not have the ubsan runtime library
installed.  (I don't know why this isn't just a dependency of gcc; it
seems strange to ship a non-working -fsanitize=undefined by default.)
It's possible that installing this library on the builders will result
in new failures.

Finally, I think it would be good -- in fact, even more useful -- to
treat the address sanitizer in a similar way.  I have some patches
toward this goal, but I haven't submitted them yet because there is
one ASan failure that I haven't fixed.

Let me know what you think.

Tom

^ permalink raw reply	[flat|nested] 27+ messages in thread

* [PATCH 4/9] Avoid undefined behavior in read_subrange_type
  2018-08-27 14:57 [PATCH 0/9] Add UBSan to the build Tom Tromey
                   ` (7 preceding siblings ...)
  2018-08-27 14:59 ` [PATCH 9/9] Add --enable-ubsan Tom Tromey
@ 2018-08-27 14:59 ` Tom Tromey
  2018-08-28 19:03 ` [PATCH 0/9] Add UBSan to the build Pedro Alves
  9 siblings, 0 replies; 27+ messages in thread
From: Tom Tromey @ 2018-08-27 14:59 UTC (permalink / raw)
  To: gdb-patches; +Cc: Tom Tromey

-fsanitize=undefined pointed out an undefined shift of a negative
value in read_subrange_type.  The fix is to do the work in an unsigned
type, where this is defined.

ChangeLog
2018-08-27  Tom Tromey  <tom@tromey.com>

	* dwarf2read.c (read_subrange_type): Make "negative_mask"
	unsigned.
---
 gdb/ChangeLog    | 5 +++++
 gdb/dwarf2read.c | 4 ++--
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/gdb/dwarf2read.c b/gdb/dwarf2read.c
index 8834d08a1c6..86ef1c4040b 100644
--- a/gdb/dwarf2read.c
+++ b/gdb/dwarf2read.c
@@ -17682,7 +17682,7 @@ read_subrange_type (struct die_info *die, struct dwarf2_cu *cu)
   int low_default_is_valid;
   int high_bound_is_count = 0;
   const char *name;
-  LONGEST negative_mask;
+  ULONGEST negative_mask;
 
   orig_base_type = die_type (die, cu);
   /* If ORIG_BASE_TYPE is a typedef, it will not be TYPE_UNSIGNED,
@@ -17815,7 +17815,7 @@ read_subrange_type (struct die_info *die, struct dwarf2_cu *cu)
      the bounds as signed, and thus sign-extend their values, when
      the base type is signed.  */
   negative_mask =
-    -((LONGEST) 1 << (TYPE_LENGTH (base_type) * TARGET_CHAR_BIT - 1));
+    -((ULONGEST) 1 << (TYPE_LENGTH (base_type) * TARGET_CHAR_BIT - 1));
   if (low.kind == PROP_CONST
       && !TYPE_UNSIGNED (base_type) && (low.data.const_val & negative_mask))
     low.data.const_val |= negative_mask;
-- 
2.13.6

^ permalink raw reply	[flat|nested] 27+ messages in thread

* [PATCH 8/9] Avoid undefined behavior in expression dumping
  2018-08-27 14:57 [PATCH 0/9] Add UBSan to the build Tom Tromey
                   ` (5 preceding siblings ...)
  2018-08-27 14:57 ` [PATCH 7/9] Avoid undefined behavior in ada_operator_length Tom Tromey
@ 2018-08-27 14:59 ` Tom Tromey
  2018-08-28 18:49   ` Pedro Alves
  2018-08-27 14:59 ` [PATCH 9/9] Add --enable-ubsan Tom Tromey
                   ` (2 subsequent siblings)
  9 siblings, 1 reply; 27+ messages in thread
From: Tom Tromey @ 2018-08-27 14:59 UTC (permalink / raw)
  To: gdb-patches; +Cc: Tom Tromey

-fsanitize=undefined pointed out undefined behavior in
dump_raw_expression like:

    runtime error: load of value 2887952, which is not a valid value for type 'exp_opcode'

dump_raw_expression will try to print the opcode for each element of
the expression, even when it is not valid.  To allow this, but have it
avoid undefined behavior, this patch sets the underlying type of enum
exp_opcode, and arranges for op_name to handle invalid opcodes more
nicely.

ChangeLog
2018-08-27  Tom Tromey  <tom@tromey.com>

	* expression.h (enum exp_opcode): Use uint8_t as base type.
	* expprint.c (op_name): Handle invalid opcodes.
---
 gdb/ChangeLog    | 5 +++++
 gdb/expprint.c   | 6 ++++++
 gdb/expression.h | 2 +-
 3 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/gdb/expprint.c b/gdb/expprint.c
index d6ed41253ed..7c91cc73749 100644
--- a/gdb/expprint.c
+++ b/gdb/expprint.c
@@ -687,6 +687,12 @@ static int dump_subexp_body (struct expression *exp, struct ui_file *, int);
 const char *
 op_name (struct expression *exp, enum exp_opcode opcode)
 {
+  if (opcode >= OP_UNUSED_LAST)
+    {
+      char *cell = get_print_cell ();
+      xsnprintf (cell, PRINT_CELL_SIZE, "unknown opcode: %d", int (opcode));
+      return cell;
+    }
   return exp->language_defn->la_exp_desc->op_name (opcode);
 }
 
diff --git a/gdb/expression.h b/gdb/expression.h
index 9f26bb8d60b..db572efe2a3 100644
--- a/gdb/expression.h
+++ b/gdb/expression.h
@@ -39,7 +39,7 @@
    and skip that many.  Strings, like numbers, are indicated
    by the preceding opcode.  */
 
-enum exp_opcode
+enum exp_opcode : uint8_t
   {
 #define OP(name) name ,
 
-- 
2.13.6

^ permalink raw reply	[flat|nested] 27+ messages in thread

* [PATCH 9/9] Add --enable-ubsan
  2018-08-27 14:57 [PATCH 0/9] Add UBSan to the build Tom Tromey
                   ` (6 preceding siblings ...)
  2018-08-27 14:59 ` [PATCH 8/9] Avoid undefined behavior in expression dumping Tom Tromey
@ 2018-08-27 14:59 ` Tom Tromey
  2018-08-27 14:59 ` [PATCH 4/9] Avoid undefined behavior in read_subrange_type Tom Tromey
  2018-08-28 19:03 ` [PATCH 0/9] Add UBSan to the build Pedro Alves
  9 siblings, 0 replies; 27+ messages in thread
From: Tom Tromey @ 2018-08-27 14:59 UTC (permalink / raw)
  To: gdb-patches; +Cc: Tom Tromey

This adds --enable-ubsan to gdb's configure.  By default it is enabled
in development mode, and disabled otherwise.  This passes both
-fsanitize=undefined and -fno-sanitize-recover=undefined to
compilations, so that undefined behavior violations will be sure to
cause test failures.

ChangeLog
2018-08-27  Tom Tromey  <tom@tromey.com>

	* acinclude.m4: Include sanitize.m4.
	* configure: Rebuild.
	* configure.ac: Call AM_GDB_UBSAN.
	* sanitize.m4: New file.
---
 gdb/ChangeLog    |   7 ++++
 gdb/acinclude.m4 |   3 ++
 gdb/configure    | 105 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 gdb/configure.ac |   1 +
 gdb/sanitize.m4  |  46 ++++++++++++++++++++++++
 5 files changed, 162 insertions(+)
 create mode 100644 gdb/sanitize.m4

diff --git a/gdb/acinclude.m4 b/gdb/acinclude.m4
index 3c2d01015bf..52ba3f9ed6e 100644
--- a/gdb/acinclude.m4
+++ b/gdb/acinclude.m4
@@ -15,6 +15,9 @@ sinclude(transform.m4)
 # This gets AM_GDB_WARNINGS.
 sinclude(warning.m4)
 
+# AM_GDB_UBSAN
+sinclude(sanitize.m4)
+
 dnl gdb/configure.in uses BFD_NEED_DECLARATION, so get its definition.
 sinclude(../bfd/bfd.m4)
 
diff --git a/gdb/configure b/gdb/configure
index 9cd0036848c..eb5aafd5717 100755
--- a/gdb/configure
+++ b/gdb/configure
@@ -886,6 +886,7 @@ with_system_gdbinit
 enable_werror
 enable_build_warnings
 enable_gdb_build_warnings
+enable_ubsan
 with_lzma
 with_liblzma_prefix
 with_tcl
@@ -1558,6 +1559,7 @@ Optional Features:
   --enable-gdb-build-warnings
                           enable GDB specific build-time compiler warnings if
                           gcc is used
+  --enable-ubsan          enable undefined behavior sanitizer (auto/yes/no)
   --enable-sim            link gdb with simulator
   --enable-multi-ice      build the multi-ice-gdb-server
   --enable-gdbserver      automatically build gdbserver (yes/no/auto, default
@@ -2451,6 +2453,52 @@ $as_echo "$ac_res" >&6; }
   eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
 
 } # ac_fn_c_check_decl
+
+# ac_fn_cxx_try_link LINENO
+# -------------------------
+# Try to link conftest.$ac_ext, and return whether this succeeded.
+ac_fn_cxx_try_link ()
+{
+  as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
+  rm -f conftest.$ac_objext conftest$ac_exeext
+  if { { ac_try="$ac_link"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+$as_echo "$ac_try_echo"; } >&5
+  (eval "$ac_link") 2>conftest.err
+  ac_status=$?
+  if test -s conftest.err; then
+    grep -v '^ *+' conftest.err >conftest.er1
+    cat conftest.er1 >&5
+    mv -f conftest.er1 conftest.err
+  fi
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; } && {
+	 test -z "$ac_cxx_werror_flag" ||
+	 test ! -s conftest.err
+       } && test -s conftest$ac_exeext && {
+	 test "$cross_compiling" = yes ||
+	 test -x conftest$ac_exeext
+       }; then :
+  ac_retval=0
+else
+  $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+	ac_retval=1
+fi
+  # Delete the IPA/IPO (Inter Procedural Analysis/Optimization) information
+  # created by the PGI compiler (conftest_ipa8_conftest.oo), as it would
+  # interfere with the next link command; also delete a directory that is
+  # left behind by Apple's compiler.  We do this before executing the actions.
+  rm -rf conftest.dSYM conftest_ipa8_conftest.oo
+  eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
+  as_fn_set_status $ac_retval
+
+} # ac_fn_cxx_try_link
 cat >config.log <<_ACEOF
 This file contains any messages produced by compilers while
 running configure, to aid debugging if configure makes a mistake.
@@ -15555,6 +15603,63 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu
 
 
 
+# Check whether --enable-ubsan was given.
+if test "${enable_ubsan+set}" = set; then :
+  enableval=$enable_ubsan;
+else
+  enable_ubsan=auto
+fi
+
+if test "x$enable_ubsan" = xauto; then
+  if $development; then
+    enable_ubsan=yes
+  fi
+fi
+ac_ext=cpp
+ac_cpp='$CXXCPP $CPPFLAGS'
+ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_cxx_compiler_gnu
+
+if test "x$enable_ubsan" = xyes; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether -fsanitize=undefined is accepted" >&5
+$as_echo_n "checking whether -fsanitize=undefined is accepted... " >&6; }
+  saved_CXXFLAGS="$CXXFLAGS"
+  CXXFLAGS="$CXXFLAGS -fsanitize=undefined -fno-sanitize-recover=undefined"
+        cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main ()
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_cxx_try_link "$LINENO"; then :
+  enable_ubsan=yes
+else
+  enable_ubsan=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+  CXXFLAGS="$saved_CXXFLAGS"
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $enable_ubsan" >&5
+$as_echo "$enable_ubsan" >&6; }
+  if test "x$enable_ubsan" = xyes; then
+    WARN_CFLAGS="$WARN_CFLAGS -fsanitize=undefined -fno-sanitize-recover=undefined"
+    CONFIG_LDFLAGS="$CONFIG_LDFLAGS -fsanitize=undefined"
+  fi
+fi
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+
+
 # In the Cygwin environment, we need some additional flags.
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for cygwin" >&5
 $as_echo_n "checking for cygwin... " >&6; }
diff --git a/gdb/configure.ac b/gdb/configure.ac
index 13bc5f9a8f2..a87a5ff43ac 100644
--- a/gdb/configure.ac
+++ b/gdb/configure.ac
@@ -1860,6 +1860,7 @@ GDB_AC_WITH_DIR(SYSTEM_GDBINIT, system-gdbinit,
     [])
 
 AM_GDB_WARNINGS
+AM_GDB_UBSAN
 
 # In the Cygwin environment, we need some additional flags.
 AC_CACHE_CHECK([for cygwin], gdb_cv_os_cygwin,
diff --git a/gdb/sanitize.m4 b/gdb/sanitize.m4
new file mode 100644
index 00000000000..3e2207b466c
--- /dev/null
+++ b/gdb/sanitize.m4
@@ -0,0 +1,46 @@
+dnl Autoconf configure script for GDB, the GNU debugger.
+dnl Copyright (C) 2018 Free Software Foundation, Inc.
+dnl
+dnl This file is part of GDB.
+dnl
+dnl This program is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU General Public License as published by
+dnl the Free Software Foundation; either version 3 of the License, or
+dnl (at your option) any later version.
+dnl
+dnl This program is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+dnl GNU General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU General Public License
+dnl along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+AC_DEFUN([AM_GDB_UBSAN],[
+AC_ARG_ENABLE(ubsan,
+  AS_HELP_STRING([--enable-ubsan],
+                 [enable undefined behavior sanitizer (auto/yes/no)]),
+  [],enable_ubsan=auto)
+if test "x$enable_ubsan" = xauto; then
+  if $development; then
+    enable_ubsan=yes
+  fi
+fi
+AC_LANG_PUSH([C++])
+if test "x$enable_ubsan" = xyes; then
+  AC_MSG_CHECKING(whether -fsanitize=undefined is accepted)
+  saved_CXXFLAGS="$CXXFLAGS"
+  CXXFLAGS="$CXXFLAGS -fsanitize=undefined -fno-sanitize-recover=undefined"
+  dnl A link check is required because it is possible to install gcc
+  dnl without libubsan, leading to link failures when compiling with
+  dnl -fsanitize=undefined.
+  AC_TRY_LINK([],[],enable_ubsan=yes,enable_ubsan=no)
+  CXXFLAGS="$saved_CXXFLAGS"
+  AC_MSG_RESULT($enable_ubsan)
+  if test "x$enable_ubsan" = xyes; then
+    WARN_CFLAGS="$WARN_CFLAGS -fsanitize=undefined -fno-sanitize-recover=undefined"
+    CONFIG_LDFLAGS="$CONFIG_LDFLAGS -fsanitize=undefined"
+  fi
+fi
+AC_LANG_POP([C++])
+])
-- 
2.13.6

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 1/9] Do not pass NULL to memcpy
  2018-08-27 14:57 ` [PATCH 1/9] Do not pass NULL to memcpy Tom Tromey
@ 2018-08-27 19:12   ` Simon Marchi
  2018-08-28 22:40     ` Tom Tromey
  0 siblings, 1 reply; 27+ messages in thread
From: Simon Marchi @ 2018-08-27 19:12 UTC (permalink / raw)
  To: Tom Tromey, gdb-patches

On 2018-08-27 10:56 AM, Tom Tromey wrote:
> -fsanitize=undefined pointed out a couple of spots that pass NULL to
> memcpy, which is undefined behavior according to the C standard.
> 
> ChangeLog
> 2018-08-27  Tom Tromey  <tom@tromey.com>
> 
> 	* namespace.c (add_using_directive): Don't pass NULL to memcpy.
> 	* dwarf2-frame.h (struct dwarf2_frame_state_reg_info): Don't pass
> 	NULL to memcpy.
> ---
>  gdb/ChangeLog      | 6 ++++++
>  gdb/dwarf2-frame.h | 3 ++-
>  gdb/namespace.c    | 5 +++--
>  3 files changed, 11 insertions(+), 3 deletions(-)
> 
> diff --git a/gdb/dwarf2-frame.h b/gdb/dwarf2-frame.h
> index 52316e5e168..6844010c8df 100644
> --- a/gdb/dwarf2-frame.h
> +++ b/gdb/dwarf2-frame.h
> @@ -110,7 +110,8 @@ struct dwarf2_frame_state_reg_info
>      size_t size = src.num_regs * sizeof (struct dwarf2_frame_state_reg);
>  
>      reg = (struct dwarf2_frame_state_reg *) xmalloc (size);
> -    memcpy (reg, src.reg, size);
> +    if (size > 0)
> +      memcpy (reg, src.reg, size);
>    }

While the patch does not look wrong, I think the problem would "solve itself"
"reg" was an std::vector, since an std::vector already has an appropriate copy
constructor.  It would also simplify a lot this area of the code, for example,
the alloc_regs method would become a one-liner.

>  
>    /* Assignment operator for both move-assignment and copy-assignment.  */
> diff --git a/gdb/namespace.c b/gdb/namespace.c
> index be998d9d491..85c0c4b14d7 100644
> --- a/gdb/namespace.c
> +++ b/gdb/namespace.c
> @@ -111,8 +111,9 @@ add_using_directive (struct using_direct **using_directives,
>    else
>      newobj->declaration = declaration;
>  
> -  memcpy (newobj->excludes, excludes.data (),
> -	  excludes.size () * sizeof (*newobj->excludes));
> +  if (!excludes.empty ())
> +    memcpy (newobj->excludes, excludes.data (),
> +	    excludes.size () * sizeof (*newobj->excludes));
>    newobj->excludes[excludes.size ()] = NULL;
>  
>    newobj->next = *using_directives;
> 

Here too it would be nice to have make using_direct::excludes an std::vector,
but it is not as simple, so I think adding that "if" is reasonnable.

Simon

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 2/9] Use unsigned as base type for some enums
  2018-08-27 14:57 ` [PATCH 2/9] Use unsigned as base type for some enums Tom Tromey
@ 2018-08-27 19:22   ` Simon Marchi
  2018-08-27 20:22     ` Tom Tromey
  0 siblings, 1 reply; 27+ messages in thread
From: Simon Marchi @ 2018-08-27 19:22 UTC (permalink / raw)
  To: Tom Tromey, gdb-patches

On 2018-08-27 10:56 AM, Tom Tromey wrote:
> -fsanitize=undefined complains about using operator~ on various enum
> types that are used with DEF_ENUM_FLAGS_TYPE.  This patch fixes these
> problems by explicitly setting the base type for these enums to
> unsigned.

Can you give an example of how the error manifests itself (I'm not really
familiar with -fsanitize=undefined).  Is the error reported at compile-time
or run-time?  I'm not able to make a synthetic standalone example to reproduce
the error.

In any case, that LGTM if that makes the compiler happy.  If the error reported
by -fsanitize=undefined is at run-time, could we add a static assert in there
to make sure the underlying types of types used with DEF_ENUM_FLAGS_TYPE are
unsigned, to get a compilation error?

Simon

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 2/9] Use unsigned as base type for some enums
  2018-08-27 19:22   ` Simon Marchi
@ 2018-08-27 20:22     ` Tom Tromey
  2018-08-27 21:26       ` Simon Marchi
  0 siblings, 1 reply; 27+ messages in thread
From: Tom Tromey @ 2018-08-27 20:22 UTC (permalink / raw)
  To: Simon Marchi; +Cc: Tom Tromey, gdb-patches

>>>>> "Simon" == Simon Marchi <simon.marchi@ericsson.com> writes:

Simon> Can you give an example of how the error manifests itself (I'm not really
Simon> familiar with -fsanitize=undefined).  Is the error reported at compile-time
Simon> or run-time?  I'm not able to make a synthetic standalone example to reproduce
Simon> the error.

You will get an error at runtime, and with the flags added by the last
patch in the series, a crash.

The error looks somewhat like the error from the expression dumping
patch:

  runtime error: load of value 2887952, which is not a valid value for type 'exp_opcode'

(I don't have an exact error handy, this was just taken from the other
patch.)

Simon> In any case, that LGTM if that makes the compiler happy.  If the error reported
Simon> by -fsanitize=undefined is at run-time, could we add a static assert in there
Simon> to make sure the underlying types of types used with DEF_ENUM_FLAGS_TYPE are
Simon> unsigned, to get a compilation error?

With the final patch, any UB will cause gdb to crash (in development
mode), presumably leading to a test suite failure.  I think it isn't
necessary to require unsigned as the underlying type -- any type will
do.  However I don't know how to assert that.

Tom

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 2/9] Use unsigned as base type for some enums
  2018-08-27 20:22     ` Tom Tromey
@ 2018-08-27 21:26       ` Simon Marchi
  2018-08-28 18:54         ` Pedro Alves
  2018-08-28 22:42         ` Tom Tromey
  0 siblings, 2 replies; 27+ messages in thread
From: Simon Marchi @ 2018-08-27 21:26 UTC (permalink / raw)
  To: Tom Tromey; +Cc: gdb-patches

On 2018-08-27 04:21 PM, Tom Tromey wrote:
>>>>>> "Simon" == Simon Marchi <simon.marchi@ericsson.com> writes:
> 
> Simon> Can you give an example of how the error manifests itself (I'm not really
> Simon> familiar with -fsanitize=undefined).  Is the error reported at compile-time
> Simon> or run-time?  I'm not able to make a synthetic standalone example to reproduce
> Simon> the error.
> 
> You will get an error at runtime, and with the flags added by the last
> patch in the series, a crash.
> 
> The error looks somewhat like the error from the expression dumping
> patch:
> 
>   runtime error: load of value 2887952, which is not a valid value for type 'exp_opcode'
> 
> (I don't have an exact error handy, this was just taken from the other
> patch.)
> 
> Simon> In any case, that LGTM if that makes the compiler happy.  If the error reported
> Simon> by -fsanitize=undefined is at run-time, could we add a static assert in there
> Simon> to make sure the underlying types of types used with DEF_ENUM_FLAGS_TYPE are
> Simon> unsigned, to get a compilation error?
> 
> With the final patch, any UB will cause gdb to crash (in development
> mode), presumably leading to a test suite failure.  I think it isn't
> necessary to require unsigned as the underlying type -- any type will
> do.  However I don't know how to assert that.

Indeed, it's only necessary if the ~ operator is used.  OTOH, it doesn't really
make sense to use a signed type for flags, so we wouldn't lose anything by enforcing
unsigned types.  If I understand correctly, the errors come from code like this, when
making a bit mask to clear some bits:

  btinfo->flags &= ~(BTHR_MOVE | BTHR_STOP);

Doing a static assert like this:

diff --git a/gdb/common/enum-flags.h b/gdb/common/enum-flags.h
index 82568a5..c82970c 100644
--- a/gdb/common/enum-flags.h
+++ b/gdb/common/enum-flags.h
@@ -92,6 +92,7 @@ class enum_flags
 public:
   typedef E enum_type;
   typedef typename enum_underlying_type<enum_type>::type underlying_type;
+  gdb_static_assert (std::is_unsigned<underlying_type>::value);

 private:
   /* Private type used to support initializing flag types with zero:


would enforce it at compile time, which is preferable than finding it at
runtime.

Simon

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 3/9] Avoid undefined behavior in extract_integer
  2018-08-27 14:57 ` [PATCH 3/9] Avoid undefined behavior in extract_integer Tom Tromey
@ 2018-08-28 18:39   ` Pedro Alves
  2018-08-29  0:11     ` Tom Tromey
  0 siblings, 1 reply; 27+ messages in thread
From: Pedro Alves @ 2018-08-28 18:39 UTC (permalink / raw)
  To: Tom Tromey, gdb-patches

On 08/27/2018 03:56 PM, Tom Tromey wrote:
> -fsanitize=undefined showed that extract_integer could left-shift a
> negative value, which is undefined.  This patch fixes the problem by
> doing all the work in an unsigned type, and then using a static_cast
> at the end of the function.  This relies on implementation-defined
> behavior, but I tend to think we are on safe ground there.  (Also, if
> need be, violations of this could probably be detected, either by
> configure or by a static_assert.)
> 
> ChangeLog
> 2018-08-27  Tom Tromey  <tom@tromey.com>
> 
> 	* findvar.c (extract_integer): Do work in an unsigned type and
> 	cast at the end.

LGTM.

I suspect we assume two's complement in a good number of
places, and I don't think it's worth it to bother with anything
else.  There's even been discussion in the C++ committee about
baking the the assumption into the language.

Is the cast really necessary, though?  What error do you get?

Thanks,
Pedro Alves

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 8/9] Avoid undefined behavior in expression dumping
  2018-08-27 14:59 ` [PATCH 8/9] Avoid undefined behavior in expression dumping Tom Tromey
@ 2018-08-28 18:49   ` Pedro Alves
  2018-08-29  0:27     ` Tom Tromey
  0 siblings, 1 reply; 27+ messages in thread
From: Pedro Alves @ 2018-08-28 18:49 UTC (permalink / raw)
  To: Tom Tromey, gdb-patches

On 08/27/2018 03:56 PM, Tom Tromey wrote:
> -fsanitize=undefined pointed out undefined behavior in
> dump_raw_expression like:
> 
>     runtime error: load of value 2887952, which is not a valid value for type 'exp_opcode'
> 
> dump_raw_expression will try to print the opcode for each element of
> the expression, even when it is not valid.  To allow this, but have it
> avoid undefined behavior, this patch sets the underlying type of enum
> exp_opcode, and arranges for op_name to handle invalid opcodes more
> nicely.

Could you include before/after example output?

>  const char *
>  op_name (struct expression *exp, enum exp_opcode opcode)
>  {
> +  if (opcode >= OP_UNUSED_LAST)
> +    {
> +      char *cell = get_print_cell ();
> +      xsnprintf (cell, PRINT_CELL_SIZE, "unknown opcode: %d", int (opcode));

If the underlying type is unsigned, what is the rationale for
printing it as signed?

> +      return cell;
> +    }
>    return exp->language_defn->la_exp_desc->op_name (opcode);
>  }
>  
> diff --git a/gdb/expression.h b/gdb/expression.h
> index 9f26bb8d60b..db572efe2a3 100644
> --- a/gdb/expression.h
> +++ b/gdb/expression.h
> @@ -39,7 +39,7 @@
>     and skip that many.  Strings, like numbers, are indicated
>     by the preceding opcode.  */
>  
> -enum exp_opcode
> +enum exp_opcode : uint8_t
>    {
>  #define OP(name) name ,
>  
> 

Thanks,
Pedro Alves

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 2/9] Use unsigned as base type for some enums
  2018-08-27 21:26       ` Simon Marchi
@ 2018-08-28 18:54         ` Pedro Alves
  2018-08-28 22:42         ` Tom Tromey
  1 sibling, 0 replies; 27+ messages in thread
From: Pedro Alves @ 2018-08-28 18:54 UTC (permalink / raw)
  To: Simon Marchi, Tom Tromey; +Cc: gdb-patches

On 08/27/2018 10:26 PM, Simon Marchi wrote:
> On 2018-08-27 04:21 PM, Tom Tromey wrote:
>>>>>>> "Simon" == Simon Marchi <simon.marchi@ericsson.com> writes:
>>
>> Simon> Can you give an example of how the error manifests itself (I'm not really
>> Simon> familiar with -fsanitize=undefined).  Is the error reported at compile-time
>> Simon> or run-time?  I'm not able to make a synthetic standalone example to reproduce
>> Simon> the error.
>>
>> You will get an error at runtime, and with the flags added by the last
>> patch in the series, a crash.
>>
>> The error looks somewhat like the error from the expression dumping
>> patch:
>>
>>   runtime error: load of value 2887952, which is not a valid value for type 'exp_opcode'
>>
>> (I don't have an exact error handy, this was just taken from the other
>> patch.)
>>
>> Simon> In any case, that LGTM if that makes the compiler happy.  If the error reported
>> Simon> by -fsanitize=undefined is at run-time, could we add a static assert in there
>> Simon> to make sure the underlying types of types used with DEF_ENUM_FLAGS_TYPE are
>> Simon> unsigned, to get a compilation error?
>>
>> With the final patch, any UB will cause gdb to crash (in development
>> mode), presumably leading to a test suite failure.  I think it isn't
>> necessary to require unsigned as the underlying type -- any type will
>> do.  However I don't know how to assert that.
> 
> Indeed, it's only necessary if the ~ operator is used.  OTOH, it doesn't really
> make sense to use a signed type for flags, so we wouldn't lose anything by enforcing
> unsigned types.  If I understand correctly, the errors come from code like this, when
> making a bit mask to clear some bits:
> 
>   btinfo->flags &= ~(BTHR_MOVE | BTHR_STOP);
> 
> Doing a static assert like this:
> 
> diff --git a/gdb/common/enum-flags.h b/gdb/common/enum-flags.h
> index 82568a5..c82970c 100644
> --- a/gdb/common/enum-flags.h
> +++ b/gdb/common/enum-flags.h
> @@ -92,6 +92,7 @@ class enum_flags
>  public:
>    typedef E enum_type;
>    typedef typename enum_underlying_type<enum_type>::type underlying_type;
> +  gdb_static_assert (std::is_unsigned<underlying_type>::value);
> 

Yeah.  

I've been meaning to find time to repost by enum-flags v2 fixes,
but, it's never happened...  One of the things that I wanted to
look at is exactly this issue, seeing about coming up with some
solution that is compatible with C++98/C++03, given that the GCC
folks expressed interest in sharing the enum-flags.h header.
Off hand, all I can think of is to introduce a "clear(unsigned mask)"
method that would be used instead of &= and ~...  But maybe really
best is to ignore that (C++98), expecting that GCC will come around
to requiring C++11 in some reasonable timeframe...

>  private:
>    /* Private type used to support initializing flag types with zero:
> 
> 
> would enforce it at compile time, which is preferable than finding it at
> runtime.
Yes, I think that we should add that.

Thanks,
Pedro Alves

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 0/9] Add UBSan to the build
  2018-08-27 14:57 [PATCH 0/9] Add UBSan to the build Tom Tromey
                   ` (8 preceding siblings ...)
  2018-08-27 14:59 ` [PATCH 4/9] Avoid undefined behavior in read_subrange_type Tom Tromey
@ 2018-08-28 19:03 ` Pedro Alves
  2018-08-29  0:45   ` Tom Tromey
  9 siblings, 1 reply; 27+ messages in thread
From: Pedro Alves @ 2018-08-28 19:03 UTC (permalink / raw)
  To: Tom Tromey, gdb-patches

On 08/27/2018 03:56 PM, Tom Tromey wrote:
> This series adds undefined behavior checking to the build and fixes
> all the cases of UB that are found by the test suite.  See the final
> patch for details, but basically UBsan is enabled by default only in
> development mode.
> 
> None of the current cases of UB seem to cause any bugs; but of course
> compilers have a history of exploiting UB for optimizations, so it's
> possible that UB will result in bugs in the future.
> 
> This series could be improved in a couple of ways.
> 
> First, it does not check for UB in any of the libraries used by gdb.
> 
> Second, most of the builders do not have the ubsan runtime library
> installed.  (I don't know why this isn't just a dependency of gcc; it
> seems strange to ship a non-working -fsanitize=undefined by default.)
> It's possible that installing this library on the builders will result
> in new failures.
> 
> Finally, I think it would be good -- in fact, even more useful -- to
> treat the address sanitizer in a similar way.  I have some patches
> toward this goal, but I haven't submitted them yet because there is
> one ASan failure that I haven't fixed.
> 
> Let me know what you think.

I think this is a good idea.  On enabling this by default on devel
builds, do you have a sense of CPU/memory overhead this introduces?

Thanks,
Pedro Alves

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 1/9] Do not pass NULL to memcpy
  2018-08-27 19:12   ` Simon Marchi
@ 2018-08-28 22:40     ` Tom Tromey
  0 siblings, 0 replies; 27+ messages in thread
From: Tom Tromey @ 2018-08-28 22:40 UTC (permalink / raw)
  To: Simon Marchi; +Cc: Tom Tromey, gdb-patches

>>>>> "Simon" == Simon Marchi <simon.marchi@ericsson.com> writes:

Simon> While the patch does not look wrong, I think the problem would "solve itself"
Simon> "reg" was an std::vector, since an std::vector already has an appropriate copy
Simon> constructor.  It would also simplify a lot this area of the code, for example,
Simon> the alloc_regs method would become a one-liner.

I did this and I'll include it as a separate patch in the next round.

Tom

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 2/9] Use unsigned as base type for some enums
  2018-08-27 21:26       ` Simon Marchi
  2018-08-28 18:54         ` Pedro Alves
@ 2018-08-28 22:42         ` Tom Tromey
  2018-08-29  0:01           ` Tom Tromey
  1 sibling, 1 reply; 27+ messages in thread
From: Tom Tromey @ 2018-08-28 22:42 UTC (permalink / raw)
  To: Simon Marchi; +Cc: Tom Tromey, gdb-patches

>>>>> "Simon" == Simon Marchi <simon.marchi@ericsson.com> writes:

Simon> Doing a static assert like this:

Simon> +  gdb_static_assert (std::is_unsigned<underlying_type>::value);

Simon> would enforce it at compile time, which is preferable than finding it at
Simon> runtime.

I made this change.

Tom

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 2/9] Use unsigned as base type for some enums
  2018-08-28 22:42         ` Tom Tromey
@ 2018-08-29  0:01           ` Tom Tromey
  0 siblings, 0 replies; 27+ messages in thread
From: Tom Tromey @ 2018-08-29  0:01 UTC (permalink / raw)
  To: Tom Tromey; +Cc: Simon Marchi, gdb-patches

>>>>> "Tom" == Tom Tromey <tom@tromey.com> writes:

>>>>> "Simon" == Simon Marchi <simon.marchi@ericsson.com> writes:
Simon> Doing a static assert like this:

Simon> +  gdb_static_assert (std::is_unsigned<underlying_type>::value);

Simon> would enforce it at compile time, which is preferable than finding it at
Simon> runtime.

Tom> I made this change.

This change turns out to be a bit too eager.  It requires changing all
the enums that are used by DEF_ENUM_FLAGS_TYPE; including
gcc_qualifiers, which is maintained in gcc (and theoretically at least
is C compatible) -- but operator~ is never actually used with these
other enums, so there isn't any UB to avoid.

However, moving the static assert to operator~ seems to work.

Tom

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 3/9] Avoid undefined behavior in extract_integer
  2018-08-28 18:39   ` Pedro Alves
@ 2018-08-29  0:11     ` Tom Tromey
  0 siblings, 0 replies; 27+ messages in thread
From: Tom Tromey @ 2018-08-29  0:11 UTC (permalink / raw)
  To: Pedro Alves; +Cc: Tom Tromey, gdb-patches

>>>>> "Pedro" == Pedro Alves <palves@redhat.com> writes:

Pedro> Is the cast really necessary, though?  What error do you get?

It's not necessary, so I've removed it.

Tom

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 8/9] Avoid undefined behavior in expression dumping
  2018-08-28 18:49   ` Pedro Alves
@ 2018-08-29  0:27     ` Tom Tromey
  0 siblings, 0 replies; 27+ messages in thread
From: Tom Tromey @ 2018-08-29  0:27 UTC (permalink / raw)
  To: Pedro Alves; +Cc: Tom Tromey, gdb-patches

>>>>> "Pedro" == Pedro Alves <palves@redhat.com> writes:

Pedro> Could you include before/after example output?

It'll be in the next revision.

>> const char *
>> op_name (struct expression *exp, enum exp_opcode opcode)
>> {
>> +  if (opcode >= OP_UNUSED_LAST)
>> +    {
>> +      char *cell = get_print_cell ();
>> +      xsnprintf (cell, PRINT_CELL_SIZE, "unknown opcode: %d", int (opcode));

Pedro> If the underlying type is unsigned, what is the rationale for
Pedro> printing it as signed?

Thinko.

Tom

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 0/9] Add UBSan to the build
  2018-08-28 19:03 ` [PATCH 0/9] Add UBSan to the build Pedro Alves
@ 2018-08-29  0:45   ` Tom Tromey
  2018-08-30 18:41     ` Pedro Alves
  0 siblings, 1 reply; 27+ messages in thread
From: Tom Tromey @ 2018-08-29  0:45 UTC (permalink / raw)
  To: Pedro Alves; +Cc: Tom Tromey, gdb-patches

>>>>> "Pedro" == Pedro Alves <palves@redhat.com> writes:

Pedro> I think this is a good idea.  On enabling this by default on devel
Pedro> builds, do you have a sense of CPU/memory overhead this introduces?

I have no idea about the CPU.  It doesn't seem notably slower to me but
I am not sure I would really notice.

I don't think -fsanitize=undefined has notable memory overhead.  I
believe it just introduces checks at spots where undefined behavior
might be possible.

It does increase the size of gdb:

size before:

   text	   data	    bss	    dec	    hex	filename
23070184	2372034	 323664	25765882	18927fa	gdb

size after:

   text	   data	    bss	    dec	    hex	filename
26879230	9135154	 324688	36339072	22a7d80	gdb


I think this isn't so important because it's only for development mode
and because it can be turned off if need be.

Though, let's not oversell it -- UB sanitization didn't catch anything
very important.  Address sanitizer, on the other hand, did; and if I can
fix that last bug I would like to give -fsanitize=address the same
treatment as was done here.

Tom

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 0/9] Add UBSan to the build
  2018-08-29  0:45   ` Tom Tromey
@ 2018-08-30 18:41     ` Pedro Alves
  2018-08-30 18:56       ` Tom Tromey
  0 siblings, 1 reply; 27+ messages in thread
From: Pedro Alves @ 2018-08-30 18:41 UTC (permalink / raw)
  To: Tom Tromey; +Cc: gdb-patches

On 08/29/2018 01:44 AM, Tom Tromey wrote:
>>>>>> "Pedro" == Pedro Alves <palves@redhat.com> writes:
> 
> Pedro> I think this is a good idea.  On enabling this by default on devel
> Pedro> builds, do you have a sense of CPU/memory overhead this introduces?
> 
> I have no idea about the CPU.  It doesn't seem notably slower to me but
> I am not sure I would really notice.

Here:

  https://developer.apple.com/documentation/code_diagnostics/undefined_behavior_sanitizer

it says:

 "
 Performance Impact

 The performance impact of the Undefined Behavior Sanitizer is minimal, with
 with an average 20% CPU overhead in the Debug configuration.
 "

20% seems quite a bit.   Might be noticeable in "make check" time.
If you're doing performance analysis, say, running "make check-perf", or running
some use case under "perf", sounds like you'll need to be sure to
disable UBSan.  I also mildly worry about random people comparing the
performance of master GDB or some ftp snapshot against previous GDB versions
or against other debuggers and being mislead.  If that slowdown is true, I think we
should at least document it somewhere more prominently, as I think that so
far release vs development mode didn't have much of an impact?

Kind of like how GCC documents --enable-checking, at
<https://gcc.gnu.org/install/configure.html>:

"When you specify this option, the compiler is built to perform internal consistency
checks of the requested complexity. This does not change the generated code, but adds
error checking within the compiler. This will slow down the compiler and may only work
properly if you are building the compiler with GCC. This is ‘yes,extra’ by default when
building from SVN or snapshots, but ‘release’ for releases."

Thanks,
Pedro Alves

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 0/9] Add UBSan to the build
  2018-08-30 18:41     ` Pedro Alves
@ 2018-08-30 18:56       ` Tom Tromey
  2018-08-30 19:06         ` Pedro Alves
  0 siblings, 1 reply; 27+ messages in thread
From: Tom Tromey @ 2018-08-30 18:56 UTC (permalink / raw)
  To: Pedro Alves; +Cc: Tom Tromey, gdb-patches

>>>>> "Pedro" == Pedro Alves <palves@redhat.com> writes:

Pedro> Might be noticeable in "make check" time.
Pedro> If you're doing performance analysis, say, running "make check-perf", or running
Pedro> some use case under "perf", sounds like you'll need to be sure to
Pedro> disable UBSan.  I also mildly worry about random people comparing the
Pedro> performance of master GDB or some ftp snapshot against previous GDB versions
Pedro> or against other debuggers and being mislead.  If that slowdown is true, I think we
Pedro> should at least document it somewhere more prominently, as I think that so
Pedro> far release vs development mode didn't have much of an impact?

Anyone doing performance analysis really must compile with -O2 and
disable all the checking.  It really does matter, IIRC the sleb/uleb
decoders really suffer without optimization.

Pedro> Kind of like how GCC documents --enable-checking, at
Pedro> <https://gcc.gnu.org/install/configure.html>:

Pedro> "When you specify this option, the compiler is built to perform internal consistency
Pedro> checks of the requested complexity. This does not change the generated code, but adds
Pedro> error checking within the compiler. This will slow down the compiler and may only work
Pedro> properly if you are building the compiler with GCC. This is ‘yes,extra’ by default when
Pedro> building from SVN or snapshots, but ‘release’ for releases."

Do we have a similar doc somewhere?  I could update it.
Also I think this series should update NEWS for the new configure option.

Tom

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 0/9] Add UBSan to the build
  2018-08-30 18:56       ` Tom Tromey
@ 2018-08-30 19:06         ` Pedro Alves
  0 siblings, 0 replies; 27+ messages in thread
From: Pedro Alves @ 2018-08-30 19:06 UTC (permalink / raw)
  To: Tom Tromey; +Cc: gdb-patches

On 08/30/2018 07:55 PM, Tom Tromey wrote:
>>>>>> "Pedro" == Pedro Alves <palves@redhat.com> writes:
> 
> Pedro> Might be noticeable in "make check" time.
> Pedro> If you're doing performance analysis, say, running "make check-perf", or running
> Pedro> some use case under "perf", sounds like you'll need to be sure to
> Pedro> disable UBSan.  I also mildly worry about random people comparing the
> Pedro> performance of master GDB or some ftp snapshot against previous GDB versions
> Pedro> or against other debuggers and being mislead.  If that slowdown is true, I think we
> Pedro> should at least document it somewhere more prominently, as I think that so
> Pedro> far release vs development mode didn't have much of an impact?
> 
> Anyone doing performance analysis really must compile with -O2 and
> disable all the checking.  It really does matter, IIRC the sleb/uleb
> decoders really suffer without optimization.

I think optimization/-O2 is much more obvious.  Enabling/disabling
the checking so far hasn't been an issue so far, I don't think.

> 
> Pedro> Kind of like how GCC documents --enable-checking, at
> Pedro> <https://gcc.gnu.org/install/configure.html>:
> 
> Pedro> "When you specify this option, the compiler is built to perform internal consistency
> Pedro> checks of the requested complexity. This does not change the generated code, but adds
> Pedro> error checking within the compiler. This will slow down the compiler and may only work
> Pedro> properly if you are building the compiler with GCC. This is ‘yes,extra’ by default when
> Pedro> building from SVN or snapshots, but ‘release’ for releases."
> 
> Do we have a similar doc somewhere?  I could update it.

There's a "configure options" node in the manual.

And also a "configure' options" section in gdb/README too.

> Also I think this series should update NEWS for the new configure option.

/me nods.

Thanks,
Pedro Alves

^ permalink raw reply	[flat|nested] 27+ messages in thread

end of thread, other threads:[~2018-08-30 19:06 UTC | newest]

Thread overview: 27+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-08-27 14:57 [PATCH 0/9] Add UBSan to the build Tom Tromey
2018-08-27 14:57 ` [PATCH 1/9] Do not pass NULL to memcpy Tom Tromey
2018-08-27 19:12   ` Simon Marchi
2018-08-28 22:40     ` Tom Tromey
2018-08-27 14:57 ` [PATCH 5/9] Avoid undefined behavior in parse_number Tom Tromey
2018-08-27 14:57 ` [PATCH 2/9] Use unsigned as base type for some enums Tom Tromey
2018-08-27 19:22   ` Simon Marchi
2018-08-27 20:22     ` Tom Tromey
2018-08-27 21:26       ` Simon Marchi
2018-08-28 18:54         ` Pedro Alves
2018-08-28 22:42         ` Tom Tromey
2018-08-29  0:01           ` Tom Tromey
2018-08-27 14:57 ` [PATCH 3/9] Avoid undefined behavior in extract_integer Tom Tromey
2018-08-28 18:39   ` Pedro Alves
2018-08-29  0:11     ` Tom Tromey
2018-08-27 14:57 ` [PATCH 6/9] Avoid undefined behavior in read_signed_leb128 Tom Tromey
2018-08-27 14:57 ` [PATCH 7/9] Avoid undefined behavior in ada_operator_length Tom Tromey
2018-08-27 14:59 ` [PATCH 8/9] Avoid undefined behavior in expression dumping Tom Tromey
2018-08-28 18:49   ` Pedro Alves
2018-08-29  0:27     ` Tom Tromey
2018-08-27 14:59 ` [PATCH 9/9] Add --enable-ubsan Tom Tromey
2018-08-27 14:59 ` [PATCH 4/9] Avoid undefined behavior in read_subrange_type Tom Tromey
2018-08-28 19:03 ` [PATCH 0/9] Add UBSan to the build Pedro Alves
2018-08-29  0:45   ` Tom Tromey
2018-08-30 18:41     ` Pedro Alves
2018-08-30 18:56       ` Tom Tromey
2018-08-30 19:06         ` Pedro Alves

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).