public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] set range for strlen(array) to avoid spurious -Wstringop-overflow (PR 83373 , PR 78450)
@ 2017-12-12 20:15 Martin Sebor
  2017-12-13  0:35 ` Jeff Law
  2017-12-13  7:25 ` Bernhard Reutner-Fischer
  0 siblings, 2 replies; 18+ messages in thread
From: Martin Sebor @ 2017-12-12 20:15 UTC (permalink / raw)
  To: Gcc Patch List

[-- Attachment #1: Type: text/plain, Size: 819 bytes --]

Bug 83373 - False positive reported by -Wstringop-overflow, is
another example of warning triggered by a missed optimization
opportunity, this time in the strlen pass.  The optimization
is discussed in pr78450 - strlen(s) return value can be assumed
to be less than the size of s.  The gist of it is that the result
of strlen(array) can be assumed to be less than the size of
the array (except in the corner case of last struct members).

To avoid the false positive the attached patch adds this
optimization to the strlen pass.  Although the patch passes
bootstrap and regression tests for all front-ends I'm not sure
the way it determines the upper bound of the range is 100%
correct for languages with arrays with a non-zero lower bound.
Maybe it's just not as tight as it could be.

Tested on x86_64-linux.

Martin

[-- Attachment #2: gcc-83373.diff --]
[-- Type: text/x-patch, Size: 8863 bytes --]

PR middle-end/83373 - False positive reported by -Wstringop-overflow
PR tree-optimization/78450 - strlen(s) return value can be assumed to be less than the size of s

gcc/ChangeLog:

	PR middle-end/83373
	PR tree-optimization/78450
	* tree-ssa-strlen.c (maybe_set_strlen_range): New function.
	(handle_builtin_strlen): Call it.


gcc/testsuite/ChangeLog:

	PR middle-end/83373
	PR tree-optimization/78450
	* gcc.dg/pr83373.c: New test.
	* gcc.dg/strlenopt-36.c: New test.
	* gcc.dg/strlenopt-37.c: New test.

diff --git a/gcc/testsuite/gcc.dg/pr83373.c b/gcc/testsuite/gcc.dg/pr83373.c
new file mode 100644
index 0000000..6b0de09
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr83373.c
@@ -0,0 +1,33 @@
+/* PR middle-end/83373 - False positive reported by -Wstringop-overflow
+   { dg-do compile }
+   { dg-options "-O2 -Wstringop-overflow" }  */
+
+typedef __SIZE_TYPE__ size_t;
+
+char buf[100];
+
+void get_data (char*);
+
+__attribute__ ((nonnull(1, 2)))
+inline char* my_strcpy (char* dst, const char* src, size_t size)
+{
+  size_t len = __builtin_strlen (src);
+  if (len < size)
+    __builtin_memcpy (dst, src, len + 1);
+  else
+    {
+      __builtin_memcpy (dst, src, size - 1); /* { dg-bogus "\\\[-Wstringop-oveflow]" } */
+      dst[size - 1] = '\0';
+    }
+
+  return dst;
+}
+
+void test(void)
+{
+  char data[20] = "12345";
+
+  get_data (data);
+
+  my_strcpy (buf, data, sizeof buf);
+}
diff --git a/gcc/testsuite/gcc.dg/strlenopt-36.c b/gcc/testsuite/gcc.dg/strlenopt-36.c
new file mode 100644
index 0000000..d6fcca2
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/strlenopt-36.c
@@ -0,0 +1,86 @@
+/* PR tree-optimization/78450 - strlen(s) return value can be assumed
+   to be less than the size of s
+   { dg-do compile }
+   { dg-options "-O2 -fdump-tree-optimized" } */
+
+#include "strlenopt.h"
+
+extern char a7[7], a6[6], a5[5], a4[4], a3[3], a2[2], a1[1];
+extern char a0[0];   /* Intentionally not tested here.  */
+extern char ax[];    /* Same.  */
+
+struct MemArrays {
+  char a7[7], a6[6], a5[5], a4[4], a3[3], a2[2], a1[1];
+  char a0[0];   /* Not tested here.  */
+};
+
+struct NestedMemArrays {
+  struct { char a7[7]; } ma7;
+  struct { char a6[6]; } ma6;
+  struct { char a5[5]; } ma5;
+  struct { char a4[4]; } ma4;
+  struct { char a3[3]; } ma3;
+  struct { char a2[2]; } ma2;
+  struct { char a1[1]; } ma1;
+  struct { char a0[0]; } ma0;
+  char last;
+};
+
+extern void failure_on_line (int);
+
+#define TEST_FAIL(line)					\
+  do {							\
+    failure_on_line (line);				\
+  } while (0)
+
+#define T(expr)						\
+  if (!(expr)) TEST_FAIL (__LINE__); else (void)0
+
+
+void test_array (void)
+{
+  T (strlen (a7) < sizeof a7);
+  T (strlen (a6) < sizeof a6);
+  T (strlen (a5) < sizeof a5);
+  T (strlen (a4) < sizeof a4);
+  T (strlen (a3) < sizeof a3);
+
+  /* The following two calls are folded too early which defeats
+     the strlen() optimization.
+    T (strlen (a2) == 1);
+    T (strlen (a1) == 0);  */
+}
+
+void test_memarray (struct MemArrays *ma)
+{
+  T (strlen (ma->a7) < sizeof ma->a7);
+  T (strlen (ma->a6) < sizeof ma->a6);
+  T (strlen (ma->a5) < sizeof ma->a5);
+  T (strlen (ma->a4) < sizeof ma->a4);
+  T (strlen (ma->a3) < sizeof ma->a3);
+
+  /* The following two calls are folded too early which defeats
+     the strlen() optimization.
+  T (strlen (ma->a2) == 1);
+  T (strlen (ma->a1) == 0);  */
+}
+
+/* Verify that the range of strlen(A) of a last struct member is
+   set even when the array is the sole member of a struct as long
+   as the struct itself is a member of another struct.  The converse
+   is tested in stlenopt-37.c.  */
+void test_nested_memarray (struct NestedMemArrays *ma)
+{
+  T (strlen (ma->ma7.a7) < sizeof ma->ma7.a7);
+  T (strlen (ma->ma6.a6) < sizeof ma->ma6.a6);
+  T (strlen (ma->ma5.a5) < sizeof ma->ma5.a5);
+  T (strlen (ma->ma4.a4) < sizeof ma->ma4.a4);
+  T (strlen (ma->ma3.a3) < sizeof ma->ma3.a3);
+
+  /* The following two calls are folded too early which defeats
+     the strlen() optimization.
+  T (strlen (ma->ma2.a2) == 1);
+  T (strlen (ma->ma1.a1) == 0);  */
+}
+
+/* { dg-final { scan-tree-dump-not "failure_on_line" "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/strlenopt-37.c b/gcc/testsuite/gcc.dg/strlenopt-37.c
new file mode 100644
index 0000000..865653c
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/strlenopt-37.c
@@ -0,0 +1,83 @@
+/* PR tree-optimization/78450 - strlen(s) return value can be assumed
+   to be less than the size of s
+   { dg-do compile }
+   { dg-options "-O2 -fdump-tree-optimized" } */
+
+#include "strlenopt.h"
+
+extern char ax[];
+
+struct MemArray7 { char a7[7]; };
+struct MemArray6 { char a6[6]; };
+struct MemArray5 { char a5[5]; };
+struct MemArray4 { char a4[4]; };
+struct MemArray3 { char a3[3]; };
+struct MemArray2 { char a2[2]; };
+struct MemArray1 { char a1[1]; };
+struct MemArray0 { int n; char a0[0]; };
+struct MemArrayX { int n; char ax[]; };
+
+struct MemArrays
+{
+  struct MemArray7 *ma7;
+  struct MemArray6 *ma6;
+  struct MemArray5 *ma5;
+  struct MemArray4 *ma4;
+  struct MemArray3 *ma3;
+  struct MemArray2 *ma2;
+  struct MemArray1 *ma1;
+  struct MemArray0 *ma0;
+  struct MemArrayX *max;
+};
+
+extern void if_stmt_on_line (int);
+extern void else_stmt_on_line (int);
+
+#define T(expr)								\
+  (!!(expr) ? if_stmt_on_line (__LINE__) : else_stmt_on_line (__LINE__))
+
+void test_memarray_lt (struct MemArrays *p)
+{
+  T (strlen (p->ma7->a7) < sizeof p->ma7->a7);
+  T (strlen (p->ma6->a6) < sizeof p->ma6->a6);
+  T (strlen (p->ma5->a5) < sizeof p->ma5->a5);
+  T (strlen (p->ma4->a4) < sizeof p->ma4->a4);
+  T (strlen (p->ma3->a3) < sizeof p->ma3->a3);
+  T (strlen (p->ma2->a2) < sizeof p->ma2->a2);
+  T (strlen (p->ma1->a1) < sizeof p->ma1->a1);
+
+  T (strlen (p->ma0->a0) < 1);
+  T (strlen (p->max->ax) < 1);
+}
+
+void test_memarray_eq (struct MemArrays *p)
+{
+  T (strlen (p->ma7->a7) == sizeof p->ma7->a7);
+  T (strlen (p->ma6->a6) == sizeof p->ma6->a6);
+  T (strlen (p->ma5->a5) == sizeof p->ma5->a5);
+  T (strlen (p->ma4->a4) == sizeof p->ma4->a4);
+  T (strlen (p->ma3->a3) == sizeof p->ma3->a3);
+  T (strlen (p->ma2->a2) == sizeof p->ma2->a2);
+  T (strlen (p->ma1->a1) == sizeof p->ma1->a1);
+
+  T (strlen (p->ma0->a0) == 1);
+  T (strlen (p->max->ax) == 1);
+}
+
+void test_memarray_gt (struct MemArrays *p)
+{
+  T (strlen (p->ma7->a7) > sizeof p->ma7->a7);
+  T (strlen (p->ma6->a6) > sizeof p->ma6->a6);
+  T (strlen (p->ma5->a5) > sizeof p->ma5->a5);
+  T (strlen (p->ma4->a4) > sizeof p->ma4->a4);
+  T (strlen (p->ma3->a3) > sizeof p->ma3->a3);
+  T (strlen (p->ma2->a2) > sizeof p->ma2->a2);
+  T (strlen (p->ma1->a1) > sizeof p->ma1->a1);
+
+  T (strlen (p->ma0->a0) > 1);
+  T (strlen (p->max->ax) > 1);
+ }
+
+/* Verify that no if or else statements have been eliminated.
+   { dg-final { scan-tree-dump-times "if_stmt_on_line" 27 "optimized" } }
+   { dg-final { scan-tree-dump-times "else_stmt_on_line" 27 "optimized" } }  */
diff --git a/gcc/tree-ssa-strlen.c b/gcc/tree-ssa-strlen.c
index 94f20ef..d89677b 100644
--- a/gcc/tree-ssa-strlen.c
+++ b/gcc/tree-ssa-strlen.c
@@ -1150,6 +1150,44 @@ adjust_last_stmt (strinfo *si, gimple *stmt, bool is_strcat)
   update_stmt (last.stmt);
 }
 
+/* For an LHS that is an SSA_NAME and for strlen() argument SRC, set
+   LHS range info to [0, N] if SRC refers to a character array A[N]
+   with unknown length bounded by N.  */
+
+static void
+maybe_set_strlen_range (tree lhs, tree src)
+{
+  if (TREE_CODE (lhs) != SSA_NAME)
+    return;
+
+  if (TREE_CODE (src) == SSA_NAME)
+    {
+      gimple *def = SSA_NAME_DEF_STMT (src);
+      if (is_gimple_assign (def)
+	  && gimple_assign_rhs_code (def) == ADDR_EXPR)
+	src = gimple_assign_rhs1 (def);
+    }
+
+  if (TREE_CODE (src) != ADDR_EXPR)
+    return;
+
+  /* The last array member of a struct can be bigger than its size
+     suggests if it's treated as a poor-man's flexible array member.  */
+  src = TREE_OPERAND (src, 0);
+  if (TREE_CODE (TREE_TYPE (src)) != ARRAY_TYPE
+      || array_at_struct_end_p (src))
+    return;
+
+  tree type = TREE_TYPE (src);
+  if (tree dom = TYPE_DOMAIN (type))
+    if (tree maxval = TYPE_MAX_VALUE (dom))
+      {
+	wide_int max = wi::to_wide (maxval);
+	wide_int min = wi::zero (max.get_precision ());
+	set_range_info (lhs, VR_RANGE, min, max);
+      }
+}
+
 /* Handle a strlen call.  If strlen of the argument is known, replace
    the strlen call with the known value, otherwise remember that strlen
    of the argument is stored in the lhs SSA_NAME.  */
@@ -1260,6 +1298,10 @@ handle_builtin_strlen (gimple_stmt_iterator *gsi)
       set_strinfo (idx, si);
       find_equal_ptrs (src, idx);
 
+      /* For SRC that is an array of N elements, set LHS's range
+	 to [0, N].  */
+      maybe_set_strlen_range (lhs, src);
+
       if (strlen_to_stridx)
 	{
 	  location_t loc = gimple_location (stmt);

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] set range for strlen(array) to avoid spurious -Wstringop-overflow (PR 83373 , PR 78450)
  2017-12-12 20:15 [PATCH] set range for strlen(array) to avoid spurious -Wstringop-overflow (PR 83373 , PR 78450) Martin Sebor
@ 2017-12-13  0:35 ` Jeff Law
  2017-12-13  3:47   ` Martin Sebor
  2017-12-13  7:25 ` Bernhard Reutner-Fischer
  1 sibling, 1 reply; 18+ messages in thread
From: Jeff Law @ 2017-12-13  0:35 UTC (permalink / raw)
  To: Martin Sebor, Gcc Patch List

On 12/12/2017 01:15 PM, Martin Sebor wrote:
> Bug 83373 - False positive reported by -Wstringop-overflow, is
> another example of warning triggered by a missed optimization
> opportunity, this time in the strlen pass.  The optimization
> is discussed in pr78450 - strlen(s) return value can be assumed
> to be less than the size of s.  The gist of it is that the result
> of strlen(array) can be assumed to be less than the size of
> the array (except in the corner case of last struct members).
> 
> To avoid the false positive the attached patch adds this
> optimization to the strlen pass.  Although the patch passes
> bootstrap and regression tests for all front-ends I'm not sure
> the way it determines the upper bound of the range is 100%
> correct for languages with arrays with a non-zero lower bound.
> Maybe it's just not as tight as it could be.
What about something hideous like

struct fu {
  char x1[10];
  char x2[10];
  int avoid_trailing_array;
}

Where objects stored in x1 are not null terminated.  Are we in the realm
of undefined behavior at that point (I hope so)?

jeff

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] set range for strlen(array) to avoid spurious -Wstringop-overflow (PR 83373 , PR 78450)
  2017-12-13  0:35 ` Jeff Law
@ 2017-12-13  3:47   ` Martin Sebor
  2017-12-14 10:43     ` Richard Biener
  2017-12-14 16:13     ` Jeff Law
  0 siblings, 2 replies; 18+ messages in thread
From: Martin Sebor @ 2017-12-13  3:47 UTC (permalink / raw)
  To: Jeff Law, Gcc Patch List

On 12/12/2017 05:35 PM, Jeff Law wrote:
> On 12/12/2017 01:15 PM, Martin Sebor wrote:
>> Bug 83373 - False positive reported by -Wstringop-overflow, is
>> another example of warning triggered by a missed optimization
>> opportunity, this time in the strlen pass.  The optimization
>> is discussed in pr78450 - strlen(s) return value can be assumed
>> to be less than the size of s.  The gist of it is that the result
>> of strlen(array) can be assumed to be less than the size of
>> the array (except in the corner case of last struct members).
>>
>> To avoid the false positive the attached patch adds this
>> optimization to the strlen pass.  Although the patch passes
>> bootstrap and regression tests for all front-ends I'm not sure
>> the way it determines the upper bound of the range is 100%
>> correct for languages with arrays with a non-zero lower bound.
>> Maybe it's just not as tight as it could be.
> What about something hideous like
>
> struct fu {
>   char x1[10];
>   char x2[10];
>   int avoid_trailing_array;
> }
>
> Where objects stored in x1 are not null terminated.  Are we in the realm
> of undefined behavior at that point (I hope so)?

Yes, this is undefined.  Pointer arithmetic (either direct or
via standard library functions) is only defined for pointers
to the same object or subobject.  So even something like

  memcpy (pfu->x1, pfu->x1 + 10, 10);

is undefined.  _FORTIFY_SOURCE allows it for raw memory functions
because some low level code copies regions of the same object that
span two or more members (I've seen an example or two in the Linux
kernel but, IIRC, nowhere else).  With the patch, using memchr
would be the only way to get away with it.

Other than that, the new -Wstringop-truncation warning is designed
to prevent creating unterminated character arrays and the manual
suggests using attribute nonstring when it's necessary.  The
-Wrestrict warning I'm about to check in also complains about
forming invalid pointers by built-ins with restrict-qualified
arguments (although it won't diagnose the above).

Although I would prefer not to, I suppose if letting strlen cross
the boundaries of subobjects was considered an important use to
accommodate in limited cases the optimization could be disabled
for member arrays declared with the new nonstring attribute (while
still issuing a warning for it as GCC does today).

Another alternative (if the above use case is considered important
enough) might be to suppress the optimization for member character
arrays that are immediately followed by another such array.

Martin

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] set range for strlen(array) to avoid spurious -Wstringop-overflow (PR 83373 , PR 78450)
  2017-12-12 20:15 [PATCH] set range for strlen(array) to avoid spurious -Wstringop-overflow (PR 83373 , PR 78450) Martin Sebor
  2017-12-13  0:35 ` Jeff Law
@ 2017-12-13  7:25 ` Bernhard Reutner-Fischer
  2017-12-13 16:16   ` Martin Sebor
  1 sibling, 1 reply; 18+ messages in thread
From: Bernhard Reutner-Fischer @ 2017-12-13  7:25 UTC (permalink / raw)
  To: gcc-patches, Martin Sebor, Gcc Patch List

On 12 December 2017 21:15:25 CET, Martin Sebor <msebor@gmail.com> wrote:

>
>Tested on x86_64-linux.

I assume this test worked even before this patch. Thus: 

s/oveflow/overflow/

thanks, 

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] set range for strlen(array) to avoid spurious -Wstringop-overflow (PR 83373 , PR 78450)
  2017-12-13  7:25 ` Bernhard Reutner-Fischer
@ 2017-12-13 16:16   ` Martin Sebor
  0 siblings, 0 replies; 18+ messages in thread
From: Martin Sebor @ 2017-12-13 16:16 UTC (permalink / raw)
  To: Bernhard Reutner-Fischer, gcc-patches

On 12/13/2017 12:25 AM, Bernhard Reutner-Fischer wrote:
> On 12 December 2017 21:15:25 CET, Martin Sebor <msebor@gmail.com> wrote:
>
>>
>> Tested on x86_64-linux.
>
> I assume this test worked even before this patch.

Of the tests added by the patch, strlenopt-37.c passes without
the compiler changes and strlenopt-36.c fails.  Both are expected.
-37.c verifies the optimization doesn't happen when it shouldn't
happen and -36.c verifies it does take place when it's safe.

The regression test for the warning fails without the patch and
passes with the patch applied.

> Thus:
>
> s/oveflow/overflow/

Thanks.  I'll fix that if/when the patch is approved.

Martin

>
> thanks,
>

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] set range for strlen(array) to avoid spurious -Wstringop-overflow (PR 83373 , PR 78450)
  2017-12-13  3:47   ` Martin Sebor
@ 2017-12-14 10:43     ` Richard Biener
  2017-12-14 16:01       ` Martin Sebor
  2017-12-14 16:13     ` Jeff Law
  1 sibling, 1 reply; 18+ messages in thread
From: Richard Biener @ 2017-12-14 10:43 UTC (permalink / raw)
  To: Martin Sebor; +Cc: Jeff Law, Gcc Patch List

On Wed, Dec 13, 2017 at 4:47 AM, Martin Sebor <msebor@gmail.com> wrote:
> On 12/12/2017 05:35 PM, Jeff Law wrote:
>>
>> On 12/12/2017 01:15 PM, Martin Sebor wrote:
>>>
>>> Bug 83373 - False positive reported by -Wstringop-overflow, is
>>> another example of warning triggered by a missed optimization
>>> opportunity, this time in the strlen pass.  The optimization
>>> is discussed in pr78450 - strlen(s) return value can be assumed
>>> to be less than the size of s.  The gist of it is that the result
>>> of strlen(array) can be assumed to be less than the size of
>>> the array (except in the corner case of last struct members).
>>>
>>> To avoid the false positive the attached patch adds this
>>> optimization to the strlen pass.  Although the patch passes
>>> bootstrap and regression tests for all front-ends I'm not sure
>>> the way it determines the upper bound of the range is 100%
>>> correct for languages with arrays with a non-zero lower bound.
>>> Maybe it's just not as tight as it could be.
>>
>> What about something hideous like
>>
>> struct fu {
>>   char x1[10];
>>   char x2[10];
>>   int avoid_trailing_array;
>> }
>>
>> Where objects stored in x1 are not null terminated.  Are we in the realm
>> of undefined behavior at that point (I hope so)?
>
>
> Yes, this is undefined.  Pointer arithmetic (either direct or
> via standard library functions) is only defined for pointers
> to the same object or subobject.  So even something like
>
>  memcpy (pfu->x1, pfu->x1 + 10, 10);
>
> is undefined.

There's nothing undefined here - computing the pointer pointing
to one-after-the-last element of an array is valid (you are just
not allowed to dereference it).

>  _FORTIFY_SOURCE allows it for raw memory functions
> because some low level code copies regions of the same object that
> span two or more members (I've seen an example or two in the Linux
> kernel but, IIRC, nowhere else).  With the patch, using memchr
> would be the only way to get away with it.
>
> Other than that, the new -Wstringop-truncation warning is designed
> to prevent creating unterminated character arrays and the manual
> suggests using attribute nonstring when it's necessary.  The
> -Wrestrict warning I'm about to check in also complains about
> forming invalid pointers by built-ins with restrict-qualified
> arguments (although it won't diagnose the above).
>
> Although I would prefer not to, I suppose if letting strlen cross
> the boundaries of subobjects was considered an important use to
> accommodate in limited cases the optimization could be disabled
> for member arrays declared with the new nonstring attribute (while
> still issuing a warning for it as GCC does today).
>
> Another alternative (if the above use case is considered important
> enough) might be to suppress the optimization for member character
> arrays that are immediately followed by another such array.
>
> Martin

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] set range for strlen(array) to avoid spurious -Wstringop-overflow (PR 83373 , PR 78450)
  2017-12-14 10:43     ` Richard Biener
@ 2017-12-14 16:01       ` Martin Sebor
  2017-12-15  8:50         ` Richard Biener
  0 siblings, 1 reply; 18+ messages in thread
From: Martin Sebor @ 2017-12-14 16:01 UTC (permalink / raw)
  To: Richard Biener; +Cc: Jeff Law, Gcc Patch List

On 12/14/2017 03:43 AM, Richard Biener wrote:
> On Wed, Dec 13, 2017 at 4:47 AM, Martin Sebor <msebor@gmail.com> wrote:
>> On 12/12/2017 05:35 PM, Jeff Law wrote:
>>>
>>> On 12/12/2017 01:15 PM, Martin Sebor wrote:
>>>>
>>>> Bug 83373 - False positive reported by -Wstringop-overflow, is
>>>> another example of warning triggered by a missed optimization
>>>> opportunity, this time in the strlen pass.  The optimization
>>>> is discussed in pr78450 - strlen(s) return value can be assumed
>>>> to be less than the size of s.  The gist of it is that the result
>>>> of strlen(array) can be assumed to be less than the size of
>>>> the array (except in the corner case of last struct members).
>>>>
>>>> To avoid the false positive the attached patch adds this
>>>> optimization to the strlen pass.  Although the patch passes
>>>> bootstrap and regression tests for all front-ends I'm not sure
>>>> the way it determines the upper bound of the range is 100%
>>>> correct for languages with arrays with a non-zero lower bound.
>>>> Maybe it's just not as tight as it could be.
>>>
>>> What about something hideous like
>>>
>>> struct fu {
>>>   char x1[10];
>>>   char x2[10];
>>>   int avoid_trailing_array;
>>> }
>>>
>>> Where objects stored in x1 are not null terminated.  Are we in the realm
>>> of undefined behavior at that point (I hope so)?
>>
>>
>> Yes, this is undefined.  Pointer arithmetic (either direct or
>> via standard library functions) is only defined for pointers
>> to the same object or subobject.  So even something like
>>
>>  memcpy (pfu->x1, pfu->x1 + 10, 10);
>>
>> is undefined.
>
> There's nothing undefined here - computing the pointer pointing
> to one-after-the-last element of an array is valid (you are just
> not allowed to dereference it).

Right, and memcpy dereferences it, so it's undefined.

Martin

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] set range for strlen(array) to avoid spurious -Wstringop-overflow (PR 83373 , PR 78450)
  2017-12-13  3:47   ` Martin Sebor
  2017-12-14 10:43     ` Richard Biener
@ 2017-12-14 16:13     ` Jeff Law
  2017-12-14 16:19       ` Jakub Jelinek
  1 sibling, 1 reply; 18+ messages in thread
From: Jeff Law @ 2017-12-14 16:13 UTC (permalink / raw)
  To: Martin Sebor, Gcc Patch List

On 12/12/2017 08:47 PM, Martin Sebor wrote:
>>
>> struct fu {
>>   char x1[10];
>>   char x2[10];
>>   int avoid_trailing_array;
>> }
>>
>> Where objects stored in x1 are not null terminated.  Are we in the realm
>> of undefined behavior at that point (I hope so)?
> 
> Yes, this is undefined.  Pointer arithmetic (either direct or
> via standard library functions) is only defined for pointers
> to the same object or subobject.  So even something like
That's what I expected -- I just wanted to be sure there wasn't some
kind of escape clause that would allow one to compose an object of that
nature, then use pfu->x1 to read/write pfu->x2.


> 
>  memcpy (pfu->x1, pfu->x1 + 10, 10);
> 
> is undefined.  _FORTIFY_SOURCE allows it for raw memory functions
> because some low level code copies regions of the same object that
> span two or more members (I've seen an example or two in the Linux
> kernel but, IIRC, nowhere else).  With the patch, using memchr
> would be the only way to get away with it.
Presumably the right way to go about things in this situation is to use
a pointer to the outer object.

> 
> Other than that, the new -Wstringop-truncation warning is designed
> to prevent creating unterminated character arrays and the manual
> suggests using attribute nonstring when it's necessary.  The
> -Wrestrict warning I'm about to check in also complains about
> forming invalid pointers by built-ins with restrict-qualified
> arguments (although it won't diagnose the above).
Right.  The concern is that it's an new option and folks likely haven't
cleaned up their codebases for these kinds of issues.

> 
> Although I would prefer not to, I suppose if letting strlen cross
> the boundaries of subobjects was considered an important use to
> accommodate in limited cases the optimization could be disabled
> for member arrays declared with the new nonstring attribute (while
> still issuing a warning for it as GCC does today).
> 
> Another alternative (if the above use case is considered important
> enough) might be to suppress the optimization for member character
> arrays that are immediately followed by another such array.
History tells us that there will be someone out there that does this
kind of thing -- the question is how pervasive is it.  My suspicion is
that it is not common.

Given that I don't expect those uses to be common, the only thing that
should break is non-conforming code and we have a (new) warning for such
code my inclination is to go forward.

So I'm OK with the patch.  I'd give folks till Monday to chime in with
dissenting opinions.

Jeff

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] set range for strlen(array) to avoid spurious -Wstringop-overflow (PR 83373 , PR 78450)
  2017-12-14 16:13     ` Jeff Law
@ 2017-12-14 16:19       ` Jakub Jelinek
  2017-12-14 18:51         ` Martin Sebor
  0 siblings, 1 reply; 18+ messages in thread
From: Jakub Jelinek @ 2017-12-14 16:19 UTC (permalink / raw)
  To: Jeff Law; +Cc: Martin Sebor, Gcc Patch List

On Thu, Dec 14, 2017 at 09:13:21AM -0700, Jeff Law wrote:
> > Although I would prefer not to, I suppose if letting strlen cross
> > the boundaries of subobjects was considered an important use to
> > accommodate in limited cases the optimization could be disabled
> > for member arrays declared with the new nonstring attribute (while
> > still issuing a warning for it as GCC does today).
> > 
> > Another alternative (if the above use case is considered important
> > enough) might be to suppress the optimization for member character
> > arrays that are immediately followed by another such array.
> History tells us that there will be someone out there that does this
> kind of thing -- the question is how pervasive is it.  My suspicion is
> that it is not common.
> 
> Given that I don't expect those uses to be common, the only thing that
> should break is non-conforming code and we have a (new) warning for such
> code my inclination is to go forward.
> 
> So I'm OK with the patch.  I'd give folks till Monday to chime in with
> dissenting opinions.

Well, it would be nice to get sanitizers diagnose this at runtime.  If we
know the array length at compile time, simply compare after the strlen
call the result and fail if it returns something above it.  Or replace
the strlen call with strnlen for the compile time known size and add
instrumentation if strnlen returns the second argument.

	Jakub

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] set range for strlen(array) to avoid spurious -Wstringop-overflow (PR 83373 , PR 78450)
  2017-12-14 16:19       ` Jakub Jelinek
@ 2017-12-14 18:51         ` Martin Sebor
  2017-12-14 18:55           ` Jakub Jelinek
  0 siblings, 1 reply; 18+ messages in thread
From: Martin Sebor @ 2017-12-14 18:51 UTC (permalink / raw)
  To: Jakub Jelinek, Jeff Law; +Cc: Gcc Patch List

On 12/14/2017 09:18 AM, Jakub Jelinek wrote:
> On Thu, Dec 14, 2017 at 09:13:21AM -0700, Jeff Law wrote:
>>> Although I would prefer not to, I suppose if letting strlen cross
>>> the boundaries of subobjects was considered an important use to
>>> accommodate in limited cases the optimization could be disabled
>>> for member arrays declared with the new nonstring attribute (while
>>> still issuing a warning for it as GCC does today).
>>>
>>> Another alternative (if the above use case is considered important
>>> enough) might be to suppress the optimization for member character
>>> arrays that are immediately followed by another such array.
>> History tells us that there will be someone out there that does this
>> kind of thing -- the question is how pervasive is it.  My suspicion is
>> that it is not common.
>>
>> Given that I don't expect those uses to be common, the only thing that
>> should break is non-conforming code and we have a (new) warning for such
>> code my inclination is to go forward.
>>
>> So I'm OK with the patch.  I'd give folks till Monday to chime in with
>> dissenting opinions.
>
> Well, it would be nice to get sanitizers diagnose this at runtime.  If we
> know the array length at compile time, simply compare after the strlen
> call the result and fail if it returns something above it.  Or replace
> the strlen call with strnlen for the compile time known size and add
> instrumentation if strnlen returns the second argument.

Sure, that sounds like a useful enhancement.  I'll look into
adding it as a follow-on patch unless you feel that it needs
to be part of the same package.

Martin

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] set range for strlen(array) to avoid spurious -Wstringop-overflow (PR 83373 , PR 78450)
  2017-12-14 18:51         ` Martin Sebor
@ 2017-12-14 18:55           ` Jakub Jelinek
  2017-12-14 19:04             ` Jeff Law
  0 siblings, 1 reply; 18+ messages in thread
From: Jakub Jelinek @ 2017-12-14 18:55 UTC (permalink / raw)
  To: Martin Sebor; +Cc: Jeff Law, Gcc Patch List

On Thu, Dec 14, 2017 at 11:51:26AM -0700, Martin Sebor wrote:
> > Well, it would be nice to get sanitizers diagnose this at runtime.  If we
> > know the array length at compile time, simply compare after the strlen
> > call the result and fail if it returns something above it.  Or replace
> > the strlen call with strnlen for the compile time known size and add
> > instrumentation if strnlen returns the second argument.
> 
> Sure, that sounds like a useful enhancement.  I'll look into
> adding it as a follow-on patch unless you feel that it needs
> to be part of the same package.

The problem is if we'll need changes to libubsan for that (which we'll
likely do), then those need to be upstreamed, and e.g. my attempts
to upstream simple patch to diagnose noreturn function returns is suspended
upstream because clang doesn't have that support (and I have no interest
in adding to to clang).

In theory we could have some GCC only file in there, but then we'd be ABI
incompatible with them.

	Jakub

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] set range for strlen(array) to avoid spurious -Wstringop-overflow (PR 83373 , PR 78450)
  2017-12-14 18:55           ` Jakub Jelinek
@ 2017-12-14 19:04             ` Jeff Law
  2017-12-18 22:53               ` Martin Sebor
  0 siblings, 1 reply; 18+ messages in thread
From: Jeff Law @ 2017-12-14 19:04 UTC (permalink / raw)
  To: Jakub Jelinek, Martin Sebor; +Cc: Gcc Patch List

On 12/14/2017 11:55 AM, Jakub Jelinek wrote:
> On Thu, Dec 14, 2017 at 11:51:26AM -0700, Martin Sebor wrote:
>>> Well, it would be nice to get sanitizers diagnose this at runtime.  If we
>>> know the array length at compile time, simply compare after the strlen
>>> call the result and fail if it returns something above it.  Or replace
>>> the strlen call with strnlen for the compile time known size and add
>>> instrumentation if strnlen returns the second argument.
>>
>> Sure, that sounds like a useful enhancement.  I'll look into
>> adding it as a follow-on patch unless you feel that it needs
>> to be part of the same package.
> 
> The problem is if we'll need changes to libubsan for that (which we'll
> likely do), then those need to be upstreamed, and e.g. my attempts
> to upstream simple patch to diagnose noreturn function returns is suspended
> upstream because clang doesn't have that support (and I have no interest
> in adding to to clang).
> 
> In theory we could have some GCC only file in there, but then we'd be ABI
> incompatible with them.
So defer the sanitization side until Clang catches up?

jeff

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] set range for strlen(array) to avoid spurious -Wstringop-overflow (PR 83373 , PR 78450)
  2017-12-14 16:01       ` Martin Sebor
@ 2017-12-15  8:50         ` Richard Biener
  2017-12-15 15:58           ` Martin Sebor
  0 siblings, 1 reply; 18+ messages in thread
From: Richard Biener @ 2017-12-15  8:50 UTC (permalink / raw)
  To: Martin Sebor; +Cc: Jeff Law, Gcc Patch List

On Thu, Dec 14, 2017 at 5:01 PM, Martin Sebor <msebor@gmail.com> wrote:
> On 12/14/2017 03:43 AM, Richard Biener wrote:
>>
>> On Wed, Dec 13, 2017 at 4:47 AM, Martin Sebor <msebor@gmail.com> wrote:
>>>
>>> On 12/12/2017 05:35 PM, Jeff Law wrote:
>>>>
>>>>
>>>> On 12/12/2017 01:15 PM, Martin Sebor wrote:
>>>>>
>>>>>
>>>>> Bug 83373 - False positive reported by -Wstringop-overflow, is
>>>>> another example of warning triggered by a missed optimization
>>>>> opportunity, this time in the strlen pass.  The optimization
>>>>> is discussed in pr78450 - strlen(s) return value can be assumed
>>>>> to be less than the size of s.  The gist of it is that the result
>>>>> of strlen(array) can be assumed to be less than the size of
>>>>> the array (except in the corner case of last struct members).
>>>>>
>>>>> To avoid the false positive the attached patch adds this
>>>>> optimization to the strlen pass.  Although the patch passes
>>>>> bootstrap and regression tests for all front-ends I'm not sure
>>>>> the way it determines the upper bound of the range is 100%
>>>>> correct for languages with arrays with a non-zero lower bound.
>>>>> Maybe it's just not as tight as it could be.
>>>>
>>>>
>>>> What about something hideous like
>>>>
>>>> struct fu {
>>>>   char x1[10];
>>>>   char x2[10];
>>>>   int avoid_trailing_array;
>>>> }
>>>>
>>>> Where objects stored in x1 are not null terminated.  Are we in the realm
>>>> of undefined behavior at that point (I hope so)?
>>>
>>>
>>>
>>> Yes, this is undefined.  Pointer arithmetic (either direct or
>>> via standard library functions) is only defined for pointers
>>> to the same object or subobject.  So even something like
>>>
>>>  memcpy (pfu->x1, pfu->x1 + 10, 10);
>>>
>>> is undefined.
>>
>>
>> There's nothing undefined here - computing the pointer pointing
>> to one-after-the-last element of an array is valid (you are just
>> not allowed to dereference it).
>
>
> Right, and memcpy dereferences it, so it's undefined.

That's interpretation of the standard that I don't share.

Also, if I have struct f { int i; int j; };  and a int * that points
to the j member you say I have no standard conforming way
to get at a pointer to the i member from this, right?  Because
the pointer points to an 'int' object.  But it also points within
a struct f object!  So at least maybe (int *)((char *)p - offsetof
(struct f, j))
should be valid?  This means that pfu->x1 + 10 is a valid pointer
into *pfu no matter what you say and you can dereference it.

Richard.

> Martin
>

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] set range for strlen(array) to avoid spurious -Wstringop-overflow (PR 83373 , PR 78450)
  2017-12-15  8:50         ` Richard Biener
@ 2017-12-15 15:58           ` Martin Sebor
  2017-12-15 16:17             ` Richard Biener
  0 siblings, 1 reply; 18+ messages in thread
From: Martin Sebor @ 2017-12-15 15:58 UTC (permalink / raw)
  To: Richard Biener; +Cc: Jeff Law, Gcc Patch List

On 12/15/2017 01:48 AM, Richard Biener wrote:
> On Thu, Dec 14, 2017 at 5:01 PM, Martin Sebor <msebor@gmail.com> wrote:
>> On 12/14/2017 03:43 AM, Richard Biener wrote:
>>>
>>> On Wed, Dec 13, 2017 at 4:47 AM, Martin Sebor <msebor@gmail.com> wrote:
>>>>
>>>> On 12/12/2017 05:35 PM, Jeff Law wrote:
>>>>>
>>>>>
>>>>> On 12/12/2017 01:15 PM, Martin Sebor wrote:
>>>>>>
>>>>>>
>>>>>> Bug 83373 - False positive reported by -Wstringop-overflow, is
>>>>>> another example of warning triggered by a missed optimization
>>>>>> opportunity, this time in the strlen pass.  The optimization
>>>>>> is discussed in pr78450 - strlen(s) return value can be assumed
>>>>>> to be less than the size of s.  The gist of it is that the result
>>>>>> of strlen(array) can be assumed to be less than the size of
>>>>>> the array (except in the corner case of last struct members).
>>>>>>
>>>>>> To avoid the false positive the attached patch adds this
>>>>>> optimization to the strlen pass.  Although the patch passes
>>>>>> bootstrap and regression tests for all front-ends I'm not sure
>>>>>> the way it determines the upper bound of the range is 100%
>>>>>> correct for languages with arrays with a non-zero lower bound.
>>>>>> Maybe it's just not as tight as it could be.
>>>>>
>>>>>
>>>>> What about something hideous like
>>>>>
>>>>> struct fu {
>>>>>   char x1[10];
>>>>>   char x2[10];
>>>>>   int avoid_trailing_array;
>>>>> }
>>>>>
>>>>> Where objects stored in x1 are not null terminated.  Are we in the realm
>>>>> of undefined behavior at that point (I hope so)?
>>>>
>>>>
>>>>
>>>> Yes, this is undefined.  Pointer arithmetic (either direct or
>>>> via standard library functions) is only defined for pointers
>>>> to the same object or subobject.  So even something like
>>>>
>>>>  memcpy (pfu->x1, pfu->x1 + 10, 10);
>>>>
>>>> is undefined.
>>>
>>>
>>> There's nothing undefined here - computing the pointer pointing
>>> to one-after-the-last element of an array is valid (you are just
>>> not allowed to dereference it).
>>
>>
>> Right, and memcpy dereferences it, so it's undefined.
>
> That's interpretation of the standard that I don't share.

It's not an interpretation.  It's a basic rule of the languages
that the standards are explicit about.  In C11 you will find
this specified in detail in 6.5.6, paragraph 7 and 8 (of
particular relevance to your question below is p7: "a pointer
to an object that is not an element of an array behaves the same
as a pointer to the first element of an array of length one.")

> Also, if I have struct f { int i; int j; };  and a int * that points
> to the j member you say I have no standard conforming way
> to get at a pointer to the i member from this, right?

Correct.  See above.

> Because
> the pointer points to an 'int' object.  But it also points within
> a struct f object!  So at least maybe (int *)((char *)p - offsetof
> (struct f, j))
> should be valid?

No, not really.  It works in practice but it's not well-defined.
It doesn't matter how you get at the result.  What matters is
what you start with.  As Jeff said, to derive a pointer to
distinct suobjects of a larger object you need to start with
a pointer to the larger object and treat it as an array of
chars.

> This means that pfu->x1 + 10 is a valid pointer
> into *pfu no matter what you say and you can dereference it.

No.

As another hopefully more convincing example consider a multi-
dimensional array A[2][2].  The value of the offset of A[i][j]
is sizeof A[i] + j.  With that, the offset of A[1][0] is
sizeof A[1] + 0, and so would be the offset of A[0][2]. But
that doesn't make A[0][2] a valid reference to an element of
A (because A[0] has only two elements, A[0][0] and A[0][1]),
or &A[0] + 2 a derefernceable pointer.  It's a pointer that
points just past the last element of the array A[0].  That
there's another array right after A[0] (namely A[1]) is
immaterial, same as in the struct f example above.

Martin

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] set range for strlen(array) to avoid spurious -Wstringop-overflow (PR 83373 , PR 78450)
  2017-12-15 15:58           ` Martin Sebor
@ 2017-12-15 16:17             ` Richard Biener
  2017-12-15 17:29               ` Martin Sebor
  0 siblings, 1 reply; 18+ messages in thread
From: Richard Biener @ 2017-12-15 16:17 UTC (permalink / raw)
  To: Martin Sebor; +Cc: Jeff Law, Gcc Patch List

On December 15, 2017 4:58:14 PM GMT+01:00, Martin Sebor <msebor@gmail.com> wrote:
>On 12/15/2017 01:48 AM, Richard Biener wrote:
>> On Thu, Dec 14, 2017 at 5:01 PM, Martin Sebor <msebor@gmail.com>
>wrote:
>>> On 12/14/2017 03:43 AM, Richard Biener wrote:
>>>>
>>>> On Wed, Dec 13, 2017 at 4:47 AM, Martin Sebor <msebor@gmail.com>
>wrote:
>>>>>
>>>>> On 12/12/2017 05:35 PM, Jeff Law wrote:
>>>>>>
>>>>>>
>>>>>> On 12/12/2017 01:15 PM, Martin Sebor wrote:
>>>>>>>
>>>>>>>
>>>>>>> Bug 83373 - False positive reported by -Wstringop-overflow, is
>>>>>>> another example of warning triggered by a missed optimization
>>>>>>> opportunity, this time in the strlen pass.  The optimization
>>>>>>> is discussed in pr78450 - strlen(s) return value can be assumed
>>>>>>> to be less than the size of s.  The gist of it is that the
>result
>>>>>>> of strlen(array) can be assumed to be less than the size of
>>>>>>> the array (except in the corner case of last struct members).
>>>>>>>
>>>>>>> To avoid the false positive the attached patch adds this
>>>>>>> optimization to the strlen pass.  Although the patch passes
>>>>>>> bootstrap and regression tests for all front-ends I'm not sure
>>>>>>> the way it determines the upper bound of the range is 100%
>>>>>>> correct for languages with arrays with a non-zero lower bound.
>>>>>>> Maybe it's just not as tight as it could be.
>>>>>>
>>>>>>
>>>>>> What about something hideous like
>>>>>>
>>>>>> struct fu {
>>>>>>   char x1[10];
>>>>>>   char x2[10];
>>>>>>   int avoid_trailing_array;
>>>>>> }
>>>>>>
>>>>>> Where objects stored in x1 are not null terminated.  Are we in
>the realm
>>>>>> of undefined behavior at that point (I hope so)?
>>>>>
>>>>>
>>>>>
>>>>> Yes, this is undefined.  Pointer arithmetic (either direct or
>>>>> via standard library functions) is only defined for pointers
>>>>> to the same object or subobject.  So even something like
>>>>>
>>>>>  memcpy (pfu->x1, pfu->x1 + 10, 10);
>>>>>
>>>>> is undefined.
>>>>
>>>>
>>>> There's nothing undefined here - computing the pointer pointing
>>>> to one-after-the-last element of an array is valid (you are just
>>>> not allowed to dereference it).
>>>
>>>
>>> Right, and memcpy dereferences it, so it's undefined.
>>
>> That's interpretation of the standard that I don't share.
>
>It's not an interpretation.  It's a basic rule of the languages
>that the standards are explicit about.  In C11 you will find
>this specified in detail in 6.5.6, paragraph 7 and 8 (of
>particular relevance to your question below is p7: "a pointer
>to an object that is not an element of an array behaves the same
>as a pointer to the first element of an array of length one.")

I know. 

>> Also, if I have struct f { int i; int j; };  and a int * that points
>> to the j member you say I have no standard conforming way
>> to get at a pointer to the i member from this, right?
>
>Correct.  See above.
>
>> Because
>> the pointer points to an 'int' object.  But it also points within
>> a struct f object!  So at least maybe (int *)((char *)p - offsetof
>> (struct f, j))
>> should be valid?
>
>No, not really.  It works in practice but it's not well-defined.
>It doesn't matter how you get at the result.  What matters is
>what you start with.  As Jeff said, to derive a pointer to
>distinct suobjects of a larger object you need to start with
>a pointer to the larger object and treat it as an array of
>chars.

That's obviously not constraints people use C and C++ with so I see no way to enforce this within gimple.

>> This means that pfu->x1 + 10 is a valid pointer
>> into *pfu no matter what you say and you can dereference it.
>
>No.
>
>As another hopefully more convincing example consider a multi-
>dimensional array A[2][2].  The value of the offset of A[i][j]
>is sizeof A[i] + j.  With that, the offset of A[1][0] is
>sizeof A[1] + 0, and so would be the offset of A[0][2]. But
>that doesn't make A[0][2] a valid reference to an element of
>A (because A[0] has only two elements, A[0][0] and A[0][1]),
>or &A[0] + 2 a derefernceable pointer.  It's a pointer that
>points just past the last element of the array A[0].  That
>there's another array right after A[0] (namely A[1]) is
>immaterial, same as in the struct f example above.

I know. Dependence analysis relies on this. We've had bugs in the past with gcc itself introducing such bogus references. 

Richard. 

>
>Martin

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] set range for strlen(array) to avoid spurious -Wstringop-overflow (PR 83373 , PR 78450)
  2017-12-15 16:17             ` Richard Biener
@ 2017-12-15 17:29               ` Martin Sebor
  2017-12-16 17:38                 ` Martin Sebor
  0 siblings, 1 reply; 18+ messages in thread
From: Martin Sebor @ 2017-12-15 17:29 UTC (permalink / raw)
  To: Richard Biener; +Cc: Jeff Law, Gcc Patch List

On 12/15/2017 09:17 AM, Richard Biener wrote:
> On December 15, 2017 4:58:14 PM GMT+01:00, Martin Sebor <msebor@gmail.com> wrote:
>> On 12/15/2017 01:48 AM, Richard Biener wrote:
>>> On Thu, Dec 14, 2017 at 5:01 PM, Martin Sebor <msebor@gmail.com>
>> wrote:
>>>> On 12/14/2017 03:43 AM, Richard Biener wrote:
>>>>>
>>>>> On Wed, Dec 13, 2017 at 4:47 AM, Martin Sebor <msebor@gmail.com>
>> wrote:
>>>>>>
>>>>>> On 12/12/2017 05:35 PM, Jeff Law wrote:
>>>>>>>
>>>>>>>
>>>>>>> On 12/12/2017 01:15 PM, Martin Sebor wrote:
>>>>>>>>
>>>>>>>>
>>>>>>>> Bug 83373 - False positive reported by -Wstringop-overflow, is
>>>>>>>> another example of warning triggered by a missed optimization
>>>>>>>> opportunity, this time in the strlen pass.  The optimization
>>>>>>>> is discussed in pr78450 - strlen(s) return value can be assumed
>>>>>>>> to be less than the size of s.  The gist of it is that the
>> result
>>>>>>>> of strlen(array) can be assumed to be less than the size of
>>>>>>>> the array (except in the corner case of last struct members).
>>>>>>>>
>>>>>>>> To avoid the false positive the attached patch adds this
>>>>>>>> optimization to the strlen pass.  Although the patch passes
>>>>>>>> bootstrap and regression tests for all front-ends I'm not sure
>>>>>>>> the way it determines the upper bound of the range is 100%
>>>>>>>> correct for languages with arrays with a non-zero lower bound.
>>>>>>>> Maybe it's just not as tight as it could be.
>>>>>>>
>>>>>>>
>>>>>>> What about something hideous like
>>>>>>>
>>>>>>> struct fu {
>>>>>>>   char x1[10];
>>>>>>>   char x2[10];
>>>>>>>   int avoid_trailing_array;
>>>>>>> }
>>>>>>>
>>>>>>> Where objects stored in x1 are not null terminated.  Are we in
>> the realm
>>>>>>> of undefined behavior at that point (I hope so)?
>>>>>>
>>>>>>
>>>>>>
>>>>>> Yes, this is undefined.  Pointer arithmetic (either direct or
>>>>>> via standard library functions) is only defined for pointers
>>>>>> to the same object or subobject.  So even something like
>>>>>>
>>>>>>  memcpy (pfu->x1, pfu->x1 + 10, 10);
>>>>>>
>>>>>> is undefined.
>>>>>
>>>>>
>>>>> There's nothing undefined here - computing the pointer pointing
>>>>> to one-after-the-last element of an array is valid (you are just
>>>>> not allowed to dereference it).
>>>>
>>>>
>>>> Right, and memcpy dereferences it, so it's undefined.
>>>
>>> That's interpretation of the standard that I don't share.
>>
>> It's not an interpretation.  It's a basic rule of the languages
>> that the standards are explicit about.  In C11 you will find
>> this specified in detail in 6.5.6, paragraph 7 and 8 (of
>> particular relevance to your question below is p7: "a pointer
>> to an object that is not an element of an array behaves the same
>> as a pointer to the first element of an array of length one.")
>
> I know.
>
>>> Also, if I have struct f { int i; int j; };  and a int * that points
>>> to the j member you say I have no standard conforming way
>>> to get at a pointer to the i member from this, right?
>>
>> Correct.  See above.
>>
>>> Because
>>> the pointer points to an 'int' object.  But it also points within
>>> a struct f object!  So at least maybe (int *)((char *)p - offsetof
>>> (struct f, j))
>>> should be valid?
>>
>> No, not really.  It works in practice but it's not well-defined.
>> It doesn't matter how you get at the result.  What matters is
>> what you start with.  As Jeff said, to derive a pointer to
>> distinct suobjects of a larger object you need to start with
>> a pointer to the larger object and treat it as an array of
>> chars.
>
> That's obviously not constraints people use C and C++ with so I see no way to enforce this within gimple.

There's code out there that relies on all sorts of undefined
behavior.  It's a judgment call in each instance as to how much
of such code exists and how important it is.  In this case, I'd
expect it to be confined to low-level software like OS kernels
and such whose authors use C as a more convenient assembly
language to talk directly to the hardware.  Programmers in other
domains are usually more conscious of the requirements and limited
guarantees of the language and less willing to make assumptions
based on what this or that processor lets them get away with.

That being said, it certainly is possible to enforce this
constraint within GIMPLE.  My -Wrestrict patch does it to
an extent for the memory and string built-ins.  The -Warray-bounds
patch I submitted for offsets does it for all other expressions.
Neither patch exposed any such code in the Linux kernel, so it
doesn't look like abuses of this sort are common even in low-level
code.

Martin

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] set range for strlen(array) to avoid spurious -Wstringop-overflow (PR 83373 , PR 78450)
  2017-12-15 17:29               ` Martin Sebor
@ 2017-12-16 17:38                 ` Martin Sebor
  0 siblings, 0 replies; 18+ messages in thread
From: Martin Sebor @ 2017-12-16 17:38 UTC (permalink / raw)
  To: Richard Biener; +Cc: Jeff Law, Gcc Patch List

On 12/15/2017 10:29 AM, Martin Sebor wrote:
> On 12/15/2017 09:17 AM, Richard Biener wrote:
>> On December 15, 2017 4:58:14 PM GMT+01:00, Martin Sebor
>> <msebor@gmail.com> wrote:
>>> On 12/15/2017 01:48 AM, Richard Biener wrote:
>>>> On Thu, Dec 14, 2017 at 5:01 PM, Martin Sebor <msebor@gmail.com>
>>> wrote:
>>>>> On 12/14/2017 03:43 AM, Richard Biener wrote:
>>>>>>
>>>>>> On Wed, Dec 13, 2017 at 4:47 AM, Martin Sebor <msebor@gmail.com>
>>> wrote:
>>>>>>>
>>>>>>> On 12/12/2017 05:35 PM, Jeff Law wrote:
>>>>>>>>
>>>>>>>>
>>>>>>>> On 12/12/2017 01:15 PM, Martin Sebor wrote:
>>>>>>>>>
>>>>>>>>>
>>>>>>>>> Bug 83373 - False positive reported by -Wstringop-overflow, is
>>>>>>>>> another example of warning triggered by a missed optimization
>>>>>>>>> opportunity, this time in the strlen pass.  The optimization
>>>>>>>>> is discussed in pr78450 - strlen(s) return value can be assumed
>>>>>>>>> to be less than the size of s.  The gist of it is that the
>>> result
>>>>>>>>> of strlen(array) can be assumed to be less than the size of
>>>>>>>>> the array (except in the corner case of last struct members).
>>>>>>>>>
>>>>>>>>> To avoid the false positive the attached patch adds this
>>>>>>>>> optimization to the strlen pass.  Although the patch passes
>>>>>>>>> bootstrap and regression tests for all front-ends I'm not sure
>>>>>>>>> the way it determines the upper bound of the range is 100%
>>>>>>>>> correct for languages with arrays with a non-zero lower bound.
>>>>>>>>> Maybe it's just not as tight as it could be.
>>>>>>>>
>>>>>>>>
>>>>>>>> What about something hideous like
>>>>>>>>
>>>>>>>> struct fu {
>>>>>>>>   char x1[10];
>>>>>>>>   char x2[10];
>>>>>>>>   int avoid_trailing_array;
>>>>>>>> }
>>>>>>>>
>>>>>>>> Where objects stored in x1 are not null terminated.  Are we in
>>> the realm
>>>>>>>> of undefined behavior at that point (I hope so)?
>>>>>>>
>>>>>>>
>>>>>>>
>>>>>>> Yes, this is undefined.  Pointer arithmetic (either direct or
>>>>>>> via standard library functions) is only defined for pointers
>>>>>>> to the same object or subobject.  So even something like
>>>>>>>
>>>>>>>  memcpy (pfu->x1, pfu->x1 + 10, 10);
>>>>>>>
>>>>>>> is undefined.
>>>>>>
>>>>>>
>>>>>> There's nothing undefined here - computing the pointer pointing
>>>>>> to one-after-the-last element of an array is valid (you are just
>>>>>> not allowed to dereference it).
>>>>>
>>>>>
>>>>> Right, and memcpy dereferences it, so it's undefined.
>>>>
>>>> That's interpretation of the standard that I don't share.
>>>
>>> It's not an interpretation.  It's a basic rule of the languages
>>> that the standards are explicit about.  In C11 you will find
>>> this specified in detail in 6.5.6, paragraph 7 and 8 (of
>>> particular relevance to your question below is p7: "a pointer
>>> to an object that is not an element of an array behaves the same
>>> as a pointer to the first element of an array of length one.")
>>
>> I know.
>>
>>>> Also, if I have struct f { int i; int j; };  and a int * that points
>>>> to the j member you say I have no standard conforming way
>>>> to get at a pointer to the i member from this, right?
>>>
>>> Correct.  See above.
>>>
>>>> Because
>>>> the pointer points to an 'int' object.  But it also points within
>>>> a struct f object!  So at least maybe (int *)((char *)p - offsetof
>>>> (struct f, j))
>>>> should be valid?
>>>
>>> No, not really.  It works in practice but it's not well-defined.
>>> It doesn't matter how you get at the result.  What matters is
>>> what you start with.  As Jeff said, to derive a pointer to
>>> distinct suobjects of a larger object you need to start with
>>> a pointer to the larger object and treat it as an array of
>>> chars.
>>
>> That's obviously not constraints people use C and C++ with so I see no
>> way to enforce this within gimple.
>
> There's code out there that relies on all sorts of undefined
> behavior.  It's a judgment call in each instance as to how much
> of such code exists and how important it is.  In this case, I'd
> expect it to be confined to low-level software like OS kernels
> and such whose authors use C as a more convenient assembly
> language to talk directly to the hardware.  Programmers in other
> domains are usually more conscious of the requirements and limited
> guarantees of the language and less willing to make assumptions
> based on what this or that processor lets them get away with.
>
> That being said, it certainly is possible to enforce this
> constraint within GIMPLE.  My -Wrestrict patch does it to
> an extent for the memory and string built-ins.  The -Warray-bounds
> patch I submitted for offsets does it for all other expressions.
> Neither patch exposed any such code in the Linux kernel, so it
> doesn't look like abuses of this sort are common even in low-level
> code.

Let me correct myself on this last point.  I forgot I had disabled
the offset checking for memcpy.  With it enabled, the enhanced
-Warray-bounds warning finds about a dozen distinct instances of
memcpy calls in the Linux kernel that cross the subobject boundary.

Most of them involve struct ieee80211_hdr and its adjacent unsigned
char array members.  I think it would be worthwhile to do a more
careful review and, if this turns out to be the only (or predominant)
use case, consider tightening up GCC checkers to allow only it but
not cases involving members of any other types (especially
non-arrays).  The most troublesome of those being variations on:

   struct S {
     ...
     char array[N];
     void (*pfunc)(void);
     ...
   };

   extern struct S *ps;

   memcpy (ps->array, data, N + X);

where the memcpy call clobbers the function pointer ps->pfunc.

Martin

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] set range for strlen(array) to avoid spurious -Wstringop-overflow (PR 83373 , PR 78450)
  2017-12-14 19:04             ` Jeff Law
@ 2017-12-18 22:53               ` Martin Sebor
  0 siblings, 0 replies; 18+ messages in thread
From: Martin Sebor @ 2017-12-18 22:53 UTC (permalink / raw)
  To: Jeff Law, Jakub Jelinek; +Cc: Gcc Patch List

On 12/14/2017 12:04 PM, Jeff Law wrote:
> On 12/14/2017 11:55 AM, Jakub Jelinek wrote:
>> On Thu, Dec 14, 2017 at 11:51:26AM -0700, Martin Sebor wrote:
>>>> Well, it would be nice to get sanitizers diagnose this at runtime.  If we
>>>> know the array length at compile time, simply compare after the strlen
>>>> call the result and fail if it returns something above it.  Or replace
>>>> the strlen call with strnlen for the compile time known size and add
>>>> instrumentation if strnlen returns the second argument.
>>>
>>> Sure, that sounds like a useful enhancement.  I'll look into
>>> adding it as a follow-on patch unless you feel that it needs
>>> to be part of the same package.
>>
>> The problem is if we'll need changes to libubsan for that (which we'll
>> likely do), then those need to be upstreamed, and e.g. my attempts
>> to upstream simple patch to diagnose noreturn function returns is suspended
>> upstream because clang doesn't have that support (and I have no interest
>> in adding to to clang).
>>
>> In theory we could have some GCC only file in there, but then we'd be ABI
>> incompatible with them.
> So defer the sanitization side until Clang catches up?

I've committed the patch as is in r255790.  If I have some spare
cycles I'll see if the instrumentation is possible without changing
libubsan.

Martin

^ permalink raw reply	[flat|nested] 18+ messages in thread

end of thread, other threads:[~2017-12-18 22:53 UTC | newest]

Thread overview: 18+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-12-12 20:15 [PATCH] set range for strlen(array) to avoid spurious -Wstringop-overflow (PR 83373 , PR 78450) Martin Sebor
2017-12-13  0:35 ` Jeff Law
2017-12-13  3:47   ` Martin Sebor
2017-12-14 10:43     ` Richard Biener
2017-12-14 16:01       ` Martin Sebor
2017-12-15  8:50         ` Richard Biener
2017-12-15 15:58           ` Martin Sebor
2017-12-15 16:17             ` Richard Biener
2017-12-15 17:29               ` Martin Sebor
2017-12-16 17:38                 ` Martin Sebor
2017-12-14 16:13     ` Jeff Law
2017-12-14 16:19       ` Jakub Jelinek
2017-12-14 18:51         ` Martin Sebor
2017-12-14 18:55           ` Jakub Jelinek
2017-12-14 19:04             ` Jeff Law
2017-12-18 22:53               ` Martin Sebor
2017-12-13  7:25 ` Bernhard Reutner-Fischer
2017-12-13 16:16   ` Martin Sebor

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).