public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] c/67882 - improve -Warray-bounds for invalid offsetof
@ 2015-10-09  2:55 Martin Sebor
  2015-10-15 21:59 ` [PING] " Martin Sebor
  2015-10-16 12:28 ` Bernd Schmidt
  0 siblings, 2 replies; 30+ messages in thread
From: Martin Sebor @ 2015-10-09  2:55 UTC (permalink / raw)
  To: Gcc Patch List

[-- Attachment #1: Type: text/plain, Size: 439 bytes --]

Gcc attempts to diagnose invalid offsetof expressions whose member
designator is an array element with an out-of-bounds index. The
logic in the function that does this detection is incomplete, leading
to false negatives. Since the result of the expression in these cases
can be surprising, this patch tightens up the logic to diagnose more
such cases.

Tested by boostrapping and running c/c++ tests on x86_64 with no
regressions.

Martin

[-- Attachment #2: gcc-67882-surprising_offsetof_result.patch --]
[-- Type: text/x-patch, Size: 12130 bytes --]

diff --git a/gcc/c-family/c-common.c b/gcc/c-family/c-common.c
index 4b922bf..fc7c991d 100644
--- a/gcc/c-family/c-common.c
+++ b/gcc/c-family/c-common.c
@@ -10536,12 +10536,31 @@ c_common_to_target_charset (HOST_WIDE_INT c)
 
 /* Fold an offsetof-like expression.  EXPR is a nested sequence of component
    references with an INDIRECT_REF of a constant at the bottom; much like the
-   traditional rendering of offsetof as a macro.  Return the folded result.  */
+   traditional rendering of offsetof as a macro.  Return the folded result.
+   PCTX, which is initially null, is set by the first recursive call of
+   the function to refer to a local object describing the potentially out-
+   of-bounds index of the array member whose offset is being computed, and
+   to indicate whether all indices to the same array object have the highest
+   valid value.  The function issues a warning for out-of-bounds array indices
+   that either refer to elements past the one just past end of the array object
+   or that exceed any of the major bounds.  */
+
+struct offsetof_ctx_t
+{
+  tree inx;     /* The invalid array index or NULL_TREE.  */
+  int maxinx;   /* All indices to the array have the highest valid value. */
+};
 
 tree
-fold_offsetof_1 (tree expr)
+fold_offsetof_1 (tree expr, offsetof_ctx_t *pctx /* = 0 */)
 {
   tree base, off, t;
+  offsetof_ctx_t ctx = { NULL_TREE, -1 };
+
+  /* Set the context pointer to point to the local context object
+     to use by subsequent recursive calls.  */
+  if (!pctx)
+    pctx = &ctx;
 
   switch (TREE_CODE (expr))
     {
@@ -10567,10 +10586,19 @@ fold_offsetof_1 (tree expr)
       return TREE_OPERAND (expr, 0);
 
     case COMPONENT_REF:
-      base = fold_offsetof_1 (TREE_OPERAND (expr, 0));
+      base = fold_offsetof_1 (TREE_OPERAND (expr, 0), pctx);
       if (base == error_mark_node)
 	return base;
 
+      if (ctx.inx != NULL_TREE) {
+	warning (OPT_Warray_bounds,
+		 "index %E denotes an offset "
+		 "greater than size of %qT",
+		 ctx.inx, TREE_TYPE (TREE_OPERAND (expr, 0)));
+	/* Reset to avoid diagnosing the same expression multiple times.  */
+	pctx->inx = NULL_TREE;
+      }
+
       t = TREE_OPERAND (expr, 1);
       if (DECL_C_BIT_FIELD (t))
 	{
@@ -10581,10 +10609,11 @@ fold_offsetof_1 (tree expr)
       off = size_binop_loc (input_location, PLUS_EXPR, DECL_FIELD_OFFSET (t),
 			    size_int (tree_to_uhwi (DECL_FIELD_BIT_OFFSET (t))
 				      / BITS_PER_UNIT));
+      pctx->maxinx = -1;
       break;
 
     case ARRAY_REF:
-      base = fold_offsetof_1 (TREE_OPERAND (expr, 0));
+      base = fold_offsetof_1 (TREE_OPERAND (expr, 0), pctx);
       if (base == error_mark_node)
 	return base;
 
@@ -10601,8 +10630,10 @@ fold_offsetof_1 (tree expr)
 	    {
 	      upbound = size_binop (PLUS_EXPR, upbound,
 				    build_int_cst (TREE_TYPE (upbound), 1));
-	      if (tree_int_cst_lt (upbound, t))
-		{
+
+	      if (tree_int_cst_lt (upbound, t)) {
+		pctx->inx = t;
+
 		tree v;
 
 		for (v = TREE_OPERAND (expr, 0);
@@ -10622,25 +10653,61 @@ fold_offsetof_1 (tree expr)
 		/* Don't warn if the array might be considered a poor
 		   man's flexible array member with a very permissive
 		   definition thereof.  */
-		  if (TREE_CODE (v) == ARRAY_REF
-		      || TREE_CODE (v) == COMPONENT_REF)
+		if (TREE_CODE (v) != ARRAY_REF
+		    && TREE_CODE (v) != COMPONENT_REF)
+		  pctx = NULL;
+	      }
+	      else if (pctx->inx == NULL_TREE && tree_int_cst_equal (upbound, t))
+		{
+		  /* Index is considered valid when it's either less than
+		     the upper bound or equal to it and refers to the lowest
+		     rank.  Since in the latter case it may not at this point
+		     in the recursive call to the function be known whether
+		     the element at the index is used to do more than to
+		     compute its offset (e.g., it can be used to designate
+		     a member of the type to which the just past-the-end
+		     element refers), point the INX variable at the index
+		     and leave it up to the caller to decide whether or not
+		     to diagnose it.  Special handling is required for minor
+		     index values referring to the element just past the end
+		     of the array object.  */
+		  pctx->inx = t;
+		  tree_code lastcode = TREE_CODE (TREE_OPERAND (expr, 0));
+		  if ((lastcode != ARRAY_REF && pctx != &ctx)
+		      || (pctx == &ctx && pctx->maxinx))
+		    pctx = NULL;
+		}
+	      else
+		{
+		  HOST_WIDE_INT ubi = tree_to_shwi (upbound);
+		  HOST_WIDE_INT inx = tree_to_shwi (t);
+
+		  if (pctx->maxinx)
+		    pctx->maxinx = inx + 1 == ubi;
+		}
+	    }
+	}
+
+      if (pctx && pctx->inx)
+	{
 	  warning (OPT_Warray_bounds,
 		   "index %E denotes an offset "
 		   "greater than size of %qT",
-			     t, TREE_TYPE (TREE_OPERAND (expr, 0)));
-		}
-	    }
+		   pctx->inx, TREE_TYPE (TREE_OPERAND (expr, 0)));
+
+	  pctx->inx = NULL_TREE;
 	}
 
       t = convert (sizetype, t);
       off = size_binop (MULT_EXPR, TYPE_SIZE_UNIT (TREE_TYPE (expr)), t);
+
       break;
 
     case COMPOUND_EXPR:
       /* Handle static members of volatile structs.  */
       t = TREE_OPERAND (expr, 1);
       gcc_assert (VAR_P (t));
-      return fold_offsetof_1 (t);
+      return fold_offsetof_1 (t, pctx);
 
     default:
       gcc_unreachable ();
diff --git a/gcc/c-family/c-common.h b/gcc/c-family/c-common.h
index 74d1bc1..72e2f95 100644
--- a/gcc/c-family/c-common.h
+++ b/gcc/c-family/c-common.h
@@ -1015,7 +1015,8 @@ extern bool c_dump_tree (void *, tree);
 
 extern void verify_sequence_points (tree);
 
-extern tree fold_offsetof_1 (tree);
+struct offsetof_ctx_t;
+extern tree fold_offsetof_1 (tree, offsetof_ctx_t* = NULL);
 extern tree fold_offsetof (tree);
 
 /* Places where an lvalue, or modifiable lvalue, may be required.
diff --git a/gcc/testsuite/c-c++-common/builtin-offsetof-2.c b/gcc/testsuite/c-c++-common/builtin-offsetof-2.c
new file mode 100644
index 0000000..09979fd
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/builtin-offsetof-2.c
@@ -0,0 +1,157 @@
+// { dg-options "-Warray-bounds" }
+// { dg-do compile }
+
+// Test case exercising pr c/67882 - surprising offsetof result
+//   on an invalid array member without diagnostic.
+
+typedef struct A {
+  char a1_1[1][1];
+  char a[];
+} A;
+
+typedef struct A2 {
+  char a1_1[1][1];
+  char c;
+} A2;
+
+typedef struct B {
+  A2 a2_3[2][3];
+  char a1_1[3][5];
+  char a[];
+} B;
+
+// Structures with members that contain flexible array members are
+// an extension accepted by GCC.
+typedef struct C {
+  A a5_7 [5][7];
+  int a;
+} C;
+
+// Structs with a "fake" flexible array member (a GCC extension).
+typedef struct FA0 {
+  int i;
+  char a0 [0];
+} FA0;
+
+typedef struct FA1 {
+  int i;
+  char a1 [1];
+} FA1;
+
+typedef struct FA3 {
+  int i;
+  char a3 [3];
+} FA3;
+
+// A "fake" multidimensional flexible array member deserves special
+// treatment since the offsetof exression yields the same result for
+// references to apparently distinct members (e.g., a5_7[0][7] is
+// at the same offset as a5_7[1][0] which may be surprising).
+typedef struct FA5_7 {
+  int i;
+  char a5_7 [5][7];
+} FA5_7;
+
+static void test (void)
+{
+  // Verify that offsetof references to array elements past the end of
+  // the array member are diagnosed.  As an extension, permit references
+  // to the element just past-the-end of the array.
+
+  int a[] = {
+    __builtin_offsetof (A, a),              // valid
+    __builtin_offsetof (A, a [0]),          // valid
+    __builtin_offsetof (A, a [1]),          // valid
+    __builtin_offsetof (A, a [99]),         // valid
+
+    __builtin_offsetof (A, a1_1 [0][0]),    // valid
+
+    // The following expression is silently accepted as an extension
+    // because it simply forms the equivalent of a just-past-the-end
+    // address.
+    __builtin_offsetof (A, a1_1 [0][1]),    // extension
+
+    __builtin_offsetof (A, a1_1 [1][0]),    // { dg-warning "index" }
+    __builtin_offsetof (A, a1_1 [1][1]),    // { dg-warning "index" }
+
+    __builtin_offsetof (B, a2_3 [0][0].c),           // valid
+    __builtin_offsetof (B, a2_3 [0][0].a1_1 [0][0]), // valid
+
+    // The following expressions are silently accepted as an extension
+    // because they simply form the equivalent of a just-past-the-end
+    // address.
+    __builtin_offsetof (B, a2_3 [1][3]),             // extension
+    __builtin_offsetof (B, a2_3 [0][0].a1_1 [0][1]), // extension
+
+    __builtin_offsetof (B, a2_3 [0][0].a1_1 [1][0]), // { dg-warning "index" }
+    __builtin_offsetof (B, a2_3 [0][0].a1_1 [1][1]), // { dg-warning "index" }
+
+    __builtin_offsetof (B, a2_3 [1][2].a1_1 [0][0]), // valid
+
+    // Forming an offset to the just-past-end element is accepted.
+    __builtin_offsetof (B, a2_3 [1][2].a1_1 [0][1]), // extension
+    __builtin_offsetof (B, a2_3 [1][2].a1_1 [1][0]), // { dg-warning "index" }
+    __builtin_offsetof (B, a2_3 [1][2].a1_1 [1][1]), // { dg-warning "index" }
+
+    // Forming an offset to the just-past-end element is accepted.
+    __builtin_offsetof (B, a2_3 [1][3]),             // exension
+    // ...but these are diagnosed because they dereference a just-path-the-end
+    // element.
+    __builtin_offsetof (B, a2_3 [1][3].a1_1 [0][0]), // { dg-warning "index" }
+    __builtin_offsetof (B, a2_3 [1][3].a1_1 [0][0]), // { dg-warning "index" }
+    __builtin_offsetof (B, a2_3 [1][3].a1_1 [0][1]), // { dg-warning "index" }
+    __builtin_offsetof (B, a2_3 [1][3].a1_1 [1][0]), // { dg-warning "index" }
+    __builtin_offsetof (B, a2_3 [1][3].a1_1 [1][1]), // { dg-warning "index" }
+
+    // Analogous to the case above, these are both diagnosed because they
+    // dereference just-path-the-end elements of the a2_3 array.
+    __builtin_offsetof (B, a2_3 [1][3].c),       // { dg-warning "index" }
+    __builtin_offsetof (B, a2_3 [1][3].c),       // { dg-warning "index" }
+
+    // The following are all invalid because of the reference to a2_3[2].
+    __builtin_offsetof (B, a2_3 [2][0].a1_1 [0][0]), // { dg-warning "index" }
+    __builtin_offsetof (B, a2_3 [2][0].a1_1 [0][1]), // { dg-warning "index" }
+    __builtin_offsetof (B, a2_3 [2][0].a1_1 [1][0]), // { dg-warning "index" }
+    __builtin_offsetof (B, a2_3 [2][0].a1_1 [1][1]), // { dg-warning "index" }
+    __builtin_offsetof (B, a2_3 [2][0].c),           // { dg-warning "index" }
+
+    __builtin_offsetof (C, a5_7 [4][6]),
+    __builtin_offsetof (C, a5_7 [4][6].a),
+    __builtin_offsetof (C, a5_7 [4][6].a [0]),
+    __builtin_offsetof (C, a5_7 [4][6].a [99]),
+
+    __builtin_offsetof (C, a5_7 [4][7]),             // extension
+    // Diagnose the following even though the object whose offset is
+    // computed is a flexible array member.
+    __builtin_offsetof (C, a5_7 [4][7].a),           // { dg-warning "index" }
+    __builtin_offsetof (C, a5_7 [4][7].a [0]),       // { dg-warning "index" }
+    __builtin_offsetof (C, a5_7 [4][7].a [99]),      // { dg-warning "index" }
+
+    // Verify that no diagnostic is issued for offsetof expressions
+    // involving structs where the array has a rank of 1 and is the last
+    // member (e.g., those are treated as flexible array members).
+    __builtin_offsetof (FA0, a0 [0]),
+    __builtin_offsetof (FA0, a0 [1]),
+    __builtin_offsetof (FA0, a0 [99]),
+
+    __builtin_offsetof (FA1, a1 [0]),
+    __builtin_offsetof (FA1, a1 [1]),
+    __builtin_offsetof (FA1, a1 [99]),
+
+    __builtin_offsetof (FA3, a3 [0]),
+    __builtin_offsetof (FA3, a3 [3]),
+    __builtin_offsetof (FA3, a3 [99]),
+
+    __builtin_offsetof (FA5_7, a5_7 [0][0]),
+
+    // Unlike one-dimensional arrays, verify that out-of-bounds references
+    // to arrays with rank of 2 and greater are diagnosed.
+    __builtin_offsetof (FA5_7, a5_7 [0][7]),         // { dg-warning "index" }
+    __builtin_offsetof (FA5_7, a5_7 [1][7]),         // { dg-warning "index" }
+    __builtin_offsetof (FA5_7, a5_7 [5][0]),         // { dg-warning "index" }
+    __builtin_offsetof (FA5_7, a5_7 [5][7]),         // { dg-warning "index" }
+    __builtin_offsetof (FA5_7, a5_7 [99][99])        // { dg-warning "index" }
+  };
+
+  (void)&a;
+}

^ permalink raw reply	[flat|nested] 30+ messages in thread

* [PING] [PATCH] c/67882 - improve -Warray-bounds for invalid offsetof
  2015-10-09  2:55 [PATCH] c/67882 - improve -Warray-bounds for invalid offsetof Martin Sebor
@ 2015-10-15 21:59 ` Martin Sebor
  2015-10-16 12:28 ` Bernd Schmidt
  1 sibling, 0 replies; 30+ messages in thread
From: Martin Sebor @ 2015-10-15 21:59 UTC (permalink / raw)
  To: Gcc Patch List

The patch is at the following link:

   https://gcc.gnu.org/ml/gcc-patches/2015-10/msg00915.html

Martin

On 10/08/2015 08:55 PM, Martin Sebor wrote:
> Gcc attempts to diagnose invalid offsetof expressions whose member
> designator is an array element with an out-of-bounds index. The
> logic in the function that does this detection is incomplete, leading
> to false negatives. Since the result of the expression in these cases
> can be surprising, this patch tightens up the logic to diagnose more
> such cases.
>
> Tested by boostrapping and running c/c++ tests on x86_64 with no
> regressions.
>
> Martin

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH] c/67882 - improve -Warray-bounds for invalid offsetof
  2015-10-09  2:55 [PATCH] c/67882 - improve -Warray-bounds for invalid offsetof Martin Sebor
  2015-10-15 21:59 ` [PING] " Martin Sebor
@ 2015-10-16 12:28 ` Bernd Schmidt
  2015-10-16 17:27   ` Joseph Myers
  2015-10-16 19:34   ` Martin Sebor
  1 sibling, 2 replies; 30+ messages in thread
From: Bernd Schmidt @ 2015-10-16 12:28 UTC (permalink / raw)
  To: Martin Sebor, Gcc Patch List, Joseph Myers

On 10/09/2015 04:55 AM, Martin Sebor wrote:
> Gcc attempts to diagnose invalid offsetof expressions whose member
> designator is an array element with an out-of-bounds index. The
> logic in the function that does this detection is incomplete, leading
> to false negatives. Since the result of the expression in these cases
> can be surprising, this patch tightens up the logic to diagnose more
> such cases.

In the future, please explain more clearly in the patch submission what 
the false negatives are. That'll make the reviewer's job easier.

> Tested by boostrapping and running c/c++ tests on x86_64 with no
> regressions.

Should run the full testsuite (standard practice, and library tests 
might have occurrences of offsetof).

A ChangeLog is missing. (Not that I personally care about ChangeLogs, 
but apparently others do.)

> +struct offsetof_ctx_t
> +{
> +  tree inx;     /* The invalid array index or NULL_TREE.  */
> +  int maxinx;   /* All indices to the array have the highest valid value. */
> +};

I think "idx" is commonly used, I've never seen the spelling "inx". 
Also, typically comments go on their own lines before the field.

> +
> +	      if (tree_int_cst_lt (upbound, t)) {
> +		pctx->inx = t;

None-standard formatting. Elsewhere too.

> +		  /* Index is considered valid when it's either less than
> +		     the upper bound or equal to it and refers to the lowest
> +		     rank.  Since in the latter case it may not at this point
> +		     in the recursive call to the function be known whether
> +		     the element at the index is used to do more than to
> +		     compute its offset (e.g., it can be used to designate
> +		     a member of the type to which the just past-the-end
> +		     element refers), point the INX variable at the index
> +		     and leave it up to the caller to decide whether or not
> +		     to diagnose it.  Special handling is required for minor
> +		     index values referring to the element just past the end
> +		     of the array object.  */

I admit to having trouble parsing this comment. Can you write that in a 
clearer way somehow? I'm still trying to make my mind up whether the 
logic in this patch could be simplified.

>         t = convert (sizetype, t);
>         off = size_binop (MULT_EXPR, TYPE_SIZE_UNIT (TREE_TYPE (expr)), t);
> +
>         break;

Spurious change, please remove.

> +extern tree fold_offsetof_1 (tree, offsetof_ctx_t* = NULL);

Space before *.

> +// treatment since the offsetof exression yields the same result for

"expression".

> +    // The following expression is silently accepted as an extension
> +    // because it simply forms the equivalent of a just-past-the-end
> +    // address.
> +    __builtin_offsetof (A, a1_1 [0][1]),    // extension

Hmm, do we really want to support any kind of multidimensional array for 
this extension? My guess would have been to warn here.

So I checked and it looks like we accept flexible array member syntax 
like "int a[][2];", which suggests that the test might have the right 
idea, but has the indices swapped (the first one is the flexible one)? 
Ccing Joseph for a ruling.


Bernd

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH] c/67882 - improve -Warray-bounds for invalid offsetof
  2015-10-16 12:28 ` Bernd Schmidt
@ 2015-10-16 17:27   ` Joseph Myers
  2015-10-16 19:34   ` Martin Sebor
  1 sibling, 0 replies; 30+ messages in thread
From: Joseph Myers @ 2015-10-16 17:27 UTC (permalink / raw)
  To: Bernd Schmidt; +Cc: Martin Sebor, Gcc Patch List

On Fri, 16 Oct 2015, Bernd Schmidt wrote:

> > +    // The following expression is silently accepted as an extension
> > +    // because it simply forms the equivalent of a just-past-the-end
> > +    // address.
> > +    __builtin_offsetof (A, a1_1 [0][1]),    // extension
> 
> Hmm, do we really want to support any kind of multidimensional array for this
> extension? My guess would have been to warn here.

We do deliberately want to support sequences of array indexing and 
structure / union member reference inside __builtin_offsetof.

> So I checked and it looks like we accept flexible array member syntax like
> "int a[][2];", which suggests that the test might have the right idea, but has
> the indices swapped (the first one is the flexible one)? Ccing Joseph for a
> ruling.

"int a[][2];" is a fully valid flexible array member (whose elements have 
type int[2]) - ISO C, not an extension.

Given such a flexible array member, [anything][0] and [anything][1] are 
logically valid if [anything] is nonnegative (of course, the array might 
or might not have enough element at runtime).  [anything][2] is a 
reference to just past the end of an element of the flexible array member; 
[anything][3] is clearly invalid.

Regarding the actual testcase, accepting a1_1[0][1] seems fine (it's 
referencing the just-past-end element of the valid array a1_1[0]).  The 
test unfortunately doesn't show what happens in cases with fewer indices - 
it should be expanded that way.  I'd say a1_1[1] should be accepted as 
just past end, even if a1_1[1][0] gets a warning as shown in the test.

-- 
Joseph S. Myers
joseph@codesourcery.com

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH] c/67882 - improve -Warray-bounds for invalid offsetof
  2015-10-16 12:28 ` Bernd Schmidt
  2015-10-16 17:27   ` Joseph Myers
@ 2015-10-16 19:34   ` Martin Sebor
  2015-10-20 13:21     ` Bernd Schmidt
  1 sibling, 1 reply; 30+ messages in thread
From: Martin Sebor @ 2015-10-16 19:34 UTC (permalink / raw)
  To: Bernd Schmidt, Gcc Patch List, Joseph Myers

[-- Attachment #1: Type: text/plain, Size: 2681 bytes --]

On 10/16/2015 06:27 AM, Bernd Schmidt wrote:
> On 10/09/2015 04:55 AM, Martin Sebor wrote:
>> Gcc attempts to diagnose invalid offsetof expressions whose member
>> designator is an array element with an out-of-bounds index. The
>> logic in the function that does this detection is incomplete, leading
>> to false negatives. Since the result of the expression in these cases
>> can be surprising, this patch tightens up the logic to diagnose more
>> such cases.

Thank you for the review. Attached is an updated patch that hopefully
addresses all your comments. I ran the check_GNU_style.sh script on
it to make sure I didn't miss something.  I've also added replies to
a few of your comments below.

>
> In the future, please explain more clearly in the patch submission what
> the false negatives are. That'll make the reviewer's job easier.

The false negatives are at a high level explained in the bug:

   GCC fails to diagnose cases of invalid offsetof expressions whose
   member designator refers to an array element past the end of the
   array plus one.

The example there is intended to illustrate the general problem.
Beyond that, the test included in the patch shows other examples.
With an unpatched GCC, the latest one shows failures on the
following lines:

   128, 129, 134, 141, 142, 148, 149, 155, 156, 157, 158, 159, 163,
   164, 167, 168, 169, 170, 171, 181, 182, 183, 204, 205, 206, and
   207.

Hopefully that along with the comments in the code makes the problem
clear enough but I'd be happy to add more of either if that helps.

>> Tested by boostrapping and running c/c++ tests on x86_64 with no
>> regressions.
>
> Should run the full testsuite (standard practice, and library tests
> might have occurrences of offsetof).

Yes, that is what I meant by running c/c+ tests (i.e., I configured
gcc with --enable-languages=c,c++, bootstrapped it, and ran make
check).

>> +          /* Index is considered valid when it's either less than
>> +             ...
> I admit to having trouble parsing this comment. Can you write that in a
> clearer way somehow? I'm still trying to make my mind up whether the
> logic in this patch could be simplified.

I've reworded and expanded the comment in the updated patch. Please
let me know if it's still unclear.

> So I checked and it looks like we accept flexible array member syntax
> like "int a[][2];", which suggests that the test might have the right
> idea, but has the indices swapped (the first one is the flexible one)?
> Ccing Joseph for a ruling.

I believe the test is in line with Joseph's expectation. I added
a few more test cases to cover the constructs he referred to in
his response (IIUC).

Martin

[-- Attachment #2: gcc-67882-surprising_offsetof_result.patch --]
[-- Type: text/x-patch, Size: 16722 bytes --]

gcc/ChangeLog
2015-10-16  Martin Sebor  <msebor@redhat.com>

	PR c++-common/67882
	* c-family/c-common.h (struct offsetof_ctx_t): Declare.
	(fold_offsetof_1): Add argument.
	* c-family/c-common.c (struct offsetof_ctx_t): Define.
	(fold_offsetof_1): Diagnose more invalid offsetof expressions
	that reference elements past the end of an array.

testsuite/ChangeLog
2015-10-16  Martin Sebor  <msebor@redhat.com>

	PR c++-common/67882
	* c-c++-common/builtin-offsetof-2.c: New test.

diff --git a/gcc/c-family/c-common.c b/gcc/c-family/c-common.c
index 4b922bf..c313a78 100644
--- a/gcc/c-family/c-common.c
+++ b/gcc/c-family/c-common.c
@@ -10535,13 +10535,38 @@ c_common_to_target_charset (HOST_WIDE_INT c)
 }

 /* Fold an offsetof-like expression.  EXPR is a nested sequence of component
-   references with an INDIRECT_REF of a constant at the bottom; much like the
-   traditional rendering of offsetof as a macro.  Return the folded result.  */
+   references with an INDIRECT_REF of a constant at the bottom; much like
+   the traditional rendering of offsetof as a macro.  Return the folded
+   result.  PCTX, which is initially null, is intended only for internal
+   use by the function.  It is set by the first recursive invocation of
+   the function to refer to a local object describing the potentially
+   out-of-bounds index of the array member whose offset is being computed,
+   and to indicate whether all indices to the same array object have
+   the highest valid value.  The function issues a warning for out-of-
+   bounds array indices that either refer to elements past the one just
+   past the end of the array object or that exceed any of the major
+   bounds.  */
+
+struct offsetof_ctx_t
+{
+  /* The possibly invalid array index or NULL_TREE.  */
+  tree index;
+  /* Clear when no index to the (possibly multi-dimensional) array
+     is known to have the same value as the corresponding upper bound
+     minus one.  Negative when unknown/don't care, positive otherwise.  */
+  int max_index;
+};

 tree
-fold_offsetof_1 (tree expr)
+fold_offsetof_1 (tree expr, offsetof_ctx_t *pctx /* = 0 */)
 {
   tree base, off, t;
+  offsetof_ctx_t ctx = { NULL_TREE, -1 };
+
+  /* Set the context pointer to point to the local context object
+     to use by subsequent recursive calls.  */
+  if (!pctx)
+    pctx = &ctx;

   switch (TREE_CODE (expr))
     {
@@ -10567,10 +10592,19 @@ fold_offsetof_1 (tree expr)
       return TREE_OPERAND (expr, 0);

     case COMPONENT_REF:
-      base = fold_offsetof_1 (TREE_OPERAND (expr, 0));
+      base = fold_offsetof_1 (TREE_OPERAND (expr, 0), pctx);
       if (base == error_mark_node)
 	return base;

+      if (ctx.index != NULL_TREE)
+	{
+	  warning (OPT_Warray_bounds,
+		   "index %E denotes an offset greater than size of %qT",
+		   ctx.index, TREE_TYPE (TREE_OPERAND (expr, 0)));
+	  /* Reset to avoid diagnosing the same expression multiple times.  */
+	  pctx->index = NULL_TREE;
+	}
+
       t = TREE_OPERAND (expr, 1);
       if (DECL_C_BIT_FIELD (t))
 	{
@@ -10581,10 +10615,15 @@ fold_offsetof_1 (tree expr)
       off = size_binop_loc (input_location, PLUS_EXPR, DECL_FIELD_OFFSET (t),
 			    size_int (tree_to_uhwi (DECL_FIELD_BIT_OFFSET (t))
 				      / BITS_PER_UNIT));
+      /* At this point we've either diagnosed a prior out of bounds array
+	 index above, or the array index is valid (or one hasn't been yet
+	 seen).  Reset MAX_INDEX to unknown/don't care and start from
+	 scratch.  */
+      pctx->max_index = -1;
       break;

     case ARRAY_REF:
-      base = fold_offsetof_1 (TREE_OPERAND (expr, 0));
+      base = fold_offsetof_1 (TREE_OPERAND (expr, 0), pctx);
       if (base == error_mark_node)
 	return base;

@@ -10603,6 +10642,8 @@ fold_offsetof_1 (tree expr)
 				    build_int_cst (TREE_TYPE (upbound), 1));
 	      if (tree_int_cst_lt (upbound, t))
 		{
+		  pctx->index = t;
+
 		  tree v;

 		  for (v = TREE_OPERAND (expr, 0);
@@ -10622,14 +10663,77 @@ fold_offsetof_1 (tree expr)
 		  /* Don't warn if the array might be considered a poor
 		     man's flexible array member with a very permissive
 		     definition thereof.  */
-		  if (TREE_CODE (v) == ARRAY_REF
-		      || TREE_CODE (v) == COMPONENT_REF)
+		  if (TREE_CODE (v) != ARRAY_REF
+		      && TREE_CODE (v) != COMPONENT_REF)
+		    pctx = NULL;
+		}
+	      else if (pctx->index == NULL_TREE
+		       && tree_int_cst_equal (upbound, t))
+		{
+		  /* The value of an index into an array is considered valid
+		     under any of the following conditions:
+
+		     1) it's less than the upper bound of the corresponding
+		     dimension of the array (where the upper bound is the
+		     size of the dimension, such as 3 in A[3]), or
+
+		     2) as a GCC extension, it's equal to the upper bound
+		     and the (non-existent, just-past-the-end) element it
+		     refers to is not used to reference a member of the
+		     element's type, or
+
+		     3) as a GCC extension, it's equal to the upper bound
+		     of the lower dimension of a flexible array member.
+
+		     For example, given struct S containing the member
+		     A[3][7], the expression offsetof (S, A[2][7]) is
+		     considered valid since it computes the equivalent
+		     of the address of A's last element plus one.  Given
+		     an object of struct S named s, this expression yields
+		     the same result as (char*)&s.A[2][7] - (char*)&s,
+		     which is a valid expression.
+		     On the other hand, offsetof (S, A[3]0]) is not valid,
+		     and neither is offsetof (S, A[2][7].foo).
+
+		     When an index is found to be equal to the upper bound
+		     of the lowest dimension, it is not known at this point
+		     in the recursive invocation of the function whether
+		     the offsetof expression is used to compute the offset
+		     of the just-past-the-end element at the index, or to
+		     designate a member of the type to which the just-past-
+		     the-end element refers (as in offsetof(S, A[2][7].foo)).
+		     Therefore, point the INDEX variable at the index and
+		     leave it up to the caller to decide whether or not
+		     to diagnose it.
+
+		     Special handling is also required for minor index
+		     values referring to the element just past the end
+		     of the array object.  */
+		  pctx->index = t;
+		  tree_code lastcode = TREE_CODE (TREE_OPERAND (expr, 0));
+		  if ((lastcode != ARRAY_REF && pctx != &ctx)
+		      || (pctx == &ctx && pctx->max_index))
+		    pctx = NULL;
+		}
+	      else
+		{
+		  HOST_WIDE_INT ubi = tree_to_shwi (upbound);
+		  HOST_WIDE_INT idx = tree_to_shwi (t);
+
+		  if (pctx->max_index)
+		    pctx->max_index = idx + 1 == ubi;
+		}
+	    }
+	}
+
+      if (pctx && pctx->index)
+	{
 	  warning (OPT_Warray_bounds,
 		   "index %E denotes an offset "
 		   "greater than size of %qT",
-			     t, TREE_TYPE (TREE_OPERAND (expr, 0)));
-		}
-	    }
+		   pctx->index, TREE_TYPE (TREE_OPERAND (expr, 0)));
+
+	  pctx->index = NULL_TREE;
 	}

       t = convert (sizetype, t);
@@ -10640,7 +10744,7 @@ fold_offsetof_1 (tree expr)
       /* Handle static members of volatile structs.  */
       t = TREE_OPERAND (expr, 1);
       gcc_assert (VAR_P (t));
-      return fold_offsetof_1 (t);
+      return fold_offsetof_1 (t, pctx);

     default:
       gcc_unreachable ();
diff --git a/gcc/c-family/c-common.h b/gcc/c-family/c-common.h
index 74d1bc1..a4ccf2f 100644
--- a/gcc/c-family/c-common.h
+++ b/gcc/c-family/c-common.h
@@ -1015,7 +1015,8 @@ extern bool c_dump_tree (void *, tree);

 extern void verify_sequence_points (tree);

-extern tree fold_offsetof_1 (tree);
+struct offsetof_ctx_t;
+extern tree fold_offsetof_1 (tree, offsetof_ctx_t * = NULL);
 extern tree fold_offsetof (tree);

 /* Places where an lvalue, or modifiable lvalue, may be required.
diff --git a/gcc/testsuite/c-c++-common/builtin-offsetof-2.c b/gcc/testsuite/c-c++-common/builtin-offsetof-2.c
new file mode 100644
index 0000000..490ecf1
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/builtin-offsetof-2.c
@@ -0,0 +1,212 @@
+// { dg-options "-Warray-bounds" }
+// { dg-do compile }
+
+// Test case exercising pr c/67882 - surprising offsetof result
+//   on an invalid array member without diagnostic.
+
+typedef struct A1 {
+  char a1[1];
+  char c;
+} A1;
+
+typedef struct A1_x_2 {
+  char a1[1];
+  char a[][2];
+} A1_x_2;
+
+typedef struct A1_1_x {
+  char a1_1[1][1];
+  char a[];
+} A1_1_x;
+
+typedef struct Ax_2_3 {
+  int i;
+  char a_x_2_3[][2][3];
+} Ax_2_3;
+
+typedef struct A1_1 {
+  char a1_1[1][1];
+  char c;
+} A1_1;
+
+typedef struct B {
+  A1_1 a2_3[2][3];
+  char a1_1[3][5];
+  char a[];
+} B;
+
+// Structures with members that contain flexible array members are
+// an extension accepted by GCC.
+typedef struct C {
+  A1_1_x a5_7 [5][7];
+  int a;
+} C;
+
+// Structs with a "fake" flexible array member (a GCC extension).
+typedef struct FA0 {
+  int i;
+  char a0 [0];
+} FA0;
+
+typedef struct FA1 {
+  int i;
+  char a1 [1];
+} FA1;
+
+typedef struct FA3 {
+  int i;
+  char a3 [3];
+} FA3;
+
+// A "fake" multidimensional flexible array member deserves special
+// treatment since the offsetof exression yields the same result for
+// references to apparently distinct members (e.g., a5_7[0][7] is
+// at the same offset as a5_7[1][0] which may be surprising).
+typedef struct FA5_7 {
+  int i;
+  char a5_7 [5][7];
+} FA5_7;
+
+static void test (void)
+{
+  // Verify that offsetof references to array elements past the end of
+  // the array member are diagnosed.  As an extension, permit references
+  // to the element just past-the-end of the array.
+
+  int a[] = {
+    __builtin_offsetof (A1, a1),                 // valid
+    __builtin_offsetof (A1, a1 [0]),             // valid
+
+    // The following expression is silently accepted as an extension
+    // because it simply forms the equivalent of an address pointing
+    // just past the last element of the array and forming such an
+    // adderess is valid in C, even though referring to the element
+    // in an offsetof expression is strictly not.
+    __builtin_offsetof (A1, a1 [1]),             // valid
+    __builtin_offsetof (A1, a1 [2]),             // { dg-warning "index" }
+
+    __builtin_offsetof (A1_x_2, a1),             // valid
+    __builtin_offsetof (A1_x_2, a1 [0]),         // valid
+    __builtin_offsetof (A1_x_2, a1 [1]),         // extension
+    __builtin_offsetof (A1_x_2, a1 [2]),         // { dg-warning "index" }
+
+    __builtin_offsetof (A1_x_2, a),              // valid
+    __builtin_offsetof (A1_x_2, a [0]),          // valid
+    __builtin_offsetof (A1_x_2, a [1]),          // valid
+    __builtin_offsetof (A1_x_2, a [99]),         // valid
+
+    __builtin_offsetof (A1_x_2, a),              // valid
+    __builtin_offsetof (A1_x_2, a [0][0]),       // valid
+    __builtin_offsetof (A1_x_2, a [0][1]),       // valid
+
+    // The following is silently accepted because A is a flexible array
+    // member with the type of T[][2] and we can't tell what its runtime
+    // major dimension is.  If it happens to be 1 then A[0][2] refers
+    // to just-past-the-last-element which is accepted as an extension.
+    // If it's greater than 1 then we fail to diagnose this case as
+    // an unavoidable false neagtive.
+    __builtin_offsetof (A1_x_2, a [0][2]),       // extension
+
+    // Unlike the case above, this is clearly invalid since it refers
+    // to an element past one one just-past-the-end in A[][2].
+    __builtin_offsetof (A1_x_2, a [0][3]),       // { dg-warning "index" }
+
+    __builtin_offsetof (A1_x_2, a [1][0]),       // valid
+    __builtin_offsetof (A1_x_2, a [1][1]),       // valid
+    __builtin_offsetof (A1_x_2, a [1][2]),       // extension (see above)
+    __builtin_offsetof (A1_x_2, a [99][0]),      // valid
+    __builtin_offsetof (A1_x_2, a [99][1]),      // valid
+    __builtin_offsetof (A1_x_2, a [99][2]),      // extension (see above)
+
+    __builtin_offsetof (A1_1_x, a),              // valid
+    __builtin_offsetof (A1_1_x, a [0]),          // valid
+    __builtin_offsetof (A1_1_x, a [1]),          // valid
+    __builtin_offsetof (A1_1_x, a [99]),         // valid
+
+    __builtin_offsetof (A1_1_x, a1_1 [0][0]),    // valid
+    __builtin_offsetof (A1_1_x, a1_1 [0][1]),    // extension
+    __builtin_offsetof (A1_1_x, a1_1 [1][0]),    // { dg-warning "index" }
+    __builtin_offsetof (A1_1_x, a1_1 [1][1]),    // { dg-warning "index" }
+
+    __builtin_offsetof (Ax_2_3, a_x_2_3 [0][1][3]),  // extension
+    __builtin_offsetof (Ax_2_3, a_x_2_3 [0][1][4]),  // { dg-warning "index" }
+    __builtin_offsetof (Ax_2_3, a_x_2_3 [0][2]),     // extension
+    __builtin_offsetof (Ax_2_3, a_x_2_3 [0][2][0]),  // { dg-warning "index" }
+
+    __builtin_offsetof (B, a2_3 [0][0].c),           // valid
+    __builtin_offsetof (B, a2_3 [0][0].a1_1 [0][0]), // valid
+    __builtin_offsetof (B, a2_3 [1][3]),             // extension
+    __builtin_offsetof (B, a2_3 [0][0].a1_1 [0][1]), // extension
+
+    __builtin_offsetof (B, a2_3 [0][0].a1_1 [1][0]), // { dg-warning "index" }
+    __builtin_offsetof (B, a2_3 [0][0].a1_1 [1][1]), // { dg-warning "index" }
+
+    __builtin_offsetof (B, a2_3 [1][2].a1_1 [0][0]), // valid
+
+    // Forming an offset to the just-past-end element is accepted.
+    __builtin_offsetof (B, a2_3 [1][2].a1_1 [0][1]), // extension
+    __builtin_offsetof (B, a2_3 [1][2].a1_1 [1][0]), // { dg-warning "index" }
+    __builtin_offsetof (B, a2_3 [1][2].a1_1 [1][1]), // { dg-warning "index" }
+
+    // Forming an offset to the just-past-end element is accepted.
+    __builtin_offsetof (B, a2_3 [1][3]),             // exension
+    // ...but these are diagnosed because they dereference a just-path-the-end
+    // element.
+    __builtin_offsetof (B, a2_3 [1][3].a1_1 [0][0]), // { dg-warning "index" }
+    __builtin_offsetof (B, a2_3 [1][3].a1_1 [0][0]), // { dg-warning "index" }
+    __builtin_offsetof (B, a2_3 [1][3].a1_1 [0][1]), // { dg-warning "index" }
+    __builtin_offsetof (B, a2_3 [1][3].a1_1 [1][0]), // { dg-warning "index" }
+    __builtin_offsetof (B, a2_3 [1][3].a1_1 [1][1]), // { dg-warning "index" }
+
+    // Analogous to the case above, these are both diagnosed because they
+    // dereference just-path-the-end elements of the a2_3 array.
+    __builtin_offsetof (B, a2_3 [1][3].c),       // { dg-warning "index" }
+    __builtin_offsetof (B, a2_3 [1][3].c),       // { dg-warning "index" }
+
+    // The following are all invalid because of the reference to a2_3[2].
+    __builtin_offsetof (B, a2_3 [2][0].a1_1 [0][0]), // { dg-warning "index" }
+    __builtin_offsetof (B, a2_3 [2][0].a1_1 [0][1]), // { dg-warning "index" }
+    __builtin_offsetof (B, a2_3 [2][0].a1_1 [1][0]), // { dg-warning "index" }
+    __builtin_offsetof (B, a2_3 [2][0].a1_1 [1][1]), // { dg-warning "index" }
+    __builtin_offsetof (B, a2_3 [2][0].c),           // { dg-warning "index" }
+
+    __builtin_offsetof (C, a5_7 [4][6]),
+    __builtin_offsetof (C, a5_7 [4][6].a),
+    __builtin_offsetof (C, a5_7 [4][6].a [0]),
+    __builtin_offsetof (C, a5_7 [4][6].a [99]),
+
+    __builtin_offsetof (C, a5_7 [4][7]),             // extension
+    // Diagnose the following even though the object whose offset is
+    // computed is a flexible array member.
+    __builtin_offsetof (C, a5_7 [4][7].a),           // { dg-warning "index" }
+    __builtin_offsetof (C, a5_7 [4][7].a [0]),       // { dg-warning "index" }
+    __builtin_offsetof (C, a5_7 [4][7].a [99]),      // { dg-warning "index" }
+
+    // Verify that no diagnostic is issued for offsetof expressions
+    // involving structs where the array has a rank of 1 and is the last
+    // member (e.g., those are treated as flexible array members).
+    __builtin_offsetof (FA0, a0 [0]),
+    __builtin_offsetof (FA0, a0 [1]),
+    __builtin_offsetof (FA0, a0 [99]),
+
+    __builtin_offsetof (FA1, a1 [0]),
+    __builtin_offsetof (FA1, a1 [1]),
+    __builtin_offsetof (FA1, a1 [99]),
+
+    __builtin_offsetof (FA3, a3 [0]),
+    __builtin_offsetof (FA3, a3 [3]),
+    __builtin_offsetof (FA3, a3 [99]),
+
+    __builtin_offsetof (FA5_7, a5_7 [0][0]),
+
+    // Unlike one-dimensional arrays, verify that out-of-bounds references
+    // to arrays with rank of 2 and greater are diagnosed.
+    __builtin_offsetof (FA5_7, a5_7 [0][7]),         // { dg-warning "index" }
+    __builtin_offsetof (FA5_7, a5_7 [1][7]),         // { dg-warning "index" }
+    __builtin_offsetof (FA5_7, a5_7 [5][0]),         // { dg-warning "index" }
+    __builtin_offsetof (FA5_7, a5_7 [5][7]),         // { dg-warning "index" }
+    __builtin_offsetof (FA5_7, a5_7 [99][99])        // { dg-warning "index" }
+  };
+
+  (void)&a;
+}

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH] c/67882 - improve -Warray-bounds for invalid offsetof
  2015-10-16 19:34   ` Martin Sebor
@ 2015-10-20 13:21     ` Bernd Schmidt
  2015-10-20 15:33       ` Martin Sebor
  0 siblings, 1 reply; 30+ messages in thread
From: Bernd Schmidt @ 2015-10-20 13:21 UTC (permalink / raw)
  To: Martin Sebor, Bernd Schmidt, Gcc Patch List, Joseph Myers

[-- Attachment #1: Type: text/plain, Size: 1342 bytes --]

On 10/16/2015 09:34 PM, Martin Sebor wrote:
> Thank you for the review. Attached is an updated patch that hopefully
> addresses all your comments. I ran the check_GNU_style.sh script on
> it to make sure I didn't miss something.  I've also added replies to
> a few of your comments below.

Ok, thanks. However - the logic around the context struct in the patch 
still seems fairly impenetrable to me, and I've been wondering whether a 
simpler approach wouldn't work just as well. I came up with the patch 
below, which just passes a tree_code context to recursive invocations 
and increasing the upper bound by one only if not in an ARRAY_REF or 
COMPONENT_REF context.

As far as I can tell, this produces the same warnings on the testcase 
(modulo differences in type printout), except for the final few FA5_7 
testcases, which I had doubts about anyway:

typedef struct FA5_7 {
   int i;
   char a5_7 [5][7];
} FA5_7;

     __builtin_offsetof (FA5_7, a5_7 [0][7]),         // { dg-warning 
"index" }
     __builtin_offsetof (FA5_7, a5_7 [1][7]),         // { dg-warning 
"index" }
     __builtin_offsetof (FA5_7, a5_7 [5][0]),         // { dg-warning 
"index" }
     __builtin_offsetof (FA5_7, a5_7 [5][7]),         // { dg-warning 
"index" }

Why wouldn't at least the first two be convered by the 
one-past-the-end-is-ok rule?


Bernd


[-- Attachment #2: offsetof.diff --]
[-- Type: text/x-patch, Size: 2096 bytes --]

Index: gcc/c-family/c-common.c
===================================================================
--- gcc/c-family/c-common.c	(revision 229049)
+++ gcc/c-family/c-common.c	(working copy)
@@ -10589,11 +10589,11 @@ c_common_to_target_charset (HOST_WIDE_IN
    traditional rendering of offsetof as a macro.  Return the folded result.  */
 
 tree
-fold_offsetof_1 (tree expr)
+fold_offsetof_1 (tree expr, enum tree_code ctx)
 {
   tree base, off, t;
-
-  switch (TREE_CODE (expr))
+  tree_code code = TREE_CODE (expr);
+  switch (code)
     {
     case ERROR_MARK:
       return expr;
@@ -10617,7 +10617,7 @@ fold_offsetof_1 (tree expr)
       return TREE_OPERAND (expr, 0);
 
     case COMPONENT_REF:
-      base = fold_offsetof_1 (TREE_OPERAND (expr, 0));
+      base = fold_offsetof_1 (TREE_OPERAND (expr, 0), code);
       if (base == error_mark_node)
 	return base;
 
@@ -10634,7 +10634,7 @@ fold_offsetof_1 (tree expr)
       break;
 
     case ARRAY_REF:
-      base = fold_offsetof_1 (TREE_OPERAND (expr, 0));
+      base = fold_offsetof_1 (TREE_OPERAND (expr, 0), code);
       if (base == error_mark_node)
 	return base;
 
@@ -10649,8 +10649,9 @@ fold_offsetof_1 (tree expr)
 	      && !tree_int_cst_equal (upbound,
 				      TYPE_MAX_VALUE (TREE_TYPE (upbound))))
 	    {
-	      upbound = size_binop (PLUS_EXPR, upbound,
-				    build_int_cst (TREE_TYPE (upbound), 1));
+	      if (ctx != ARRAY_REF && ctx != COMPONENT_REF)
+		upbound = size_binop (PLUS_EXPR, upbound,
+				      build_int_cst (TREE_TYPE (upbound), 1));
 	      if (tree_int_cst_lt (upbound, t))
 		{
 		  tree v;
Index: gcc/c-family/c-common.h
===================================================================
--- gcc/c-family/c-common.h	(revision 229049)
+++ gcc/c-family/c-common.h	(working copy)
@@ -1029,7 +1029,7 @@ extern bool c_dump_tree (void *, tree);
 
 extern void verify_sequence_points (tree);
 
-extern tree fold_offsetof_1 (tree);
+extern tree fold_offsetof_1 (tree, tree_code ctx = ERROR_MARK);
 extern tree fold_offsetof (tree);
 
 /* Places where an lvalue, or modifiable lvalue, may be required.

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH] c/67882 - improve -Warray-bounds for invalid offsetof
  2015-10-20 13:21     ` Bernd Schmidt
@ 2015-10-20 15:33       ` Martin Sebor
  2015-10-20 15:52         ` Bernd Schmidt
  2015-10-20 16:54         ` Joseph Myers
  0 siblings, 2 replies; 30+ messages in thread
From: Martin Sebor @ 2015-10-20 15:33 UTC (permalink / raw)
  To: Bernd Schmidt, Bernd Schmidt, Gcc Patch List, Joseph Myers

On 10/20/2015 07:20 AM, Bernd Schmidt wrote:
> On 10/16/2015 09:34 PM, Martin Sebor wrote:
>> Thank you for the review. Attached is an updated patch that hopefully
>> addresses all your comments. I ran the check_GNU_style.sh script on
>> it to make sure I didn't miss something.  I've also added replies to
>> a few of your comments below.
>
> Ok, thanks. However - the logic around the context struct in the patch
> still seems fairly impenetrable to me, and I've been wondering whether a
> simpler approach wouldn't work just as well. I came up with the patch
> below, which just passes a tree_code context to recursive invocations
> and increasing the upper bound by one only if not in an ARRAY_REF or
> COMPONENT_REF context.
>
> As far as I can tell, this produces the same warnings on the testcase
> (modulo differences in type printout), except for the final few FA5_7
> testcases, which I had doubts about anyway:
>
> typedef struct FA5_7 {
>    int i;
>    char a5_7 [5][7];
> } FA5_7;
>
>      __builtin_offsetof (FA5_7, a5_7 [0][7]),         // { dg-warning
> "index" }
>      __builtin_offsetof (FA5_7, a5_7 [1][7]),         // { dg-warning
> "index" }
>      __builtin_offsetof (FA5_7, a5_7 [5][0]),         // { dg-warning
> "index" }
>      __builtin_offsetof (FA5_7, a5_7 [5][7]),         // { dg-warning
> "index" }
>
> Why wouldn't at least the first two be convered by the
> one-past-the-end-is-ok rule?

Thanks for taking the time to try to simplify the patch!

Diagnosing the cases above is at the core of the issue (and can
be seen by substituting a5_7 into the oversimplified test case
in the bug).

In a 5 by 7 array, the offset computed for the out-of-bounds
a_5_7[0][7] is the same as the offset computed for the in-bounds
a_5_7[1][0].  The result might be unexpected because it suggests
that two "distinct" elements of an array share the same offset.

The "one-past-the-end rule" applies only to the offset corresponding
to the address just past the end of the whole array because like its
address, the offset of the element just beyond the end of the array
is valid and cannot be mistaken for an offset of a valid element.

My initial approach was similar to the patch you attached.  Most
of the additional complexity (i.e., the recursive use of the context)
grew out of wanting to diagnose these less than trivial cases of
surprising offsets in multi-dimensional arrays.  I don't think it
can be done without passing some state down the recursive calls but
I'd be happy to be proven wrong.

FWIW, teh bug and my patch for it were precipitated by looking for
the problems behind PR 67872 - missing -Warray-bounds warning (which
was in turn prompted by developing and testing a solution for PR
67942 - diagnose placement new buffer overflow).

I think -Warray-bounds should emit consistent diagnostics for invalid
array references regardless of the contexts. I.e., given

     struct S {
         int A [5][7];
         int x;
     } s;

these should both be diagnosed:

     int i = offsetof (struct S, A [0][7]);

     int *p = &s.A [0][7];

because they are both undefined and both can lead to surprising
results when used.

With my patch for 67882, GCC diagnoses the first case. I hope to
work on 67872 in the near future to diagnose the second to bring
GCC on par with Clang. (Clang doesn't yet diagnose any offsetof
errors).

Martin

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH] c/67882 - improve -Warray-bounds for invalid offsetof
  2015-10-20 15:33       ` Martin Sebor
@ 2015-10-20 15:52         ` Bernd Schmidt
  2015-10-20 16:57           ` Martin Sebor
  2015-10-20 16:54         ` Joseph Myers
  1 sibling, 1 reply; 30+ messages in thread
From: Bernd Schmidt @ 2015-10-20 15:52 UTC (permalink / raw)
  To: Martin Sebor, Bernd Schmidt, Gcc Patch List, Joseph Myers



On 10/20/2015 05:31 PM, Martin Sebor wrote:
> On 10/20/2015 07:20 AM, Bernd Schmidt wrote:
>> On 10/16/2015 09:34 PM, Martin Sebor wrote:
>>> Thank you for the review. Attached is an updated patch that hopefully
>>> addresses all your comments. I ran the check_GNU_style.sh script on
>>> it to make sure I didn't miss something.  I've also added replies to
>>> a few of your comments below.
>>
>> Ok, thanks. However - the logic around the context struct in the patch
>> still seems fairly impenetrable to me, and I've been wondering whether a
>> simpler approach wouldn't work just as well. I came up with the patch
>> below, which just passes a tree_code context to recursive invocations
>> and increasing the upper bound by one only if not in an ARRAY_REF or
>> COMPONENT_REF context.
>>
>> As far as I can tell, this produces the same warnings on the testcase
>> (modulo differences in type printout), except for the final few FA5_7
>> testcases, which I had doubts about anyway:
>>
>> typedef struct FA5_7 {
>>    int i;
>>    char a5_7 [5][7];
>> } FA5_7;
>>
>>      __builtin_offsetof (FA5_7, a5_7 [0][7]),         // { dg-warning
>> "index" }
>>      __builtin_offsetof (FA5_7, a5_7 [1][7]),         // { dg-warning
>> "index" }
>>      __builtin_offsetof (FA5_7, a5_7 [5][0]),         // { dg-warning
>> "index" }
>>      __builtin_offsetof (FA5_7, a5_7 [5][7]),         // { dg-warning
>> "index" }
>>
>> Why wouldn't at least the first two be convered by the
>> one-past-the-end-is-ok rule?
>
> Thanks for taking the time to try to simplify the patch!
>
> Diagnosing the cases above is at the core of the issue (and can
> be seen by substituting a5_7 into the oversimplified test case
> in the bug).
>
> In a 5 by 7 array, the offset computed for the out-of-bounds
> a_5_7[0][7] is the same as the offset computed for the in-bounds
> a_5_7[1][0].  The result might be unexpected because it suggests
> that two "distinct" elements of an array share the same offset.

Yeah, but Joseph wrote:

   Given such a flexible array member, [anything][0] and
   [anything][1] are logically valid if [anything] is
   nonnegative (of course, the array might or might not
   have enough element at runtime).  [anything][2] is a
   reference to just past the end of an element of the
   flexible array member; [anything][3] is clearly invalid.

   Regarding the actual testcase, accepting a1_1[0][1] seems
   fine (it's referencing the just-past-end element of the
   valid array a1_1[0]).

So how's that different from a5_7[0][7]? In case it matters, a1_1 in the 
earlier test that was discussed could not be a flexible array because it 
was followed by another field in the same struct.

Handing over review duties to Joseph because he'll have to decide what 
to warn about and what not to.

>     struct S {
>         int A [5][7];
>         int x;
>     } s;
>
> these should both be diagnosed:
>
>     int i = offsetof (struct S, A [0][7]);
>
>     int *p = &s.A [0][7];
>
> because they are both undefined and both can lead to surprising
> results when used.

Is that really undefined? I thought it was explicitly allowed. 
Dereferencing the pointer is what's undefined.


Bernd

Bernd

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH] c/67882 - improve -Warray-bounds for invalid offsetof
  2015-10-20 15:33       ` Martin Sebor
  2015-10-20 15:52         ` Bernd Schmidt
@ 2015-10-20 16:54         ` Joseph Myers
  2015-10-20 20:36           ` Bernd Schmidt
  1 sibling, 1 reply; 30+ messages in thread
From: Joseph Myers @ 2015-10-20 16:54 UTC (permalink / raw)
  To: Martin Sebor; +Cc: Bernd Schmidt, Bernd Schmidt, Gcc Patch List

On Tue, 20 Oct 2015, Martin Sebor wrote:

> I think -Warray-bounds should emit consistent diagnostics for invalid
> array references regardless of the contexts. I.e., given
> 
>     struct S {
>         int A [5][7];
>         int x;
>     } s;
> 
> these should both be diagnosed:
> 
>     int i = offsetof (struct S, A [0][7]);
> 
>     int *p = &s.A [0][7];
> 
> because they are both undefined and both can lead to surprising
> results when used.

But both are valid.  &s.A [0][7] means s.A[0] + 7 (as explicitly specified 
in C11, neither the & nor the [] is evaluated in this case, but the [] 
turns into a +), and s.A[0] is an object of type int[7], which decays to a 
pointer to the first element of that array, so adding 7 produces a 
just-past-end pointer.  It's not valid to dereference that pointer, but 
the pointer itself is valid (and subtracting 1 from it produces a pointer 
you can dereference).

-- 
Joseph S. Myers
joseph@codesourcery.com

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH] c/67882 - improve -Warray-bounds for invalid offsetof
  2015-10-20 15:52         ` Bernd Schmidt
@ 2015-10-20 16:57           ` Martin Sebor
  2015-10-20 17:11             ` Joseph Myers
  0 siblings, 1 reply; 30+ messages in thread
From: Martin Sebor @ 2015-10-20 16:57 UTC (permalink / raw)
  To: Bernd Schmidt, Bernd Schmidt, Gcc Patch List, Joseph Myers

On 10/20/2015 09:48 AM, Bernd Schmidt wrote:
>
>
> On 10/20/2015 05:31 PM, Martin Sebor wrote:
>> On 10/20/2015 07:20 AM, Bernd Schmidt wrote:
>>> On 10/16/2015 09:34 PM, Martin Sebor wrote:
>>>> Thank you for the review. Attached is an updated patch that hopefully
>>>> addresses all your comments. I ran the check_GNU_style.sh script on
>>>> it to make sure I didn't miss something.  I've also added replies to
>>>> a few of your comments below.
>>>
>>> Ok, thanks. However - the logic around the context struct in the patch
>>> still seems fairly impenetrable to me, and I've been wondering whether a
>>> simpler approach wouldn't work just as well. I came up with the patch
>>> below, which just passes a tree_code context to recursive invocations
>>> and increasing the upper bound by one only if not in an ARRAY_REF or
>>> COMPONENT_REF context.
>>>
>>> As far as I can tell, this produces the same warnings on the testcase
>>> (modulo differences in type printout), except for the final few FA5_7
>>> testcases, which I had doubts about anyway:
>>>
>>> typedef struct FA5_7 {
>>>    int i;
>>>    char a5_7 [5][7];
>>> } FA5_7;
>>>
>>>      __builtin_offsetof (FA5_7, a5_7 [0][7]),         // { dg-warning
>>> "index" }
>>>      __builtin_offsetof (FA5_7, a5_7 [1][7]),         // { dg-warning
>>> "index" }
>>>      __builtin_offsetof (FA5_7, a5_7 [5][0]),         // { dg-warning
>>> "index" }
>>>      __builtin_offsetof (FA5_7, a5_7 [5][7]),         // { dg-warning
>>> "index" }
>>>
>>> Why wouldn't at least the first two be convered by the
>>> one-past-the-end-is-ok rule?
>>
>> Thanks for taking the time to try to simplify the patch!
>>
>> Diagnosing the cases above is at the core of the issue (and can
>> be seen by substituting a5_7 into the oversimplified test case
>> in the bug).
>>
>> In a 5 by 7 array, the offset computed for the out-of-bounds
>> a_5_7[0][7] is the same as the offset computed for the in-bounds
>> a_5_7[1][0].  The result might be unexpected because it suggests
>> that two "distinct" elements of an array share the same offset.
>
> Yeah, but Joseph wrote:
>
>    Given such a flexible array member, [anything][0] and
>    [anything][1] are logically valid if [anything] is
>    nonnegative (of course, the array might or might not
>    have enough element at runtime).  [anything][2] is a
>    reference to just past the end of an element of the
>    flexible array member; [anything][3] is clearly invalid.
>
>    Regarding the actual testcase, accepting a1_1[0][1] seems
>    fine (it's referencing the just-past-end element of the
>    valid array a1_1[0]).
>
> So how's that different from a5_7[0][7]? In case it matters, a1_1 in the
> earlier test that was discussed could not be a flexible array because it
> was followed by another field in the same struct.

Let me try to explain using a couple of examples. Given

   struct S {
       a1_1 [1][1];
       int x;
   } s;

where a1_1 is an ordinary array of a single element

   offseof (struct S, a1_1[0][1])

is accepted as a GCC extension (and, IMO, should be made valid
in C) because a) there is no way to mistake the offset for the
offset of an element of the array at a valid index, and because
b) it yields to the same value as the following valid expression:

   (char*)&s.a1_1[0][1] - (char*)&s.a1_1;

However, given

   struct S {
       a5_7 [5][7];
       int x;
   } s;

the invalid offsetof expression

   offseof (struct S, a5_7[0][7])

should be diagnosed because a) it yields the same offset of the
valid

   offseof (struct S, a5_7[1][0])

and b) because s.a5_7[0][7] is not a valid reference in any context
(i.e., (char*)&s.a5_7[0][7] - (char*)&s.a5_7 is not a valid expression).

>
> Handing over review duties to Joseph because he'll have to decide what
> to warn about and what not to.
>
>>     struct S {
>>         int A [5][7];
>>         int x;
>>     } s;
>>
>> these should both be diagnosed:
>>
>>     int i = offsetof (struct S, A [0][7]);
>>
>>     int *p = &s.A [0][7];
>>
>> because they are both undefined and both can lead to surprising
>> results when used.
>
> Is that really undefined? I thought it was explicitly allowed.
> Dereferencing the pointer is what's undefined.

Both are undefined. The most succinct statements to this effect are
in the Unde3fined Behavior section of Annex J:

   An array subscript is out of range, even if an object is apparently
   accessible with the given subscript (as in the lvalue expression
   a[1][7] given the declaration int a[4][5]) (6.5.6).

   The member designator parameter of an offsetof macro is an
   invalid right operand of the . operator for the type parameter,
   or designates a bit-field (7.19).

Martin

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH] c/67882 - improve -Warray-bounds for invalid offsetof
  2015-10-20 16:57           ` Martin Sebor
@ 2015-10-20 17:11             ` Joseph Myers
  2015-10-20 19:10               ` Martin Sebor
  0 siblings, 1 reply; 30+ messages in thread
From: Joseph Myers @ 2015-10-20 17:11 UTC (permalink / raw)
  To: Martin Sebor; +Cc: Bernd Schmidt, Bernd Schmidt, Gcc Patch List

On Tue, 20 Oct 2015, Martin Sebor wrote:

>   An array subscript is out of range, even if an object is apparently
>   accessible with the given subscript (as in the lvalue expression
>   a[1][7] given the declaration int a[4][5]) (6.5.6).

Just-past-the-end is only out of range if the dereference is executed, not 
in the &array[size] case which is equivalent to array + size.

-- 
Joseph S. Myers
joseph@codesourcery.com

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH] c/67882 - improve -Warray-bounds for invalid offsetof
  2015-10-20 17:11             ` Joseph Myers
@ 2015-10-20 19:10               ` Martin Sebor
  0 siblings, 0 replies; 30+ messages in thread
From: Martin Sebor @ 2015-10-20 19:10 UTC (permalink / raw)
  To: Joseph Myers; +Cc: Bernd Schmidt, Bernd Schmidt, Gcc Patch List

[-- Attachment #1: Type: text/plain, Size: 778 bytes --]

On 10/20/2015 10:57 AM, Joseph Myers wrote:
> On Tue, 20 Oct 2015, Martin Sebor wrote:
>
>>    An array subscript is out of range, even if an object is apparently
>>    accessible with the given subscript (as in the lvalue expression
>>    a[1][7] given the declaration int a[4][5]) (6.5.6).
>
> Just-past-the-end is only out of range if the dereference is executed, not
> in the &array[size] case which is equivalent to array + size.

Okay, I agree that this less restrictive interpretation of just
past the end subscripts makes sense and also allows for a simpler
implementation of the solution.

Attached is a patch incorporating Bernd's change and updates to
the test reflecting what has been discussed, retested by boot-
strapping and running the tests on x86_64.

Martin


[-- Attachment #2: gcc-67882-surprising_offsetof_result.patch --]
[-- Type: text/x-patch, Size: 11261 bytes --]

gcc/ChangeLog
2015-10-20  Bernd Schmidt <bschmidt@redhat.com>

	PR c++-common/67882
	* c-family/c-common.h (struct offsetof_ctx_t): Declare.
	(fold_offsetof_1): Add argument.
	* c-family/c-common.c (struct offsetof_ctx_t): Define.
	(fold_offsetof_1): Diagnose more invalid offsetof expressions
	that reference elements past the end of an array.

testsuite/ChangeLog
2015-10-20  Martin Sebor  <msebor@redhat.com>

	PR c++-common/67882
	* c-c++-common/builtin-offsetof-2.c: New test.

diff --git a/gcc/c-family/c-common.c b/gcc/c-family/c-common.c
index e539527..0872ec1 100644
--- a/gcc/c-family/c-common.c
+++ b/gcc/c-family/c-common.c
@@ -10541,11 +10541,11 @@ c_common_to_target_charset (HOST_WIDE_INT c)
    traditional rendering of offsetof as a macro.  Return the folded result.  */

 tree
-fold_offsetof_1 (tree expr)
+fold_offsetof_1 (tree expr, enum tree_code ctx)
 {
   tree base, off, t;
-
-  switch (TREE_CODE (expr))
+  tree_code code = TREE_CODE (expr);
+  switch (code)
     {
     case ERROR_MARK:
       return expr;
@@ -10569,7 +10569,7 @@ fold_offsetof_1 (tree expr)
       return TREE_OPERAND (expr, 0);

     case COMPONENT_REF:
-      base = fold_offsetof_1 (TREE_OPERAND (expr, 0));
+      base = fold_offsetof_1 (TREE_OPERAND (expr, 0), code);
       if (base == error_mark_node)
 	return base;

@@ -10586,7 +10586,7 @@ fold_offsetof_1 (tree expr)
       break;

     case ARRAY_REF:
-      base = fold_offsetof_1 (TREE_OPERAND (expr, 0));
+      base = fold_offsetof_1 (TREE_OPERAND (expr, 0), code);
       if (base == error_mark_node)
 	return base;

@@ -10601,6 +10601,7 @@ fold_offsetof_1 (tree expr)
 	      && !tree_int_cst_equal (upbound,
 				      TYPE_MAX_VALUE (TREE_TYPE (upbound))))
 	    {
+	      if (ctx != ARRAY_REF && ctx != COMPONENT_REF)
 		upbound = size_binop (PLUS_EXPR, upbound,
 				      build_int_cst (TREE_TYPE (upbound), 1));
 	      if (tree_int_cst_lt (upbound, t))
diff --git a/gcc/c-family/c-common.h b/gcc/c-family/c-common.h
index 0b4d993..762da01 100644
--- a/gcc/c-family/c-common.h
+++ b/gcc/c-family/c-common.h
@@ -1025,7 +1025,7 @@ extern bool c_dump_tree (void *, tree);

 extern void verify_sequence_points (tree);

-extern tree fold_offsetof_1 (tree);
+extern tree fold_offsetof_1 (tree, tree_code ctx = ERROR_MARK);
 extern tree fold_offsetof (tree);

 /* Places where an lvalue, or modifiable lvalue, may be required.
diff --git a/gcc/testsuite/c-c++-common/builtin-offsetof-2.c b/gcc/testsuite/c-c++-common/builtin-offsetof-2.c
new file mode 100644
index 0000000..f943dde
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/builtin-offsetof-2.c
@@ -0,0 +1,217 @@
+// { dg-options "-Warray-bounds" }
+// { dg-do compile }
+
+// Test case exercising pr c/67882 - surprising offsetof result
+//   on an invalid array member without diagnostic.
+
+typedef struct A1 {
+  char a1[1];
+  char c;
+} A1;
+
+typedef struct A1_x_2 {
+  char a1[1];
+  char a[][2];
+} A1_x_2;
+
+typedef struct A1_1_x {
+  char a1_1[1][1];
+  char a[];
+} A1_1_x;
+
+typedef struct Ax_2_3 {
+  int i;
+  char a_x_2_3[][2][3];
+} Ax_2_3;
+
+typedef struct A1_1 {
+  char a1_1[1][1];
+  char c;
+} A1_1;
+
+typedef struct B {
+  A1_1 a2_3[2][3];
+  char a1_1[3][5];
+  char a[];
+} B;
+
+// Structures with members that contain flexible array members are
+// an extension accepted by GCC.
+typedef struct C {
+  A1_1_x a5_7 [5][7];
+  int a;
+} C;
+
+// Structs with a "fake" flexible array member (a GCC extension).
+typedef struct FA0 {
+  int i;
+  char a0 [0];
+} FA0;
+
+typedef struct FA1 {
+  int i;
+  char a1 [1];
+} FA1;
+
+typedef struct FA3 {
+  int i;
+  char a3 [3];
+} FA3;
+
+// A "fake" multidimensional flexible array member.
+typedef struct FA5_7 {
+  int i;
+  char a5_7 [5][7];
+} FA5_7;
+
+static void test (void)
+{
+  // Verify that offsetof references to array elements past the end of
+  // the array member are diagnosed.  As an extension, permit references
+  // to the element just past-the-end of the array.
+
+  int a[] = {
+    __builtin_offsetof (A1, a1),                 // valid
+    __builtin_offsetof (A1, a1 [0]),             // valid
+
+    // The following expression is valid because it forms the equivalent
+    // of an address pointing just past the last element of the array.
+    __builtin_offsetof (A1, a1 [1]),             // valid
+
+    __builtin_offsetof (A1, a1 [2]),             // { dg-warning "index" }
+
+    __builtin_offsetof (A1_x_2, a1),             // valid
+    __builtin_offsetof (A1_x_2, a1 [0]),         // valid
+    __builtin_offsetof (A1_x_2, a1 [1]),         // valid
+    __builtin_offsetof (A1_x_2, a1 [2]),         // { dg-warning "index" }
+
+    __builtin_offsetof (A1_x_2, a),              // valid
+    __builtin_offsetof (A1_x_2, a [0]),          // valid
+    __builtin_offsetof (A1_x_2, a [1]),          // valid
+    __builtin_offsetof (A1_x_2, a [99]),         // valid
+
+    __builtin_offsetof (A1_x_2, a),              // valid
+    __builtin_offsetof (A1_x_2, a [0][0]),       // valid
+    __builtin_offsetof (A1_x_2, a [0][1]),       // valid
+
+    // The following expression is valid because it forms the equivalent
+    // of an address pointing just past the last element of the first
+    // array.
+    __builtin_offsetof (A1_x_2, a [0][2]),       // valid
+
+    // Unlike the case above, this is invalid since it refers to an element
+    // past one one just-past-the-end in A[][2].
+    __builtin_offsetof (A1_x_2, a [0][3]),       // { dg-warning "index" }
+
+    __builtin_offsetof (A1_x_2, a [1][0]),       // valid
+    __builtin_offsetof (A1_x_2, a [1][1]),       // valid
+    __builtin_offsetof (A1_x_2, a [1][2]),       // valid
+    __builtin_offsetof (A1_x_2, a [99][0]),      // valid
+    __builtin_offsetof (A1_x_2, a [99][1]),      // valid
+    __builtin_offsetof (A1_x_2, a [99][2]),      // valid
+
+    __builtin_offsetof (A1_1_x, a),              // valid
+    __builtin_offsetof (A1_1_x, a [0]),          // valid
+    __builtin_offsetof (A1_1_x, a [1]),          // valid
+    __builtin_offsetof (A1_1_x, a [99]),         // valid
+
+    __builtin_offsetof (A1_1_x, a1_1 [0][0]),    // valid
+    __builtin_offsetof (A1_1_x, a1_1 [0][1]),    // valid
+    __builtin_offsetof (A1_1_x, a1_1 [0][2]),    // { dg-warning "index" }
+    __builtin_offsetof (A1_1_x, a1_1 [1][0]),    // { dg-warning "index" }
+    __builtin_offsetof (A1_1_x, a1_1 [1][1]),    // { dg-warning "index" }
+
+    __builtin_offsetof (Ax_2_3, a_x_2_3 [0][1][3]),  // valid
+    __builtin_offsetof (Ax_2_3, a_x_2_3 [0][1][4]),  // { dg-warning "index" }
+    __builtin_offsetof (Ax_2_3, a_x_2_3 [0][2]),     // valid
+    __builtin_offsetof (Ax_2_3, a_x_2_3 [0][2][0]),  // { dg-warning "index" }
+
+    __builtin_offsetof (B, a2_3 [0][0].c),           // valid
+    __builtin_offsetof (B, a2_3 [0][0].a1_1 [0][0]), // valid
+    __builtin_offsetof (B, a2_3 [1][3]),             // valid
+    __builtin_offsetof (B, a2_3 [1][4]),             // { dg-warning "index" }
+    __builtin_offsetof (B, a2_3 [0][0].a1_1 [0][1]), // valid
+    __builtin_offsetof (B, a2_3 [0][0].a1_1 [0][2]), // { dg-warning "index" }
+
+    __builtin_offsetof (B, a2_3 [0][0].a1_1 [1][0]), // { dg-warning "index" }
+    __builtin_offsetof (B, a2_3 [0][0].a1_1 [1][1]), // { dg-warning "index" }
+
+    __builtin_offsetof (B, a2_3 [1][2].a1_1 [0][0]), // valid
+
+    // Forming an offset to the just-past-end element is valid.
+    __builtin_offsetof (B, a2_3 [1][2].a1_1 [0][1]), // valid
+    __builtin_offsetof (B, a2_3 [1][2].a1_1 [1][0]), // { dg-warning "index" }
+    __builtin_offsetof (B, a2_3 [1][2].a1_1 [1][1]), // { dg-warning "index" }
+
+    // Forming an offset to the just-past-end element is valid.
+    __builtin_offsetof (B, a2_3 [1][3]),             // valid
+    // ...but these are diagnosed because they dereference a just-past-the-end
+    // element.
+    __builtin_offsetof (B, a2_3 [1][3].a1_1 [0][0]), // { dg-warning "index" }
+    __builtin_offsetof (B, a2_3 [1][3].a1_1 [0][0]), // { dg-warning "index" }
+    __builtin_offsetof (B, a2_3 [1][3].a1_1 [0][1]), // { dg-warning "index" }
+    __builtin_offsetof (B, a2_3 [1][3].a1_1 [1][0]), // { dg-warning "index" }
+    __builtin_offsetof (B, a2_3 [1][3].a1_1 [1][1]), // { dg-warning "index" }
+
+    // Analogous to the case above, these are both diagnosed because they
+    // dereference just-past-the-end elements of the a2_3 array.
+    __builtin_offsetof (B, a2_3 [1][3].c),       // { dg-warning "index" }
+    __builtin_offsetof (B, a2_3 [1][3].c),       // { dg-warning "index" }
+
+    // The following are all invalid because of the reference to a2_3[2].
+    __builtin_offsetof (B, a2_3 [2][0].a1_1 [0][0]), // { dg-warning "index" }
+    __builtin_offsetof (B, a2_3 [2][0].a1_1 [0][1]), // { dg-warning "index" }
+    __builtin_offsetof (B, a2_3 [2][0].a1_1 [1][0]), // { dg-warning "index" }
+    __builtin_offsetof (B, a2_3 [2][0].a1_1 [1][1]), // { dg-warning "index" }
+    __builtin_offsetof (B, a2_3 [2][0].c),           // { dg-warning "index" }
+
+    __builtin_offsetof (C, a5_7 [4][6]),
+    __builtin_offsetof (C, a5_7 [4][6].a),
+    __builtin_offsetof (C, a5_7 [4][6].a [0]),
+    __builtin_offsetof (C, a5_7 [4][6].a [99]),
+
+    __builtin_offsetof (C, a5_7 [4][7]),             // valid
+    // Diagnose the following even though the object whose offset is
+    // computed is a flexible array member.
+    __builtin_offsetof (C, a5_7 [4][7].a),           // { dg-warning "index" }
+    __builtin_offsetof (C, a5_7 [4][7].a [0]),       // { dg-warning "index" }
+    __builtin_offsetof (C, a5_7 [4][7].a [99]),      // { dg-warning "index" }
+
+    // Verify that no diagnostic is issued for offsetof expressions
+    // involving structs where the array has a rank of 1 and is the last
+    // member (e.g., those are treated as flexible array members).
+    __builtin_offsetof (FA0, a0 [0]),
+    __builtin_offsetof (FA0, a0 [1]),
+    __builtin_offsetof (FA0, a0 [99]),
+
+    __builtin_offsetof (FA1, a1 [0]),
+    __builtin_offsetof (FA1, a1 [1]),
+    __builtin_offsetof (FA1, a1 [99]),
+
+    __builtin_offsetof (FA3, a3 [0]),
+    __builtin_offsetof (FA3, a3 [3]),
+    __builtin_offsetof (FA3, a3 [99]),
+
+    __builtin_offsetof (FA5_7, a5_7 [0][0]),
+
+    // Unlike one-dimensional arrays, verify that out-of-bounds references
+    // to "fake" flexible arrays with rank of 2 and greater are diagnosed.
+
+    // The following are valid because they compute the offset of just past
+    // the end of each of the a5_7[0] and a5_7[1] arrays.
+    __builtin_offsetof (FA5_7, a5_7 [0][7]),         // valid
+    __builtin_offsetof (FA5_7, a5_7 [1][7]),         // valid
+
+    // The following two are accepted as an extesion (because a5_7 is
+    // treated as a flexible array member).
+    __builtin_offsetof (FA5_7, a5_7 [5][0]),         // extension
+    __builtin_offsetof (FA5_7, a5_7 [5][7]),         // extension
+
+    // The following are invalid since in both cases they denote an element
+    // that's beyond just-past-the-end of the array.
+    __builtin_offsetof (FA5_7, a5_7 [0][8]),        // { dg-warning "index" }
+    __builtin_offsetof (FA5_7, a5_7 [6][8])         // { dg-warning "index" }
+  };
+
+  (void)&a;
+}

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH] c/67882 - improve -Warray-bounds for invalid offsetof
  2015-10-20 16:54         ` Joseph Myers
@ 2015-10-20 20:36           ` Bernd Schmidt
  2015-10-20 22:19             ` Joseph Myers
  0 siblings, 1 reply; 30+ messages in thread
From: Bernd Schmidt @ 2015-10-20 20:36 UTC (permalink / raw)
  To: Joseph Myers, Martin Sebor; +Cc: Gcc Patch List

On 10/20/2015 06:53 PM, Joseph Myers wrote:
> On Tue, 20 Oct 2015, Martin Sebor wrote:
>
>> I think -Warray-bounds should emit consistent diagnostics for invalid
>> array references regardless of the contexts. I.e., given
>>
>>      struct S {
>>          int A [5][7];
>>          int x;
>>      } s;
>>
>> these should both be diagnosed:
>>
>>      int i = offsetof (struct S, A [0][7]);
>>
>>      int *p = &s.A [0][7];
>>
>> because they are both undefined and both can lead to surprising
>> results when used.
>
> But both are valid.  &s.A [0][7] means s.A[0] + 7 (as explicitly specified
> in C11, neither the & nor the [] is evaluated in this case, but the []
> turns into a +), and s.A[0] is an object of type int[7], which decays to a
> pointer to the first element of that array, so adding 7 produces a
> just-past-end pointer.  It's not valid to dereference that pointer, but
> the pointer itself is valid (and subtracting 1 from it produces a pointer
> you can dereference).

Thanks, Joseph, this is helpful. There are a few other cases that I'm 
uncertain about and which might require additional changes to the patch; 
the one I sent was an experiment more than a finished product.

Consider

struct t { int a; int b; };
struct A { struct t v[2]; } a;

So I think we've established that
   &a.v[2]
is valid, giving a pointer just past the end of the structure. How about
   &a.v[2].a
and
   &a.v[2].b
The first of these gives the same pointer just past the end of the 
array, while the second one gives a higher address. I would expect that 
the second one is invalid, but I'm not so sure about the first one. 
Syntactically we have an access to an out-of-bounds field, but the whole 
expression just computes the valid one-past-the-end address.

I think this has an impact on the tests I quoted in my last mail:

typedef struct FA5_7 {
   int i;
   char a5_7 [5][7];
} FA5_7;

     __builtin_offsetof (FA5_7, a5_7 [0][7]),         // { dg-warning 
"index" }
     __builtin_offsetof (FA5_7, a5_7 [1][7]),         // { dg-warning 
"index" }
     __builtin_offsetof (FA5_7, a5_7 [5][0]),         // { dg-warning 
"index" }
     __builtin_offsetof (FA5_7, a5_7 [5][7]),         // { dg-warning 
"index" }

Here I think the last one of these is most likely invalid (being 8 bytes 
past the end of the object, rather than just one) and the others valid. 
Can you confirm this? (If the &a.v[2].a example is considered invalid, 
then I think the a5_7[5][0] test would be the equivalent and ought to 
also be considered invalid).

It might turn out that we have to do something like pass a pointer to an 
"at_end" boolean to recursive invocations, which would set it to true if 
the expression dealt with by the caller should not increase the address 
any further. Then, when folding the addition we check that the offset we 
have is either zero or we don't have at_end, and warn otherwise.


Bernd

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH] c/67882 - improve -Warray-bounds for invalid offsetof
  2015-10-20 20:36           ` Bernd Schmidt
@ 2015-10-20 22:19             ` Joseph Myers
  2015-10-23 11:17               ` Bernd Schmidt
  2015-11-07 23:38               ` Segher Boessenkool
  0 siblings, 2 replies; 30+ messages in thread
From: Joseph Myers @ 2015-10-20 22:19 UTC (permalink / raw)
  To: Bernd Schmidt; +Cc: Martin Sebor, Gcc Patch List

On Tue, 20 Oct 2015, Bernd Schmidt wrote:

> Consider
> 
> struct t { int a; int b; };
> struct A { struct t v[2]; } a;
> 
> So I think we've established that
>   &a.v[2]
> is valid, giving a pointer just past the end of the structure. How about
>   &a.v[2].a
> and
>   &a.v[2].b
> The first of these gives the same pointer just past the end of the array,
> while the second one gives a higher address. I would expect that the second
> one is invalid, but I'm not so sure about the first one. Syntactically we have
> an access to an out-of-bounds field, but the whole expression just computes
> the valid one-past-the-end address.

I don't think either is valid.  The address operator '&' requires "a 
function designator, the result of a [] or unary * operator, or an lvalue 
that designates an object".  So because a.v[2].a does not designate an 
object, there is undefined behavior.  The special case for [] allows the 
address of a just-past-end array element to be taken, but that doesn't 
apply here.

> I think this has an impact on the tests I quoted in my last mail:
> 
> typedef struct FA5_7 {
>   int i;
>   char a5_7 [5][7];
> } FA5_7;
> 
>     __builtin_offsetof (FA5_7, a5_7 [0][7]),         // { dg-warning "index" }
>     __builtin_offsetof (FA5_7, a5_7 [1][7]),         // { dg-warning "index" }
>     __builtin_offsetof (FA5_7, a5_7 [5][0]),         // { dg-warning "index" }
>     __builtin_offsetof (FA5_7, a5_7 [5][7]),         // { dg-warning "index" }
> 
> Here I think the last one of these is most likely invalid (being 8 bytes past
> the end of the object, rather than just one) and the others valid. Can you
> confirm this? (If the &a.v[2].a example is considered invalid, then I think
> the a5_7[5][0] test would be the equivalent and ought to also be considered
> invalid).

The last one is certainly invalid.  The one before is arguably invalid as 
well (in the unary '&' equivalent, &a5_7[5][0] which is equivalent to 
a5_7[5] + 0, the questionable operation is implicit conversion of a5_7[5] 
from array to pointer - an array expression gets converted to an 
expression "that points to the initial element of the array object", but 
there is no array object a5_7[5] here).

-- 
Joseph S. Myers
joseph@codesourcery.com

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH] c/67882 - improve -Warray-bounds for invalid offsetof
  2015-10-20 22:19             ` Joseph Myers
@ 2015-10-23 11:17               ` Bernd Schmidt
  2015-10-23 15:15                 ` Martin Sebor
  2015-11-07 23:38               ` Segher Boessenkool
  1 sibling, 1 reply; 30+ messages in thread
From: Bernd Schmidt @ 2015-10-23 11:17 UTC (permalink / raw)
  To: Joseph Myers; +Cc: Martin Sebor, Gcc Patch List

On 10/21/2015 12:10 AM, Joseph Myers wrote:
> On Tue, 20 Oct 2015, Bernd Schmidt wrote:
>>  How about
>>    &a.v[2].a
>> and
>>    &a.v[2].b
>
> I don't think either is valid.

>> typedef struct FA5_7 {
>>    int i;
>>    char a5_7 [5][7];
>> } FA5_7;
>>
>>      __builtin_offsetof (FA5_7, a5_7 [0][7]),         // { dg-warning "index" }
>>      __builtin_offsetof (FA5_7, a5_7 [1][7]),         // { dg-warning "index" }
>>      __builtin_offsetof (FA5_7, a5_7 [5][0]),         // { dg-warning "index" }
>>      __builtin_offsetof (FA5_7, a5_7 [5][7]),         // { dg-warning "index" }
>
> The last one is certainly invalid.  The one before is arguably invalid as
> well (in the unary '&' equivalent, &a5_7[5][0] which is equivalent to
> a5_7[5] + 0, the questionable operation is implicit conversion of a5_7[5]
> from array to pointer - an array expression gets converted to an
> expression "that points to the initial element of the array object", but
> there is no array object a5_7[5] here).

Martin, is this something you're working on, or should I have a go?


Bernd

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH] c/67882 - improve -Warray-bounds for invalid offsetof
  2015-10-23 11:17               ` Bernd Schmidt
@ 2015-10-23 15:15                 ` Martin Sebor
  2015-10-23 16:53                   ` Joseph Myers
  0 siblings, 1 reply; 30+ messages in thread
From: Martin Sebor @ 2015-10-23 15:15 UTC (permalink / raw)
  To: Bernd Schmidt, Joseph Myers; +Cc: Gcc Patch List

On 10/23/2015 05:13 AM, Bernd Schmidt wrote:
> On 10/21/2015 12:10 AM, Joseph Myers wrote:
>> On Tue, 20 Oct 2015, Bernd Schmidt wrote:
>>>  How about
>>>    &a.v[2].a
>>> and
>>>    &a.v[2].b
>>
>> I don't think either is valid.
>
>>> typedef struct FA5_7 {
>>>    int i;
>>>    char a5_7 [5][7];
>>> } FA5_7;
>>>
>>>      __builtin_offsetof (FA5_7, a5_7 [0][7]),         // { dg-warning
>>> "index" }
>>>      __builtin_offsetof (FA5_7, a5_7 [1][7]),         // { dg-warning
>>> "index" }
>>>      __builtin_offsetof (FA5_7, a5_7 [5][0]),         // { dg-warning
>>> "index" }
>>>      __builtin_offsetof (FA5_7, a5_7 [5][7]),         // { dg-warning
>>> "index" }
>>
>> The last one is certainly invalid.  The one before is arguably invalid as
>> well (in the unary '&' equivalent, &a5_7[5][0] which is equivalent to
>> a5_7[5] + 0, the questionable operation is implicit conversion of a5_7[5]
>> from array to pointer - an array expression gets converted to an
>> expression "that points to the initial element of the array object", but
>> there is no array object a5_7[5] here).
>
> Martin, is this something you're working on, or should I have a go?

I thought I made all the tweaks to these test cases in the last patch:

   https://gcc.gnu.org/ml/gcc-patches/2015-10/msg01946.html

But now that I'm re-reading the answer above I see that Joseph
was suggesting that a5_7[5][0] should be diagnosed when the patch
accepts it as an extension.  I think we do want to accept it
because a5_7 is treated as a flexible array member (as an extension)
and so the upper bound of the major index is unknown. I.e., FA5_7
is defined like so:

   typedef struct FA5_7 {
       int i;
       char a5_7 [5][7];   // treated as char [][7]
    } FA5_7;

Martin

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH] c/67882 - improve -Warray-bounds for invalid offsetof
  2015-10-23 15:15                 ` Martin Sebor
@ 2015-10-23 16:53                   ` Joseph Myers
  2015-10-23 17:45                     ` Bernd Schmidt
  0 siblings, 1 reply; 30+ messages in thread
From: Joseph Myers @ 2015-10-23 16:53 UTC (permalink / raw)
  To: Martin Sebor; +Cc: Bernd Schmidt, Gcc Patch List

On Fri, 23 Oct 2015, Martin Sebor wrote:

> But now that I'm re-reading the answer above I see that Joseph
> was suggesting that a5_7[5][0] should be diagnosed when the patch
> accepts it as an extension.  I think we do want to accept it
> because a5_7 is treated as a flexible array member (as an extension)
> and so the upper bound of the major index is unknown. I.e., FA5_7
> is defined like so:

If you treat it as a flexible array member, then, yes, it would be valid.

-- 
Joseph S. Myers
joseph@codesourcery.com

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH] c/67882 - improve -Warray-bounds for invalid offsetof
  2015-10-23 16:53                   ` Joseph Myers
@ 2015-10-23 17:45                     ` Bernd Schmidt
  2015-10-23 20:54                       ` Martin Sebor
  0 siblings, 1 reply; 30+ messages in thread
From: Bernd Schmidt @ 2015-10-23 17:45 UTC (permalink / raw)
  To: Joseph Myers, Martin Sebor; +Cc: Gcc Patch List

On 10/23/2015 06:50 PM, Joseph Myers wrote:
> On Fri, 23 Oct 2015, Martin Sebor wrote:
>
>> But now that I'm re-reading the answer above I see that Joseph
>> was suggesting that a5_7[5][0] should be diagnosed when the patch
>> accepts it as an extension.  I think we do want to accept it
>> because a5_7 is treated as a flexible array member (as an extension)
>> and so the upper bound of the major index is unknown. I.e., FA5_7
>> is defined like so:
>
> If you treat it as a flexible array member, then, yes, it would be valid.

Ok, let's install the patch as-is, and postpone the discussion of 
whether that is a valid flexible array member (I certainly wouldn't have 
guessed so from the documentation which only mentions [], [0] and [1] as 
valid cases).

I guess this is a case where I could say either "I wrote the patch" or 
"I requested changes to a patch in review"; in the latter case I can 
approve it. Joseph seems on board with what we've discussed, so I'd say 
please wait until Tuesday for objections then commit.


Bernd

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH] c/67882 - improve -Warray-bounds for invalid offsetof
  2015-10-23 17:45                     ` Bernd Schmidt
@ 2015-10-23 20:54                       ` Martin Sebor
  2015-10-26 11:44                         ` Bernd Schmidt
  2015-11-03 19:15                         ` Martin Sebor
  0 siblings, 2 replies; 30+ messages in thread
From: Martin Sebor @ 2015-10-23 20:54 UTC (permalink / raw)
  To: Bernd Schmidt, Joseph Myers; +Cc: Gcc Patch List

On 10/23/2015 11:45 AM, Bernd Schmidt wrote:
> On 10/23/2015 06:50 PM, Joseph Myers wrote:
>> On Fri, 23 Oct 2015, Martin Sebor wrote:
>>
>>> But now that I'm re-reading the answer above I see that Joseph
>>> was suggesting that a5_7[5][0] should be diagnosed when the patch
>>> accepts it as an extension.  I think we do want to accept it
>>> because a5_7 is treated as a flexible array member (as an extension)
>>> and so the upper bound of the major index is unknown. I.e., FA5_7
>>> is defined like so:
>>
>> If you treat it as a flexible array member, then, yes, it would be valid.
>
> Ok, let's install the patch as-is, and postpone the discussion of
> whether that is a valid flexible array member (I certainly wouldn't have
> guessed so from the documentation which only mentions [], [0] and [1] as
> valid cases).

The original code deliberately avoids diagnosing the case of last
array members with bounds greater than 1 (see the comment about
"a poor man's flexible array member" added with a fix for bug
41935) and I didn't want to change that.

But if there is sentiment for tightening it up I would be very
much in favor. IMO, it would be ideal if we could agree on and
apply the same rules for offsetof as for other expressions (and
diagnose, for example, &a5_7[5][0], which currently isn't
diagnosed).

As I mentioned, I'm planning to work on bug 67872 and it would
be helpful to know what our rules are up front. I can go back
and update this patch after it's been committed if the rules
evolve between now and then.

>
> I guess this is a case where I could say either "I wrote the patch" or
> "I requested changes to a patch in review"; in the latter case I can
> approve it. Joseph seems on board with what we've discussed, so I'd say
> please wait until Tuesday for objections then commit.

Okay.

Martin

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH] c/67882 - improve -Warray-bounds for invalid offsetof
  2015-10-23 20:54                       ` Martin Sebor
@ 2015-10-26 11:44                         ` Bernd Schmidt
  2015-10-26 11:51                           ` Jakub Jelinek
  2015-11-03 19:15                         ` Martin Sebor
  1 sibling, 1 reply; 30+ messages in thread
From: Bernd Schmidt @ 2015-10-26 11:44 UTC (permalink / raw)
  To: Martin Sebor, Joseph Myers; +Cc: Gcc Patch List, Jakub Jelinek

On 10/23/2015 10:40 PM, Martin Sebor wrote:
>
> The original code deliberately avoids diagnosing the case of last
> array members with bounds greater than 1 (see the comment about
> "a poor man's flexible array member" added with a fix for bug
> 41935) and I didn't want to change that.

Jakub added that, Cc'd. Do you recall why this was done?

> But if there is sentiment for tightening it up I would be very
> much in favor.

I'd be in favor, but this is Joseph's call really.


Bernd

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH] c/67882 - improve -Warray-bounds for invalid offsetof
  2015-10-26 11:44                         ` Bernd Schmidt
@ 2015-10-26 11:51                           ` Jakub Jelinek
  2015-10-26 12:01                             ` Bernd Schmidt
  0 siblings, 1 reply; 30+ messages in thread
From: Jakub Jelinek @ 2015-10-26 11:51 UTC (permalink / raw)
  To: Bernd Schmidt; +Cc: Martin Sebor, Joseph Myers, Gcc Patch List

On Mon, Oct 26, 2015 at 12:40:18PM +0100, Bernd Schmidt wrote:
> On 10/23/2015 10:40 PM, Martin Sebor wrote:
> >
> >The original code deliberately avoids diagnosing the case of last
> >array members with bounds greater than 1 (see the comment about
> >"a poor man's flexible array member" added with a fix for bug
> >41935) and I didn't want to change that.
> 
> Jakub added that, Cc'd. Do you recall why this was done?

Because the amount of code that uses this (including GCC itself) is just too
huge, so everywhere in the middle-end we also special case last array members of
structs.  While C99+ has flexible array members, e.g. C++ does not, so users
are left with using struct { ... type fld[1]; };
or similar to stand for the flexible array members, even when they want to
be able to use ->fld[3] etc.

	Jakub

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH] c/67882 - improve -Warray-bounds for invalid offsetof
  2015-10-26 11:51                           ` Jakub Jelinek
@ 2015-10-26 12:01                             ` Bernd Schmidt
  2015-10-26 12:04                               ` Jakub Jelinek
  0 siblings, 1 reply; 30+ messages in thread
From: Bernd Schmidt @ 2015-10-26 12:01 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: Martin Sebor, Joseph Myers, Gcc Patch List

On 10/26/2015 12:47 PM, Jakub Jelinek wrote:

> Because the amount of code that uses this (including GCC itself) is just too
> huge, so everywhere in the middle-end we also special case last array members of
> structs.  While C99+ has flexible array members, e.g. C++ does not, so users
> are left with using struct { ... type fld[1]; };

Yes, and that case is documented. However, the issue is arrays declared 
with a larger size than 1 or 0 - is there really code using them as 
flexible array members?


Bernd

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH] c/67882 - improve -Warray-bounds for invalid offsetof
  2015-10-26 12:01                             ` Bernd Schmidt
@ 2015-10-26 12:04                               ` Jakub Jelinek
  2015-10-26 12:32                                 ` Richard Biener
  0 siblings, 1 reply; 30+ messages in thread
From: Jakub Jelinek @ 2015-10-26 12:04 UTC (permalink / raw)
  To: Bernd Schmidt; +Cc: Martin Sebor, Joseph Myers, Gcc Patch List

On Mon, Oct 26, 2015 at 12:57:53PM +0100, Bernd Schmidt wrote:
> On 10/26/2015 12:47 PM, Jakub Jelinek wrote:
> 
> >Because the amount of code that uses this (including GCC itself) is just too
> >huge, so everywhere in the middle-end we also special case last array members of
> >structs.  While C99+ has flexible array members, e.g. C++ does not, so users
> >are left with using struct { ... type fld[1]; };
> 
> Yes, and that case is documented. However, the issue is arrays declared with
> a larger size than 1 or 0 - is there really code using them as flexible
> array members?

I believe so, though don't have pointers to that right now.  But vaguely
remember we saw various cases of using 2 or other values too.

	Jakub

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH] c/67882 - improve -Warray-bounds for invalid offsetof
  2015-10-26 12:04                               ` Jakub Jelinek
@ 2015-10-26 12:32                                 ` Richard Biener
  2015-10-27 11:18                                   ` Bernd Schmidt
  0 siblings, 1 reply; 30+ messages in thread
From: Richard Biener @ 2015-10-26 12:32 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: Bernd Schmidt, Martin Sebor, Joseph Myers, Gcc Patch List

On Mon, Oct 26, 2015 at 1:01 PM, Jakub Jelinek <jakub@redhat.com> wrote:
> On Mon, Oct 26, 2015 at 12:57:53PM +0100, Bernd Schmidt wrote:
>> On 10/26/2015 12:47 PM, Jakub Jelinek wrote:
>>
>> >Because the amount of code that uses this (including GCC itself) is just too
>> >huge, so everywhere in the middle-end we also special case last array members of
>> >structs.  While C99+ has flexible array members, e.g. C++ does not, so users
>> >are left with using struct { ... type fld[1]; };
>>
>> Yes, and that case is documented. However, the issue is arrays declared with
>> a larger size than 1 or 0 - is there really code using them as flexible
>> array members?
>
> I believe so, though don't have pointers to that right now.  But vaguely
> remember we saw various cases of using 2 or other values too.

Yes, char[4] is quite common (basically making sure there is no appearant
padding behind the array due to alignment - just in case compilers might
be clever because of that).

Richard.

>         Jakub

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH] c/67882 - improve -Warray-bounds for invalid offsetof
  2015-10-26 12:32                                 ` Richard Biener
@ 2015-10-27 11:18                                   ` Bernd Schmidt
  0 siblings, 0 replies; 30+ messages in thread
From: Bernd Schmidt @ 2015-10-27 11:18 UTC (permalink / raw)
  To: Richard Biener, Jakub Jelinek; +Cc: Martin Sebor, Joseph Myers, Gcc Patch List

On 10/26/2015 01:27 PM, Richard Biener wrote:
> On Mon, Oct 26, 2015 at 1:01 PM, Jakub Jelinek <jakub@redhat.com> wrote:
>> On Mon, Oct 26, 2015 at 12:57:53PM +0100, Bernd Schmidt wrote:
>>> On 10/26/2015 12:47 PM, Jakub Jelinek wrote:
>>>
>>>> Because the amount of code that uses this (including GCC itself) is just too
>>>> huge, so everywhere in the middle-end we also special case last array members of
>>>> structs.  While C99+ has flexible array members, e.g. C++ does not, so users
>>>> are left with using struct { ... type fld[1]; };
>>>
>>> Yes, and that case is documented. However, the issue is arrays declared with
>>> a larger size than 1 or 0 - is there really code using them as flexible
>>> array members?
>>
>> I believe so, though don't have pointers to that right now.  But vaguely
>> remember we saw various cases of using 2 or other values too.
>
> Yes, char[4] is quite common (basically making sure there is no appearant
> padding behind the array due to alignment - just in case compilers might
> be clever because of that).

Ugh, how ugly. Can we at least agree not to allow multi-dimensional 
arrays with a size larger than one to be considered flexible?


Bernd

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH] c/67882 - improve -Warray-bounds for invalid offsetof
  2015-10-23 20:54                       ` Martin Sebor
  2015-10-26 11:44                         ` Bernd Schmidt
@ 2015-11-03 19:15                         ` Martin Sebor
  1 sibling, 0 replies; 30+ messages in thread
From: Martin Sebor @ 2015-11-03 19:15 UTC (permalink / raw)
  To: Bernd Schmidt; +Cc: Joseph Myers, Gcc Patch List

>> I guess this is a case where I could say either "I wrote the patch" or
>> "I requested changes to a patch in review"; in the latter case I can
>> approve it. Joseph seems on board with what we've discussed, so I'd say
>> please wait until Tuesday for objections then commit.

I didn't get to committing the patch last week but I have done so
just now.

Martin

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH] c/67882 - improve -Warray-bounds for invalid offsetof
  2015-10-20 22:19             ` Joseph Myers
  2015-10-23 11:17               ` Bernd Schmidt
@ 2015-11-07 23:38               ` Segher Boessenkool
  2015-11-09 22:46                 ` Martin Sebor
  2015-11-10  0:02                 ` Joseph Myers
  1 sibling, 2 replies; 30+ messages in thread
From: Segher Boessenkool @ 2015-11-07 23:38 UTC (permalink / raw)
  To: Joseph Myers; +Cc: Bernd Schmidt, Martin Sebor, Gcc Patch List

On Tue, Oct 20, 2015 at 10:10:44PM +0000, Joseph Myers wrote:
> > typedef struct FA5_7 {
> >   int i;
> >   char a5_7 [5][7];
> > } FA5_7;
> > 
> >     __builtin_offsetof (FA5_7, a5_7 [0][7]),         // { dg-warning "index" }
> >     __builtin_offsetof (FA5_7, a5_7 [1][7]),         // { dg-warning "index" }
> >     __builtin_offsetof (FA5_7, a5_7 [5][0]),         // { dg-warning "index" }
> >     __builtin_offsetof (FA5_7, a5_7 [5][7]),         // { dg-warning "index" }
> > 
> > Here I think the last one of these is most likely invalid (being 8 bytes past
> > the end of the object, rather than just one) and the others valid. Can you
> > confirm this? (If the &a.v[2].a example is considered invalid, then I think
> > the a5_7[5][0] test would be the equivalent and ought to also be considered
> > invalid).
> 
> The last one is certainly invalid.  The one before is arguably invalid as 
> well (in the unary '&' equivalent, &a5_7[5][0] which is equivalent to 
> a5_7[5] + 0, the questionable operation is implicit conversion of a5_7[5] 
> from array to pointer - an array expression gets converted to an 
> expression "that points to the initial element of the array object", but 
> there is no array object a5_7[5] here).

C11, 6.5.2.1/3:
Successive subscript operators designate an element of a
multidimensional array object. If E is an n-dimensional array (n >= 2)
with dimensions i x j x . . . x k, then E (used as other than an lvalue)
is converted to a pointer to an (n - 1)-dimensional array with
dimensions j x . . . x k. If the unary * operator is applied to this
pointer explicitly, or implicitly as a result of subscripting, the
result is the referenced (n - 1)-dimensional array, which itself is
converted into a pointer if used as other than an lvalue. It follows
from this that arrays are stored in row-major order (last subscript
varies fastest).

As far as I see, a5_7[5] here is never treated as an array, just as a
pointer, and &a5_7[5][0] is valid.


Segher

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH] c/67882 - improve -Warray-bounds for invalid offsetof
  2015-11-07 23:38               ` Segher Boessenkool
@ 2015-11-09 22:46                 ` Martin Sebor
  2015-11-10  0:02                 ` Joseph Myers
  1 sibling, 0 replies; 30+ messages in thread
From: Martin Sebor @ 2015-11-09 22:46 UTC (permalink / raw)
  To: Segher Boessenkool, Joseph Myers; +Cc: Bernd Schmidt, Gcc Patch List

On 11/07/2015 04:38 PM, Segher Boessenkool wrote:
> On Tue, Oct 20, 2015 at 10:10:44PM +0000, Joseph Myers wrote:
>>> typedef struct FA5_7 {
>>>    int i;
>>>    char a5_7 [5][7];
>>> } FA5_7;
>>>
>>>      __builtin_offsetof (FA5_7, a5_7 [0][7]),         // { dg-warning "index" }
>>>      __builtin_offsetof (FA5_7, a5_7 [1][7]),         // { dg-warning "index" }
>>>      __builtin_offsetof (FA5_7, a5_7 [5][0]),         // { dg-warning "index" }
>>>      __builtin_offsetof (FA5_7, a5_7 [5][7]),         // { dg-warning "index" }
>>>
>>> Here I think the last one of these is most likely invalid (being 8 bytes past
>>> the end of the object, rather than just one) and the others valid. Can you
>>> confirm this? (If the &a.v[2].a example is considered invalid, then I think
>>> the a5_7[5][0] test would be the equivalent and ought to also be considered
>>> invalid).
>>
>> The last one is certainly invalid.  The one before is arguably invalid as
>> well (in the unary '&' equivalent, &a5_7[5][0] which is equivalent to
>> a5_7[5] + 0, the questionable operation is implicit conversion of a5_7[5]
>> from array to pointer - an array expression gets converted to an
>> expression "that points to the initial element of the array object", but
>> there is no array object a5_7[5] here).
>
> C11, 6.5.2.1/3:
> Successive subscript operators designate an element of a
> multidimensional array object. If E is an n-dimensional array (n >= 2)
> with dimensions i x j x . . . x k, then E (used as other than an lvalue)
> is converted to a pointer to an (n - 1)-dimensional array with
> dimensions j x . . . x k. If the unary * operator is applied to this
> pointer explicitly, or implicitly as a result of subscripting, the
> result is the referenced (n - 1)-dimensional array, which itself is
> converted into a pointer if used as other than an lvalue. It follows
> from this that arrays are stored in row-major order (last subscript
> varies fastest).
>
> As far as I see, a5_7[5] here is never treated as an array, just as a
> pointer, and &a5_7[5][0] is valid.

Segher and I discussed this briefly on IRC over the weekend and
I agreed to try to get a confirmation of the interpretation the
warning is based on from WG14. I'll report back what I learn
(if anything). I defer to Bernd and Joseph as to whether to make
any changes in the meantime.

Martin

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH] c/67882 - improve -Warray-bounds for invalid offsetof
  2015-11-07 23:38               ` Segher Boessenkool
  2015-11-09 22:46                 ` Martin Sebor
@ 2015-11-10  0:02                 ` Joseph Myers
  1 sibling, 0 replies; 30+ messages in thread
From: Joseph Myers @ 2015-11-10  0:02 UTC (permalink / raw)
  To: Segher Boessenkool; +Cc: Bernd Schmidt, Martin Sebor, Gcc Patch List

On Sat, 7 Nov 2015, Segher Boessenkool wrote:

> > The last one is certainly invalid.  The one before is arguably invalid as 
> > well (in the unary '&' equivalent, &a5_7[5][0] which is equivalent to 
> > a5_7[5] + 0, the questionable operation is implicit conversion of a5_7[5] 
> > from array to pointer - an array expression gets converted to an 
> > expression "that points to the initial element of the array object", but 
> > there is no array object a5_7[5] here).
> 
> C11, 6.5.2.1/3:
> Successive subscript operators designate an element of a
> multidimensional array object. If E is an n-dimensional array (n >= 2)
> with dimensions i x j x . . . x k, then E (used as other than an lvalue)
> is converted to a pointer to an (n - 1)-dimensional array with
> dimensions j x . . . x k. If the unary * operator is applied to this
> pointer explicitly, or implicitly as a result of subscripting, the
> result is the referenced (n - 1)-dimensional array, which itself is
> converted into a pointer if used as other than an lvalue. It follows
> from this that arrays are stored in row-major order (last subscript
> varies fastest).
> 
> As far as I see, a5_7[5] here is never treated as an array, just as a
> pointer, and &a5_7[5][0] is valid.

As usual, based on taking the address, not offsetof where there's the open 
question of whether the C standard actually requires support for anything 
other than a single element name there:

a5_7[5] is an expression of array type.  The only way for it to be treated 
as a pointer is for it to be converted implicitly to pointer type.  That 
implicit conversion is what I think is problematic.

Only once the implicit conversion has taken place do the special rules 
about &A[B] meaning A + B take effect.  But since the problem I see is 
with the conversion of A to a pointer, you still have undefined behavior.

The paragraph you quote seems to not to add anything to the semantics 
defined elsewhere in the standard; it's purely descriptive of some 
consequences of those semantics.

Whether we wish to be more permissive about some such cases (depending on 
-Warray-bounds=N) is a pragmatic matter depending on the extent to which 
they are used in practice.

-- 
Joseph S. Myers
joseph@codesourcery.com

^ permalink raw reply	[flat|nested] 30+ messages in thread

* [PATCH] c/67882 - improve -Warray-bounds for invalid offsetof
@ 2015-10-09  2:49 Martin Sebor
  0 siblings, 0 replies; 30+ messages in thread
From: Martin Sebor @ 2015-10-09  2:49 UTC (permalink / raw)
  To: Gcc Patch List

Gcc attempts to diagnose invalid offsetof expressions whose member
designator is an array element with an out-of-bounds index. The
logic in the function that does this detection is incomplete, leading
to false negatives. Since the result of the expression in these cases
can be surprising, this patch tightens up the logic to diagnose more
such cases.

^ permalink raw reply	[flat|nested] 30+ messages in thread

end of thread, other threads:[~2015-11-10  0:02 UTC | newest]

Thread overview: 30+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-10-09  2:55 [PATCH] c/67882 - improve -Warray-bounds for invalid offsetof Martin Sebor
2015-10-15 21:59 ` [PING] " Martin Sebor
2015-10-16 12:28 ` Bernd Schmidt
2015-10-16 17:27   ` Joseph Myers
2015-10-16 19:34   ` Martin Sebor
2015-10-20 13:21     ` Bernd Schmidt
2015-10-20 15:33       ` Martin Sebor
2015-10-20 15:52         ` Bernd Schmidt
2015-10-20 16:57           ` Martin Sebor
2015-10-20 17:11             ` Joseph Myers
2015-10-20 19:10               ` Martin Sebor
2015-10-20 16:54         ` Joseph Myers
2015-10-20 20:36           ` Bernd Schmidt
2015-10-20 22:19             ` Joseph Myers
2015-10-23 11:17               ` Bernd Schmidt
2015-10-23 15:15                 ` Martin Sebor
2015-10-23 16:53                   ` Joseph Myers
2015-10-23 17:45                     ` Bernd Schmidt
2015-10-23 20:54                       ` Martin Sebor
2015-10-26 11:44                         ` Bernd Schmidt
2015-10-26 11:51                           ` Jakub Jelinek
2015-10-26 12:01                             ` Bernd Schmidt
2015-10-26 12:04                               ` Jakub Jelinek
2015-10-26 12:32                                 ` Richard Biener
2015-10-27 11:18                                   ` Bernd Schmidt
2015-11-03 19:15                         ` Martin Sebor
2015-11-07 23:38               ` Segher Boessenkool
2015-11-09 22:46                 ` Martin Sebor
2015-11-10  0:02                 ` Joseph Myers
  -- strict thread matches above, loose matches on Subject: below --
2015-10-09  2:49 Martin Sebor

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).