public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] handle vla plus offset in strlen (PR 90662)
@ 2019-06-05 22:51 Martin Sebor
  2019-06-06 21:53 ` Jeff Law
  0 siblings, 1 reply; 4+ messages in thread
From: Martin Sebor @ 2019-06-05 22:51 UTC (permalink / raw)
  To: gcc-patches

[-- Attachment #1: Type: text/plain, Size: 552 bytes --]

One of my new tests for the strlen/sprintf integration tripped
over an incomplete handling of VLAs by the strlen pass.  Where
it can determine the length of a substring at some offset with
other kinds of arrays, the pass fails with VLAs because they
are represented as pointers to arrays.

The attached patch adds the missing handling so that code like
the following can be fully folded even for VLAs.

   int f (int n)
   {
     char a[n];
     strcpy (a, "12345");
     if (strlen (&a[2]) != 3)
       abort ();
   }

Tested on x86_64-linux.

Martin

[-- Attachment #2: gcc-90662.diff --]
[-- Type: text/x-patch, Size: 10171 bytes --]

PR tree-optimization/90662 - strlen of a string in a vla plus offset not folded

gcc/ChangeLog:

	PR tree-optimization/90662
	* tree-ssa-strlen.c (get_stridx): Handle simple VLAs and pointers
	to arrays.

gcc/testsuite/ChangeLog:

	PR tree-optimization/90662
	* gcc.dg/strlenopt-62.c: New test.
	* gcc.dg/strlenopt-63.c: New test.

diff --git a/gcc/testsuite/gcc.dg/strlenopt-62.c b/gcc/testsuite/gcc.dg/strlenopt-62.c
new file mode 100644
index 00000000000..644bdee2c1b
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/strlenopt-62.c
@@ -0,0 +1,89 @@
+/* PR tree-optimization/90662 - strlen of a string in a vla plus offset
+   not folded
+   { dg-do compile }
+   { dg-options "-O2 -Wall -fdump-tree-gimple -fdump-tree-optimized" } */
+
+#include "strlenopt.h"
+
+#define CONCAT(x, y) x ## y
+#define CAT(x, y) CONCAT (x, y)
+#define FAILNAME(name, counter) \
+  CAT (CAT (CAT (call_ ## name ##_on_line_, __LINE__), _), counter)
+
+#define FAIL(name, counter) do {			\
+    extern void FAILNAME (name, counter) (void);	\
+    FAILNAME (name, counter)();				\
+  } while (0)
+
+/* Macro to emit a call to funcation named
+     call_in_true_branch_not_eliminated_on_line_NNN()
+   for each call that's expected to be eliminated.  The dg-final
+   scan-tree-dump-time directive at the bottom of the test verifies
+   that no such call appears in output.  */
+#define ELIM(expr) \
+  if (!(expr)) FAIL (in_true_branch_not_eliminated, __COUNTER__); else (void)0
+
+#define ARGS(...) __VA_ARGS__
+
+void sink (void*, ...);
+
+
+#define T(expect, init, p)				\
+  do {							\
+    char vla[n];					\
+    char *ptr = strcpy (vla, init);			\
+    ELIM (expect == strlen (p));			\
+    sink (ptr);						\
+  } while (0)
+
+void test_vla_local (int n)
+{
+  T (0, "", ptr);
+  T (0, "\0", ptr);
+  T (1, "1", ptr);
+  T (2, "12", ptr);
+  T (3, "123", ptr);
+
+  T (2, "123", ptr + 1);
+  T (1, "123", &ptr[2]);
+  T (0, "123", &ptr[1] + 2);
+}
+
+
+#undef T
+#define T(expect, parr, init, p)			\
+  do {							\
+    char (*parray)[] = *ppa++;				\
+    char *ptr = strcpy (parr, init);			\
+    (void)&ptr;						\
+    ELIM (expect == strlen (p));			\
+  } while (0)
+
+/* Have the function take a pointer to pointers to arrays so that each
+   test case can use its own pointer to avoid interference between.  */
+
+void test_array_ptr (char (**ppa)[])
+{
+  T (0, *parray, "", *parray);
+  T (0, *parray, "", &(*parray)[0]);
+
+  T (1, *parray, "1", &(*parray)[0]);
+  T (0, *parray, "1", &(*parray)[1]);
+
+  T (2, *parray, "12", &(*parray)[0]);
+  T (1, *parray, "12", &(*parray)[1]);
+  T (0, *parray, "12", &(*parray)[2]);
+
+  T (3, *parray, "123", &(*parray)[0]);
+  T (2, *parray, "123", &(*parray)[1]);
+  T (1, *parray, "123", &(*parray)[2]);
+  T (0, *parray, "123", &(*parray)[3]);
+
+  T (3, *parray, "123", ptr);
+  T (2, *parray, "123", &ptr[1]);
+  T (1, *parray, "123", &ptr[2]);
+  T (0, *parray, "123", &ptr[3]);
+}
+
+/* { dg-final { scan-tree-dump-times "strlen" 0 "optimized" } }
+   { dg-final { scan-tree-dump-times "not_eliminated" 0 "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/strlenopt-63.c b/gcc/testsuite/gcc.dg/strlenopt-63.c
new file mode 100644
index 00000000000..ca3e55fd9f8
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/strlenopt-63.c
@@ -0,0 +1,158 @@
+/* PR tree-optimization/90662 - strlen of a string in a vla plus offset
+   not folded
+   Verify that strlen of pointers to arrays are computed correctly
+   (whether folded or not).
+   { dg-do run }
+   { dg-options "-O2 -Wall" } */
+
+#include "strlenopt.h"
+
+#define A(expr)                                                 \
+  ((expr)                                                       \
+   ? (void)0                                                    \
+   : (__builtin_printf ("assertion failed on line %i: %s\n",    \
+                        __LINE__, #expr),                       \
+      __builtin_abort ()))
+
+typedef char A5[5];
+
+A5 a5[5];
+A5* p[5] = { &a5[4], &a5[3], &a5[2], &a5[1], &a5[0] };
+
+__attribute__ ((noclone, noinline, noipa))
+void deref_deref (void)
+{
+  strcpy (**p, "12345");
+  A (strlen (**p) == 5);
+}
+
+__attribute__ ((noclone, noinline, noipa))
+void deref_idx_0 (void)
+{
+  strcpy (*p[0], "");
+  A (strlen (*p[0]) == 0);
+}
+
+__attribute__ ((noclone, noinline, noipa))
+void deref_idx_1 (void)
+{
+  strcpy (*p[1], "1");
+  A (strlen (*p[1]) == 1);
+  A (strlen (&(*p[1])[1]) == 0);
+
+  A (strlen (*p[0]) == 0);
+}
+
+__attribute__ ((noclone, noinline, noipa))
+void deref_idx_2 (void)
+{
+  strcpy (*p[2], "12");
+  A (strlen (*p[2]) == 2);
+  A (strlen (&(*p[2])[1]) == 1);
+  A (strlen (&(*p[2])[2]) == 0);
+
+  A (strlen (*p[1]) == 1);
+  A (strlen (*p[0]) == 0);
+}
+
+__attribute__ ((noclone, noinline, noipa))
+void deref_idx_3 (void)
+{
+  strcpy (*p[3], "123");
+  A (strlen (*p[3]) == 3);
+  A (strlen (&(*p[3])[1]) == 2);
+  A (strlen (&(*p[3])[2]) == 1);
+  A (strlen (&(*p[3])[3]) == 0);
+
+  A (strlen (*p[2]) == 2);
+  A (strlen (*p[1]) == 1);
+  A (strlen (*p[0]) == 0);
+}
+
+__attribute__ ((noclone, noinline, noipa))
+void deref_idx_4 (void)
+{
+  strcpy (*p[4], "1234");
+  A (strlen (*p[4]) == 4);
+  A (strlen (&(*p[4])[1]) == 3);
+  A (strlen (&(*p[4])[2]) == 2);
+  A (strlen (&(*p[4])[3]) == 1);
+  A (strlen (&(*p[4])[4]) == 0);
+
+  A (strlen (*p[3]) == 3);
+  A (strlen (*p[2]) == 2);
+  A (strlen (*p[1]) == 1);
+  A (strlen (*p[0]) == 0);
+}
+
+__attribute__ ((noclone, noinline, noipa))
+void deref_idx_4_x (void)
+{
+  strcpy (*p[4], "");
+  A (strlen (*p[4]) == 0);
+  A (strlen (*p[3]) == 3);
+  A (strlen (*p[2]) == 2);
+  A (strlen (*p[1]) == 1);
+  A (strlen (*p[0]) == 0);
+}
+
+__attribute__ ((noclone, noinline, noipa))
+void deref_idx_3_x (void)
+{
+  strcpy (&(*p[3])[0], "1");
+  A (strlen (*p[4]) == 0);
+  A (strlen (*p[3]) == 1);
+  A (strlen (*p[2]) == 2);
+  A (strlen (*p[1]) == 1);
+  A (strlen (*p[0]) == 0);
+}
+
+__attribute__ ((noclone, noinline, noipa))
+void deref_idx_2_x (void)
+{
+  strcpy (*p[2], "12");
+  A (strlen (*p[4]) == 0);
+  A (strlen (*p[3]) == 1);
+  A (strlen (*p[2]) == 2);
+  A (strlen (*p[1]) == 1);
+  A (strlen (*p[0]) == 0);
+}
+
+__attribute__ ((noclone, noinline, noipa))
+void deref_idx_1_x (void)
+{
+  strcpy (*p[1], "123");
+  A (strlen (*p[4]) == 0);
+  A (strlen (*p[3]) == 1);
+  A (strlen (*p[2]) == 2);
+  A (strlen (*p[1]) == 3);
+  A (strlen (*p[0]) == 0);
+}
+
+__attribute__ ((noclone, noinline, noipa))
+void deref_idx_0_x (void)
+{
+  strcpy (*p[0], "1234");
+  A (strlen (*p[4]) == 0);
+  A (strlen (*p[3]) == 1);
+  A (strlen (*p[2]) == 2);
+  A (strlen (*p[1]) == 3);
+  A (strlen (*p[0]) == 4);
+}
+
+int main (void)
+{
+  deref_deref ();
+
+  deref_idx_0 ();
+  deref_idx_1 ();
+  deref_idx_2 ();
+  deref_idx_3 ();
+  deref_idx_4 ();
+
+  deref_idx_4_x ();
+  deref_idx_3_x ();
+  deref_idx_2_x ();
+  deref_idx_1_x ();
+  deref_idx_0_x ();
+}
diff --git a/gcc/tree-ssa-strlen.c b/gcc/tree-ssa-strlen.c
index a2dc9c7b102..2820bd972c6 100644
--- a/gcc/tree-ssa-strlen.c
+++ b/gcc/tree-ssa-strlen.c
@@ -295,36 +295,66 @@ get_stridx (tree exp)
   if (TREE_CODE (exp) == SSA_NAME)
     {
       if (ssa_ver_to_stridx[SSA_NAME_VERSION (exp)])
-	return ssa_ver_to_stridx[SSA_NAME_VERSION (exp)];
-      int i;
+        return ssa_ver_to_stridx[SSA_NAME_VERSION (exp)];
+
       tree e = exp;
-      HOST_WIDE_INT off = 0;
-      for (i = 0; i < 5; i++)
-	{
-	  gimple *def_stmt = SSA_NAME_DEF_STMT (e);
-	  if (!is_gimple_assign (def_stmt)
-	      || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR)
-	    return 0;
-	  tree rhs1 = gimple_assign_rhs1 (def_stmt);
-	  tree rhs2 = gimple_assign_rhs2 (def_stmt);
-	  if (TREE_CODE (rhs1) != SSA_NAME
-	      || !tree_fits_shwi_p (rhs2))
-	    return 0;
-	  HOST_WIDE_INT this_off = tree_to_shwi (rhs2);
-	  if (this_off < 0)
-	    return 0;
-	  off = (unsigned HOST_WIDE_INT) off + this_off;
-	  if (off < 0)
-	    return 0;
-	  if (ssa_ver_to_stridx[SSA_NAME_VERSION (rhs1)])
-	    {
-	      strinfo *si
-		= get_strinfo (ssa_ver_to_stridx[SSA_NAME_VERSION (rhs1)]);
-	      if (si && compare_nonzero_chars (si, off) >= 0)
-		return get_stridx_plus_constant (si, off, exp);
-	    }
-	  e = rhs1;
-	}
+      HOST_WIDE_INT offset = 0;
+      /* Follow a chain of at most 5 assignments.  */
+      for (int i = 0; i < 5; i++)
+        {
+          gimple *def_stmt = SSA_NAME_DEF_STMT (e);
+          if (!is_gimple_assign (def_stmt))
+            return 0;
+
+          tree_code rhs_code = gimple_assign_rhs_code (def_stmt);
+          tree ptr, off;
+
+          if (rhs_code == ADDR_EXPR)
+            {
+              /* Handle indices/offsets into VLAs which are implemented
+                 as pointers to arrays.  */
+              ptr = gimple_assign_rhs1 (def_stmt);
+              ptr = TREE_OPERAND (ptr, 0);
+              if (TREE_CODE (ptr) != ARRAY_REF)
+                return 0;
+
+              off = TREE_OPERAND (ptr, 1);
+              ptr = TREE_OPERAND (ptr, 0);
+              if (TREE_CODE (ptr) != MEM_REF)
+                return 0;
+
+              tree mem_off = TREE_OPERAND (ptr, 1);
+              if (!integer_zerop (mem_off))
+                return 0;
+
+              ptr = TREE_OPERAND (ptr, 0);
+            }
+          else if (rhs_code == POINTER_PLUS_EXPR)
+            {
+              ptr = gimple_assign_rhs1 (def_stmt);
+              off = gimple_assign_rhs2 (def_stmt);
+            }
+          else
+            return 0;
+
+          if (TREE_CODE (ptr) != SSA_NAME
+              || !tree_fits_shwi_p (off))
+            return 0;
+          HOST_WIDE_INT this_off = tree_to_shwi (off);
+          if (this_off < 0)
+            return 0;
+          offset = (unsigned HOST_WIDE_INT) offset + this_off;
+          if (offset < 0)
+            return 0;
+          if (ssa_ver_to_stridx[SSA_NAME_VERSION (ptr)])
+            {
+              strinfo *si
+                = get_strinfo (ssa_ver_to_stridx[SSA_NAME_VERSION (ptr)]);
+              if (si && compare_nonzero_chars (si, offset) >= 0)
+                return get_stridx_plus_constant (si, offset, exp);
+            }
+          e = ptr;
+        }
       return 0;
     }
 

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] handle vla plus offset in strlen (PR 90662)
  2019-06-05 22:51 [PATCH] handle vla plus offset in strlen (PR 90662) Martin Sebor
@ 2019-06-06 21:53 ` Jeff Law
  2019-06-07 19:12   ` Martin Sebor
  0 siblings, 1 reply; 4+ messages in thread
From: Jeff Law @ 2019-06-06 21:53 UTC (permalink / raw)
  To: Martin Sebor, gcc-patches

On 6/5/19 4:51 PM, Martin Sebor wrote:
> One of my new tests for the strlen/sprintf integration tripped
> over an incomplete handling of VLAs by the strlen pass.  Where
> it can determine the length of a substring at some offset with
> other kinds of arrays, the pass fails with VLAs because they
> are represented as pointers to arrays.
> 
> The attached patch adds the missing handling so that code like
> the following can be fully folded even for VLAs.
> 
>   int f (int n)
>   {
>     char a[n];
>     strcpy (a, "12345");
>     if (strlen (&a[2]) != 3)
>       abort ();
>   }
> 
> Tested on x86_64-linux.
> 
> Martin
> 
> gcc-90662.diff
> 
> PR tree-optimization/90662 - strlen of a string in a vla plus offset not folded
> 
> gcc/ChangeLog:
> 
> 	PR tree-optimization/90662
> 	* tree-ssa-strlen.c (get_stridx): Handle simple VLAs and pointers
> 	to arrays.
> 
> gcc/testsuite/ChangeLog:
> 
> 	PR tree-optimization/90662
> 	* gcc.dg/strlenopt-62.c: New test.
> 	* gcc.dg/strlenopt-63.c: New test.
We're relying on the fact that in a MEM_REF the offset is always a byte
offset, so no scaling for wchars needed, right?

OK for the trunk.
THanks,
Jeff

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] handle vla plus offset in strlen (PR 90662)
  2019-06-06 21:53 ` Jeff Law
@ 2019-06-07 19:12   ` Martin Sebor
  2019-06-11 20:12     ` Jeff Law
  0 siblings, 1 reply; 4+ messages in thread
From: Martin Sebor @ 2019-06-07 19:12 UTC (permalink / raw)
  To: Jeff Law, gcc-patches

[-- Attachment #1: Type: text/plain, Size: 1895 bytes --]

On 6/6/19 3:53 PM, Jeff Law wrote:
> On 6/5/19 4:51 PM, Martin Sebor wrote:
>> One of my new tests for the strlen/sprintf integration tripped
>> over an incomplete handling of VLAs by the strlen pass.  Where
>> it can determine the length of a substring at some offset with
>> other kinds of arrays, the pass fails with VLAs because they
>> are represented as pointers to arrays.
>>
>> The attached patch adds the missing handling so that code like
>> the following can be fully folded even for VLAs.
>>
>>    int f (int n)
>>    {
>>      char a[n];
>>      strcpy (a, "12345");
>>      if (strlen (&a[2]) != 3)
>>        abort ();
>>    }
>>
>> Tested on x86_64-linux.
>>
>> Martin
>>
>> gcc-90662.diff
>>
>> PR tree-optimization/90662 - strlen of a string in a vla plus offset not folded
>>
>> gcc/ChangeLog:
>>
>> 	PR tree-optimization/90662
>> 	* tree-ssa-strlen.c (get_stridx): Handle simple VLAs and pointers
>> 	to arrays.
>>
>> gcc/testsuite/ChangeLog:
>>
>> 	PR tree-optimization/90662
>> 	* gcc.dg/strlenopt-62.c: New test.
>> 	* gcc.dg/strlenopt-63.c: New test.
> We're relying on the fact that in a MEM_REF the offset is always a byte
> offset, so no scaling for wchars needed, right?

I was assuming that the strlen pass only deals with narrow chars.
But it can be called on a wide character array (or an array of
any other type) even if it makes little sense, and the code isn't
prepared to handle that.  Thanks for the hint!  The attached
revision adds the scaling along with tests for non-char VLAs
and pointer to arrays.

Martin

> 
> OK for the trunk.
> THanks,
> Jeff
> 


[-- Attachment #2: gcc-90662.diff --]
[-- Type: text/x-patch, Size: 18689 bytes --]

PR tree-optimization/90662 - strlen of a string in a vla plus offset not folded

gcc/ChangeLog:

	PR tree-optimization/90662
	* tree-ssa-strlen.c (get_stridx): Handle simple VLAs and pointers
	to arrays.

gcc/testsuite/ChangeLog:

	PR tree-optimization/90662
	* gcc.dg/strlenopt-62.c: New test.
	* gcc.dg/strlenopt-63.c: New test.
	* gcc.dg/strlenopt-64.c: New test.

diff --git a/gcc/testsuite/gcc.dg/strlenopt-62.c b/gcc/testsuite/gcc.dg/strlenopt-62.c
new file mode 100644
index 00000000000..0e09a7ab0e1
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/strlenopt-62.c
@@ -0,0 +1,189 @@
+/* PR tree-optimization/90662 - strlen of a string in a vla plus offset
+   not folded
+   { dg-do compile }
+   { dg-options "-O2 -Wall -fdump-tree-gimple -fdump-tree-optimized" } */
+
+#include "strlenopt.h"
+
+typedef __INT16_TYPE__   int16_t;
+typedef __INT32_TYPE__   int32_t;
+
+#define CONCAT(x, y) x ## y
+#define CAT(x, y) CONCAT (x, y)
+#define FAILNAME(name, counter) \
+  CAT (CAT (CAT (call_ ## name ##_on_line_, __LINE__), _), counter)
+
+#define FAIL(name, counter) do {			\
+    extern void FAILNAME (name, counter) (void);	\
+    FAILNAME (name, counter)();				\
+  } while (0)
+
+/* Macro to emit a call to funcation named
+     call_in_true_branch_not_eliminated_on_line_NNN()
+   for each call that's expected to be eliminated.  The dg-final
+   scan-tree-dump-time directive at the bottom of the test verifies
+   that no such call appears in output.  */
+#define ELIM(expr) \
+  if (!(expr)) FAIL (in_true_branch_not_eliminated, __COUNTER__); else (void)0
+
+#define ARGS(...) __VA_ARGS__
+
+void sink (void*, ...);
+
+
+#define T(Type, expect, init, p)			\
+  do {							\
+    Type vla[n];					\
+    char *ptr = strcpy ((char*)vla, init);		\
+    ELIM (expect == strlen (p));			\
+    sink (ptr);						\
+  } while (0)
+
+void test_char_vla_local (int n)
+{
+  T (char, 0, "", vla);
+  T (char, 0, "\0", vla);
+  T (char, 1, "1", vla);
+  T (char, 2, "12", vla);
+  T (char, 3, "123", vla);
+
+  T (char, 2, "123", vla + 1);
+  T (char, 1, "123", &vla[2]);
+  T (char, 0, "123", &vla[1] + 2);
+
+  T (char, 2, "123", &vla[2] - 1);
+  T (char, 3, "123", &vla[1] - 1);
+
+  T (char, 0, "", ptr);
+  T (char, 0, "\0", ptr);
+  T (char, 1, "1", ptr);
+  T (char, 2, "12", ptr);
+  T (char, 3, "123", ptr);
+
+  T (char, 2, "123", ptr + 1);
+  T (char, 1, "123", &ptr[2]);
+  T (char, 0, "123", &ptr[1] + 2);
+}
+
+void test_int16_vla_local (int n)
+{
+  T (int16_t, 0, "", (char*)vla);
+  T (int16_t, 0, "\0", (char*)vla);
+  T (int16_t, 1, "1", (char*)vla);
+  T (int16_t, 2, "12", (char*)vla);
+  T (int16_t, 3, "123", (char*)vla);
+
+  T (int16_t, 2, "1234", (char*)(vla + 1));
+  T (int16_t, 2, "123456", (char*)&vla[2]);
+  T (int16_t, 1, "123456", (char*)&vla[2] + 1);
+  T (int16_t, 0, "123456", (char*)&vla[2] + 2);
+  T (int16_t, 0, "123456", (char*)(&vla[1] + 2));
+
+  T (int16_t, 3, "123456", (char*)&vla[2] - 1);
+  T (int16_t, 4, "123456", (char*)&vla[2] - 2);
+
+  T (int16_t, 0, "", ptr);
+  T (int16_t, 0, "\0", ptr);
+  T (int16_t, 1, "1", ptr);
+  T (int16_t, 2, "12", ptr);
+  T (int16_t, 3, "123", ptr);
+
+  T (int16_t, 2, "123", ptr + 1);
+  T (int16_t, 1, "123", &ptr[2]);
+  T (int16_t, 0, "123", &ptr[1] + 2);
+}
+
+void test_int_vla_local (int n)
+{
+  T (int16_t, 0, "", (char*)vla);
+  T (int16_t, 0, "\0", (char*)vla);
+  T (int16_t, 1, "1", (char*)vla);
+  T (int16_t, 2, "12", (char*)vla);
+  T (int16_t, 3, "123", (char*)vla);
+
+  T (int16_t, 2, "1234", (char*)(vla + 1));
+  T (int16_t, 2, "123456", (char*)&vla[2]);
+  T (int16_t, 1, "123456", (char*)&vla[2] + 1);
+  T (int16_t, 0, "123456", (char*)&vla[2] + 2);
+  T (int16_t, 0, "123456", (char*)(&vla[1] + 2));
+
+  T (int, 0, "", ptr);
+  T (int, 0, "\0", ptr);
+  T (int, 1, "1", ptr);
+  T (int, 2, "12", ptr);
+  T (int, 3, "123", ptr);
+
+  T (int, 2, "123", ptr + 1);
+  T (int, 1, "123", &ptr[2]);
+  T (int, 0, "123", &ptr[1] + 2);
+}
+
+
+#undef T
+#define T(Type, expect, parr, init, p)			\
+  do {							\
+    Type (*parray)[] = *ppa++;				\
+    char *ptr = strcpy ((char*)parr, init);		\
+    (void)&ptr;						\
+    ELIM (expect == strlen (p));			\
+  } while (0)
+
+/* Have the function take a pointer to pointers to arrays so that each
+   test case can use its own pointer to avoid interference between.  */
+
+void test_char_array_ptr (char (**ppa)[])
+{
+  T (char, 0, *parray, "", *parray);
+  T (char, 0, *parray, "", &(*parray)[0]);
+
+  T (char, 1, *parray, "1", &(*parray)[0]);
+  T (char, 0, *parray, "1", &(*parray)[1]);
+
+  T (char, 2, *parray, "12", &(*parray)[0]);
+  T (char, 1, *parray, "12", &(*parray)[1]);
+  T (char, 0, *parray, "12", &(*parray)[2]);
+
+  T (char, 3, *parray, "123", &(*parray)[0]);
+  T (char, 2, *parray, "123", &(*parray)[1]);
+  T (char, 1, *parray, "123", &(*parray)[2]);
+  T (char, 0, *parray, "123", &(*parray)[3]);
+
+  T (char, 3, *parray, "123", ptr);
+  T (char, 2, *parray, "123", &ptr[1]);
+  T (char, 1, *parray, "123", &ptr[2]);
+  T (char, 0, *parray, "123", &ptr[3]);
+}
+
+void test_int16_array_ptr (int16_t (**ppa)[])
+{
+  T (int16_t, 0, *parray, "", (char*)*parray);
+  T (int16_t, 0, *parray, "", (char*)&(*parray)[0]);
+
+  T (int16_t, 1, *parray, "1", (char*)&(*parray)[0]);
+  T (int16_t, 0, *parray, "12", (char*)&(*parray)[1]);
+
+  T (int16_t, 2, *parray, "12", (char*)&(*parray)[0]);
+  T (int16_t, 1, *parray, "12", (char*)&(*parray)[0] + 1);
+  T (int16_t, 2, *parray, "1234", (char*)&(*parray)[1]);
+  T (int16_t, 1, *parray, "1234", (char*)&(*parray)[1] + 1);
+  T (int16_t, 0, *parray, "1234", (char*)&(*parray)[2]);
+
+  T (int16_t, 6, *parray, "123456", (char*)&(*parray)[0]);
+  T (int16_t, 5, *parray, "123456", (char*)&(*parray)[0] + 1);
+  T (int16_t, 0, *parray, "123456", (char*)&(*parray)[0] + 6);
+  T (int16_t, 4, *parray, "123456", (char*)&(*parray)[1]);
+  T (int16_t, 3, *parray, "123456", (char*)&(*parray)[1] + 1);
+  T (int16_t, 0, *parray, "123456", (char*)&(*parray)[1] + 4);
+  T (int16_t, 2, *parray, "123456", (char*)&(*parray)[2]);
+  T (int16_t, 1, *parray, "123456", (char*)&(*parray)[2] + 1);
+  T (int16_t, 0, *parray, "123456", (char*)&(*parray)[2] + 2);
+  T (int16_t, 0, *parray, "123456", (char*)&(*parray)[3]);
+
+  T (int16_t, 3, *parray, "123", (char*)ptr);
+  T (int16_t, 2, *parray, "123", (char*)&ptr[1]);
+  T (int16_t, 1, *parray, "123", (char*)&ptr[2]);
+  T (int16_t, 0, *parray, "123", (char*)&ptr[3]);
+}
+
+/* { dg-final { scan-tree-dump-times "strlen" 0 "optimized" } }
+   { dg-final { scan-tree-dump-times "not_eliminated" 0 "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/strlenopt-63.c b/gcc/testsuite/gcc.dg/strlenopt-63.c
new file mode 100644
index 00000000000..f77db6bbe7a
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/strlenopt-63.c
@@ -0,0 +1,158 @@
+/* PR tree-optimization/90662 - strlen of a string in a vla plus offset
+   not folded
+   Verify that strlen of pointers to char arrays are computed correctly
+   (whether folded or not).
+   { dg-do run }
+   { dg-options "-O2 -Wall" } */
+
+#include "strlenopt.h"
+
+#define A(expr)                                                 \
+  ((expr)                                                       \
+   ? (void)0                                                    \
+   : (__builtin_printf ("assertion failed on line %i: %s\n",    \
+                        __LINE__, #expr),                       \
+      __builtin_abort ()))
+
+typedef char A5[5];
+
+A5 a5[5];
+A5* p[5] = { &a5[4], &a5[3], &a5[2], &a5[1], &a5[0] };
+
+__attribute__ ((noclone, noinline, noipa))
+void deref_deref (void)
+{
+  strcpy (**p, "12345");
+  A (strlen (**p) == 5);
+}
+
+__attribute__ ((noclone, noinline, noipa))
+void deref_idx_0 (void)
+{
+  strcpy (*p[0], "");
+  A (strlen (*p[0]) == 0);
+}
+
+__attribute__ ((noclone, noinline, noipa))
+void deref_idx_1 (void)
+{
+  strcpy (*p[1], "1");
+  A (strlen (*p[1]) == 1);
+  A (strlen (&(*p[1])[1]) == 0);
+
+  A (strlen (*p[0]) == 0);
+}
+
+__attribute__ ((noclone, noinline, noipa))
+void deref_idx_2 (void)
+{
+  strcpy (*p[2], "12");
+  A (strlen (*p[2]) == 2);
+  A (strlen (&(*p[2])[1]) == 1);
+  A (strlen (&(*p[2])[2]) == 0);
+
+  A (strlen (*p[1]) == 1);
+  A (strlen (*p[0]) == 0);
+}
+
+__attribute__ ((noclone, noinline, noipa))
+void deref_idx_3 (void)
+{
+  strcpy (*p[3], "123");
+  A (strlen (*p[3]) == 3);
+  A (strlen (&(*p[3])[1]) == 2);
+  A (strlen (&(*p[3])[2]) == 1);
+  A (strlen (&(*p[3])[3]) == 0);
+
+  A (strlen (*p[2]) == 2);
+  A (strlen (*p[1]) == 1);
+  A (strlen (*p[0]) == 0);
+}
+
+__attribute__ ((noclone, noinline, noipa))
+void deref_idx_4 (void)
+{
+  strcpy (*p[4], "1234");
+  A (strlen (*p[4]) == 4);
+  A (strlen (&(*p[4])[1]) == 3);
+  A (strlen (&(*p[4])[2]) == 2);
+  A (strlen (&(*p[4])[3]) == 1);
+  A (strlen (&(*p[4])[4]) == 0);
+
+  A (strlen (*p[3]) == 3);
+  A (strlen (*p[2]) == 2);
+  A (strlen (*p[1]) == 1);
+  A (strlen (*p[0]) == 0);
+}
+
+__attribute__ ((noclone, noinline, noipa))
+void deref_idx_4_x (void)
+{
+  strcpy (*p[4], "");
+  A (strlen (*p[4]) == 0);
+  A (strlen (*p[3]) == 3);
+  A (strlen (*p[2]) == 2);
+  A (strlen (*p[1]) == 1);
+  A (strlen (*p[0]) == 0);
+}
+
+__attribute__ ((noclone, noinline, noipa))
+void deref_idx_3_x (void)
+{
+  strcpy (&(*p[3])[0], "1");
+  A (strlen (*p[4]) == 0);
+  A (strlen (*p[3]) == 1);
+  A (strlen (*p[2]) == 2);
+  A (strlen (*p[1]) == 1);
+  A (strlen (*p[0]) == 0);
+}
+
+__attribute__ ((noclone, noinline, noipa))
+void deref_idx_2_x (void)
+{
+  strcpy (*p[2], "12");
+  A (strlen (*p[4]) == 0);
+  A (strlen (*p[3]) == 1);
+  A (strlen (*p[2]) == 2);
+  A (strlen (*p[1]) == 1);
+  A (strlen (*p[0]) == 0);
+}
+
+__attribute__ ((noclone, noinline, noipa))
+void deref_idx_1_x (void)
+{
+  strcpy (*p[1], "123");
+  A (strlen (*p[4]) == 0);
+  A (strlen (*p[3]) == 1);
+  A (strlen (*p[2]) == 2);
+  A (strlen (*p[1]) == 3);
+  A (strlen (*p[0]) == 0);
+}
+
+__attribute__ ((noclone, noinline, noipa))
+void deref_idx_0_x (void)
+{
+  strcpy (*p[0], "1234");
+  A (strlen (*p[4]) == 0);
+  A (strlen (*p[3]) == 1);
+  A (strlen (*p[2]) == 2);
+  A (strlen (*p[1]) == 3);
+  A (strlen (*p[0]) == 4);
+}
+
+int main (void)
+{
+  deref_deref ();
+
+  deref_idx_0 ();
+  deref_idx_1 ();
+  deref_idx_2 ();
+  deref_idx_3 ();
+  deref_idx_4 ();
+
+  deref_idx_4_x ();
+  deref_idx_3_x ();
+  deref_idx_2_x ();
+  deref_idx_1_x ();
+  deref_idx_0_x ();
+}
diff --git a/gcc/testsuite/gcc.dg/strlenopt-64.c b/gcc/testsuite/gcc.dg/strlenopt-64.c
new file mode 100644
index 00000000000..5451457dc31
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/strlenopt-64.c
@@ -0,0 +1,182 @@
+/* PR tree-optimization/90662 - strlen of a string in a vla plus offset
+   not folded
+   Verify that strlen of pointers to wide character arrays (emulated
+   by int16_t) are computed correctly (whether folded or not).
+   { dg-do run }
+   { dg-options "-O2 -Wall -Wno-incompatible-pointer-types" } */
+
+#include "strlenopt.h"
+
+typedef __INT16_TYPE__ int16_t;
+
+#define A(expr)                                                 \
+  ((expr)                                                       \
+   ? (void)0                                                    \
+   : (__builtin_printf ("assertion failed on line %i: %s\n",    \
+                        __LINE__, #expr),                       \
+      __builtin_abort ()))
+
+typedef int16_t A5[5];
+
+A5 a5[5];
+A5* p[5] = { &a5[4], &a5[3], &a5[2], &a5[1], &a5[0] };
+
+__attribute__ ((noclone, noinline, noipa))
+void deref_deref (void)
+{
+  strcpy (**p, "12345");
+  A (strlen (**p) == 5);
+}
+
+__attribute__ ((noclone, noinline, noipa))
+void deref_idx_0 (void)
+{
+  strcpy (*p[0], "");
+  A (strlen (*p[0]) == 0);
+}
+
+__attribute__ ((noclone, noinline, noipa))
+void deref_idx_1 (void)
+{
+  strcpy (*p[1], "12");
+  A (strlen (*p[1]) == 2);
+  A (strlen (&(*p[1])[1]) == 0);
+
+  A (strlen ((char*)*p[1] + 1) == 1);
+  A (strlen ((char*)*p[1] + 2) == 0);
+  A (strlen ((char*)*p[1] + 3) == 0);
+
+  A (strlen ((char*)&(*p[1])[1] + 1) == 0);
+
+  A (strlen (*p[0]) == 0);
+}
+
+__attribute__ ((noclone, noinline, noipa))
+void deref_idx_2 (void)
+{
+  strcpy (*p[2], "1234");
+  A (strlen (*p[2]) == 4);
+  A (strlen (&(*p[2])[1]) == 2);
+  A (strlen (&(*p[2])[2]) == 0);
+
+  A (strlen ((char*)*p[2] + 1) == 3);
+  A (strlen ((char*)*p[2] + 2) == 2);
+  A (strlen ((char*)*p[2] + 3) == 1);
+  A (strlen ((char*)*p[2] + 4) == 0);
+  A (strlen ((char*)*p[2] + 5) == 0);
+
+  A (strlen ((char*)&(*p[2])[1] + 1) == 1);
+  A (strlen ((char*)&(*p[2])[1] + 2) == 0);
+
+  A (strlen (*p[1]) == 2);
+  A (strlen (*p[0]) == 0);
+}
+
+__attribute__ ((noclone, noinline, noipa))
+void deref_idx_3 (void)
+{
+  strcpy (*p[3], "123456");
+  A (strlen (*p[3]) == 6);
+  A (strlen (&(*p[3])[1]) == 4);
+  A (strlen (&(*p[3])[2]) == 2);
+  A (strlen (&(*p[3])[3]) == 0);
+
+  A (strlen ((char*)*p[3] + 1) == 5);
+  A (strlen ((char*)*p[3] + 2) == 4);
+  A (strlen ((char*)*p[3] + 3) == 3);
+  A (strlen ((char*)*p[3] + 4) == 2);
+  A (strlen ((char*)*p[3] + 5) == 1);
+  A (strlen ((char*)*p[3] + 6) == 0);
+
+  A (strlen (*p[2]) == 4);
+  A (strlen (*p[1]) == 2);
+  A (strlen (*p[0]) == 0);
+}
+
+__attribute__ ((noclone, noinline, noipa))
+void deref_idx_4 (void)
+{
+  strcpy (*p[4], "12345678");
+  A (strlen (*p[4]) == 8);
+  A (strlen (&(*p[4])[1]) == 6);
+  A (strlen (&(*p[4])[2]) == 4);
+  A (strlen (&(*p[4])[3]) == 2);
+  A (strlen (&(*p[4])[4]) == 0);
+
+  A (strlen (*p[3]) == 6);
+  A (strlen (*p[2]) == 4);
+  A (strlen (*p[1]) == 2);
+  A (strlen (*p[0]) == 0);
+}
+
+__attribute__ ((noclone, noinline, noipa))
+void deref_idx_4_x (void)
+{
+  strcpy (*p[4], "");
+  A (strlen (*p[4]) == 0);
+  A (strlen (*p[3]) == 6);
+  A (strlen (*p[2]) == 4);
+  A (strlen (*p[1]) == 2);
+  A (strlen (*p[0]) == 0);
+}
+
+__attribute__ ((noclone, noinline, noipa))
+void deref_idx_3_x (void)
+{
+  strcpy (&(*p[3])[0], "1");
+  A (strlen (*p[4]) == 0);
+  A (strlen (*p[3]) == 1);
+  A (strlen (*p[2]) == 4);
+  A (strlen (*p[1]) == 2);
+  A (strlen (*p[0]) == 0);
+}
+
+__attribute__ ((noclone, noinline, noipa))
+void deref_idx_2_x (void)
+{
+  strcpy (*p[2], "12");
+  A (strlen (*p[4]) == 0);
+  A (strlen (*p[3]) == 1);
+  A (strlen (*p[2]) == 2);
+  A (strlen (*p[1]) == 2);
+  A (strlen (*p[0]) == 0);
+}
+
+__attribute__ ((noclone, noinline, noipa))
+void deref_idx_1_x (void)
+{
+  strcpy (*p[1], "123");
+  A (strlen (*p[4]) == 0);
+  A (strlen (*p[3]) == 1);
+  A (strlen (*p[2]) == 2);
+  A (strlen (*p[1]) == 3);
+  A (strlen (*p[0]) == 0);
+}
+
+__attribute__ ((noclone, noinline, noipa))
+void deref_idx_0_x (void)
+{
+  strcpy (*p[0], "1234");
+  A (strlen (*p[4]) == 0);
+  A (strlen (*p[3]) == 1);
+  A (strlen (*p[2]) == 2);
+  A (strlen (*p[1]) == 3);
+  A (strlen (*p[0]) == 4);
+}
+
+int main (void)
+{
+  deref_deref ();
+
+  deref_idx_0 ();
+  deref_idx_1 ();
+  deref_idx_2 ();
+  deref_idx_3 ();
+  deref_idx_4 ();
+
+  deref_idx_4_x ();
+  deref_idx_3_x ();
+  deref_idx_2_x ();
+  deref_idx_1_x ();
+  deref_idx_0_x ();
+}
diff --git a/gcc/tree-ssa-strlen.c b/gcc/tree-ssa-strlen.c
index a2dc9c7b102..005319f7661 100644
--- a/gcc/tree-ssa-strlen.c
+++ b/gcc/tree-ssa-strlen.c
@@ -295,36 +295,79 @@ get_stridx (tree exp)
   if (TREE_CODE (exp) == SSA_NAME)
     {
       if (ssa_ver_to_stridx[SSA_NAME_VERSION (exp)])
-	return ssa_ver_to_stridx[SSA_NAME_VERSION (exp)];
-      int i;
+        return ssa_ver_to_stridx[SSA_NAME_VERSION (exp)];
+
       tree e = exp;
-      HOST_WIDE_INT off = 0;
-      for (i = 0; i < 5; i++)
-	{
-	  gimple *def_stmt = SSA_NAME_DEF_STMT (e);
-	  if (!is_gimple_assign (def_stmt)
-	      || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR)
-	    return 0;
-	  tree rhs1 = gimple_assign_rhs1 (def_stmt);
-	  tree rhs2 = gimple_assign_rhs2 (def_stmt);
-	  if (TREE_CODE (rhs1) != SSA_NAME
-	      || !tree_fits_shwi_p (rhs2))
-	    return 0;
-	  HOST_WIDE_INT this_off = tree_to_shwi (rhs2);
-	  if (this_off < 0)
-	    return 0;
-	  off = (unsigned HOST_WIDE_INT) off + this_off;
-	  if (off < 0)
-	    return 0;
-	  if (ssa_ver_to_stridx[SSA_NAME_VERSION (rhs1)])
-	    {
-	      strinfo *si
-		= get_strinfo (ssa_ver_to_stridx[SSA_NAME_VERSION (rhs1)]);
-	      if (si && compare_nonzero_chars (si, off) >= 0)
-		return get_stridx_plus_constant (si, off, exp);
-	    }
-	  e = rhs1;
-	}
+      HOST_WIDE_INT offset = 0;
+      /* Follow a chain of at most 5 assignments.  */
+      for (int i = 0; i < 5; i++)
+        {
+          gimple *def_stmt = SSA_NAME_DEF_STMT (e);
+          if (!is_gimple_assign (def_stmt))
+            return 0;
+
+          tree_code rhs_code = gimple_assign_rhs_code (def_stmt);
+          tree ptr, off;
+
+          if (rhs_code == ADDR_EXPR)
+            {
+              /* Handle indices/offsets into VLAs which are implemented
+                 as pointers to arrays.  */
+              ptr = gimple_assign_rhs1 (def_stmt);
+              ptr = TREE_OPERAND (ptr, 0);
+
+	      /* Handle also VLAs of types larger than char. */
+	      if (tree eltsize = TYPE_SIZE_UNIT (TREE_TYPE (ptr)))
+		{
+		  if (TREE_CODE (ptr) == ARRAY_REF)
+		    {
+		      off = TREE_OPERAND (ptr, 1);
+		      /* Scale the array index by the size of the element
+			 type (normally 1 for char).  */
+		      off = fold_build2 (MULT_EXPR, TREE_TYPE (off), off,
+					 eltsize);
+		      ptr = TREE_OPERAND (ptr, 0);
+		    }
+		  else
+		    off = integer_zero_node;
+		}
+	      else
+		return 0;
+
+              if (TREE_CODE (ptr) != MEM_REF)
+                return 0;
+
+	      /* Add the MEM_REF byte offset.  */
+              tree mem_off = TREE_OPERAND (ptr, 1);
+	      off = fold_build2 (PLUS_EXPR, TREE_TYPE (off), off, mem_off);
+              ptr = TREE_OPERAND (ptr, 0);
+            }
+          else if (rhs_code == POINTER_PLUS_EXPR)
+            {
+              ptr = gimple_assign_rhs1 (def_stmt);
+              off = gimple_assign_rhs2 (def_stmt);
+            }
+          else
+            return 0;
+
+          if (TREE_CODE (ptr) != SSA_NAME
+              || !tree_fits_shwi_p (off))
+            return 0;
+          HOST_WIDE_INT this_off = tree_to_shwi (off);
+          if (this_off < 0)
+            return 0;
+          offset = (unsigned HOST_WIDE_INT) offset + this_off;
+          if (offset < 0)
+            return 0;
+          if (ssa_ver_to_stridx[SSA_NAME_VERSION (ptr)])
+            {
+              strinfo *si
+                = get_strinfo (ssa_ver_to_stridx[SSA_NAME_VERSION (ptr)]);
+              if (si && compare_nonzero_chars (si, offset) >= 0)
+                return get_stridx_plus_constant (si, offset, exp);
+            }
+          e = ptr;
+        }
       return 0;
     }
 

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] handle vla plus offset in strlen (PR 90662)
  2019-06-07 19:12   ` Martin Sebor
@ 2019-06-11 20:12     ` Jeff Law
  0 siblings, 0 replies; 4+ messages in thread
From: Jeff Law @ 2019-06-11 20:12 UTC (permalink / raw)
  To: Martin Sebor, gcc-patches

On 6/7/19 1:12 PM, Martin Sebor wrote:
> On 6/6/19 3:53 PM, Jeff Law wrote:
>> On 6/5/19 4:51 PM, Martin Sebor wrote:
>>> One of my new tests for the strlen/sprintf integration tripped
>>> over an incomplete handling of VLAs by the strlen pass.  Where
>>> it can determine the length of a substring at some offset with
>>> other kinds of arrays, the pass fails with VLAs because they
>>> are represented as pointers to arrays.
>>>
>>> The attached patch adds the missing handling so that code like
>>> the following can be fully folded even for VLAs.
>>>
>>>    int f (int n)
>>>    {
>>>      char a[n];
>>>      strcpy (a, "12345");
>>>      if (strlen (&a[2]) != 3)
>>>        abort ();
>>>    }
>>>
>>> Tested on x86_64-linux.
>>>
>>> Martin
>>>
>>> gcc-90662.diff
>>>
>>> PR tree-optimization/90662 - strlen of a string in a vla plus offset not folded
>>>
>>>
>>> gcc/ChangeLog:
>>>
>>>     PR tree-optimization/90662
>>>     * tree-ssa-strlen.c (get_stridx): Handle simple VLAs and pointers
>>>     to arrays.
>>>
>>> gcc/testsuite/ChangeLog:
>>>
>>>     PR tree-optimization/90662
>>>     * gcc.dg/strlenopt-62.c: New test.
>>>     * gcc.dg/strlenopt-63.c: New test.
>> We're relying on the fact that in a MEM_REF the offset is always a byte
>> offset, so no scaling for wchars needed, right?
> 
> I was assuming that the strlen pass only deals with narrow chars.
> But it can be called on a wide character array (or an array of
> any other type) even if it makes little sense, and the code isn't
> prepared to handle that.  Thanks for the hint!  The attached
> revision adds the scaling along with tests for non-char VLAs
> and pointer to arrays.
> 
> Martin
> 
>>
>> OK for the trunk.
>> THanks,
>> Jeff
>>
> 
> 
> gcc-90662.diff
> 
> PR tree-optimization/90662 - strlen of a string in a vla plus offset not folded
> 
> gcc/ChangeLog:
> 
> 	PR tree-optimization/90662
> 	* tree-ssa-strlen.c (get_stridx): Handle simple VLAs and pointers
> 	to arrays.
> 
> gcc/testsuite/ChangeLog:
> 
> 	PR tree-optimization/90662
> 	* gcc.dg/strlenopt-62.c: New test.
> 	* gcc.dg/strlenopt-63.c: New test.
> 	* gcc.dg/strlenopt-64.c: New test.
OK
jeff

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2019-06-11 20:12 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-06-05 22:51 [PATCH] handle vla plus offset in strlen (PR 90662) Martin Sebor
2019-06-06 21:53 ` Jeff Law
2019-06-07 19:12   ` Martin Sebor
2019-06-11 20:12     ` Jeff Law

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).