public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] fold strlen of constant aggregates (PR 83693)
@ 2018-01-08  2:11 Martin Sebor
  2018-01-08 11:41 ` Richard Biener
  0 siblings, 1 reply; 7+ messages in thread
From: Martin Sebor @ 2018-01-08  2:11 UTC (permalink / raw)
  To: Gcc Patch List

[-- Attachment #1: Type: text/plain, Size: 613 bytes --]

GCC is able to fold references to members of global aggregate
constants in many expressions but it doesn't known how do it
for strlen() arguments.  As a result, strlen calls with such
arguments such the one below are not optimized:

   const struct { char a[4]; } s = { "123" };

   void f (void)
   {
     if (s.a[3]) abort ();   // folded/eliminated
   }

   void g (void)
   {
     if (strlen (s.a) != 3) abort ();   // not folded
   }

The attached patch enables gimple_fold_builtin_strlen() to extract
constant strings from aggregate initializers, analogously to how
it extracts data of other types.

Martin

[-- Attachment #2: gcc-83693.diff --]
[-- Type: text/x-patch, Size: 9371 bytes --]

PR tree-optimization/83693 - missing strlen optimization for array of arrays

gcc/testsuite/ChangeLog:

	PR tree-optimization/83693
	* gcc.dg/strlenopt-42.c: New test.

gcc/ChangeLog:

	PR tree-optimization/83693
	* expr.c (string_constant): Handle a bare STRING_CST.
	* gimple-fold.c (gimple_fold_builtin_strlen): Try to extract
	a string from an aggregate CONSTRUCTOR first.

diff --git a/gcc/expr.c b/gcc/expr.c
index cd1e57d..fbb50ad 100644
--- a/gcc/expr.c
+++ b/gcc/expr.c
@@ -11371,7 +11371,7 @@ string_constant (tree arg, tree *ptr_offset)
 	  array = TREE_OPERAND (TREE_OPERAND (arg, 0), 0);
 	  offset = TREE_OPERAND (TREE_OPERAND (arg, 0), 1);
 	  if (TREE_CODE (array) != STRING_CST && !VAR_P (array))
-	    return 0;
+	    return NULL_TREE;
 
 	  /* Check if the array has a nonzero lower bound.  */
 	  lower_bound = array_ref_low_bound (TREE_OPERAND (arg, 0));
@@ -11379,9 +11379,9 @@ string_constant (tree arg, tree *ptr_offset)
 	    {
 	      /* If the offset and base aren't both constants, return 0.  */
 	      if (TREE_CODE (lower_bound) != INTEGER_CST)
-	        return 0;
+	        return NULL_TREE;
 	      if (TREE_CODE (offset) != INTEGER_CST)
-		return 0;
+		return NULL_TREE;
 	      /* Adjust offset by the lower bound.  */
 	      offset = size_diffop (fold_convert (sizetype, offset),
 				    fold_convert (sizetype, lower_bound));
@@ -11392,13 +11392,13 @@ string_constant (tree arg, tree *ptr_offset)
 	  array = TREE_OPERAND (TREE_OPERAND (arg, 0), 0);
 	  offset = TREE_OPERAND (TREE_OPERAND (arg, 0), 1);
 	  if (TREE_CODE (array) != ADDR_EXPR)
-	    return 0;
+	    return NULL_TREE;
 	  array = TREE_OPERAND (array, 0);
 	  if (TREE_CODE (array) != STRING_CST && !VAR_P (array))
-	    return 0;
+	    return NULL_TREE;
 	}
       else
-	return 0;
+	return NULL_TREE;
     }
   else if (TREE_CODE (arg) == PLUS_EXPR || TREE_CODE (arg) == POINTER_PLUS_EXPR)
     {
@@ -11423,10 +11423,15 @@ string_constant (tree arg, tree *ptr_offset)
 	  offset = arg0;
 	}
       else
-	return 0;
+	return NULL_TREE;
+    }
+  else if (TREE_CODE (arg) == STRING_CST)
+    {
+      *ptr_offset = size_zero_node;
+      return arg;
     }
   else
-    return 0;
+    return NULL_TREE;
 
   if (TREE_CODE (array) == STRING_CST)
     {
@@ -11442,14 +11447,14 @@ string_constant (tree arg, tree *ptr_offset)
       if (init == error_mark_node
 	  || !init
 	  || TREE_CODE (init) != STRING_CST)
-	return 0;
+	return NULL_TREE;
 
       /* Avoid const char foo[4] = "abcde";  */
       if (DECL_SIZE_UNIT (array) == NULL_TREE
 	  || TREE_CODE (DECL_SIZE_UNIT (array)) != INTEGER_CST
 	  || (length = TREE_STRING_LENGTH (init)) <= 0
 	  || compare_tree_int (DECL_SIZE_UNIT (array), length) < 0)
-	return 0;
+	return NULL_TREE;
 
       /* If variable is bigger than the string literal, OFFSET must be constant
 	 and inside of the bounds of the string literal.  */
@@ -11457,13 +11462,13 @@ string_constant (tree arg, tree *ptr_offset)
       if (compare_tree_int (DECL_SIZE_UNIT (array), length) > 0
 	  && (! tree_fits_uhwi_p (offset)
 	      || compare_tree_int (offset, length) >= 0))
-	return 0;
+	return NULL_TREE;
 
       *ptr_offset = offset;
       return init;
     }
 
-  return 0;
+  return NULL_TREE;
 }
 \f
 /* Generate code to calculate OPS, and exploded expression
diff --git a/gcc/gimple-fold.c b/gcc/gimple-fold.c
index fe3e0b4..f277324 100644
--- a/gcc/gimple-fold.c
+++ b/gcc/gimple-fold.c
@@ -3517,8 +3517,14 @@ gimple_fold_builtin_strlen (gimple_stmt_iterator *gsi)
   wide_int minlen;
   wide_int maxlen;
 
+  /* Try to extract a constant from an object's CONSTRUCTOR first.  */
+  tree arg = gimple_call_arg (stmt, 0);
+  if (TREE_CODE (arg) == ADDR_EXPR)
+    if (tree str = fold_const_aggregate_ref (TREE_OPERAND (arg, 0)))
+      arg = str;
+
   tree lenrange[2];
-  if (!get_range_strlen (gimple_call_arg (stmt, 0), lenrange)
+  if (!get_range_strlen (arg, lenrange)
       && lenrange[0] && TREE_CODE (lenrange[0]) == INTEGER_CST
       && lenrange[1] && TREE_CODE (lenrange[1]) == INTEGER_CST)
     {
diff --git a/gcc/testsuite/gcc.dg/strlenopt-42.c b/gcc/testsuite/gcc.dg/strlenopt-42.c
new file mode 100644
index 0000000..c0b4ece
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/strlenopt-42.c
@@ -0,0 +1,158 @@
+/* PR tree-optimization/83693 - missing strlen optimization for array
+   of arrays
+   { dg-do compile }
+   { dg-options "-O1 -fdump-tree-optimized" } */
+
+#include "strlenopt.h"
+
+#define DIFF_MAX __PTRDIFF_MAX__
+
+#define CAT(x, y) x ## y
+#define CONCAT(x, y) CAT (x, y)
+#define FAILNAME(name) CONCAT (call_ ## name ##_on_line_, __LINE__)
+
+#define FAIL(name) do {				\
+    extern void FAILNAME (name) (void);		\
+    FAILNAME (name)();				\
+  } while (0)
+
+/* Macro to emit a call to function named
+     call_in_true_branch_not_eliminated_on_line_NNN()
+   for each call that's expected to be eliminated.  The dg-final
+   scan-tree-dump-time directive at the bottom of the test verifies
+   that no such call appears in output.  */
+#define ELIM(expr)					\
+  if (!(expr)) FAIL (in_true_branch_not_eliminated);	\
+  else (void)0
+
+#define STR "0123456789abcdefghijklmnopqrtsuvwxyz"
+#define S(N) (STR + sizeof (STR) - N - 1)
+
+const char a5_5[5][5] = {
+  "", "1", "12", "123", "1234"
+};
+
+const char b5_5[5][5] = {
+  [1] = "1", [0] = "", [3] = "123", [4] = "1234", [2] = "12"
+};
+
+const char* const a3_6_17[3][6] = {
+  { S (0), S (1), S (2), S (3), S (4), S (5) },
+  { S (6), S (7), S (8), S (9), S (10), S (11) },
+  { S (12), S (13), S (14), S (15), S (16), S (17) },
+};
+
+void test_arrays (void)
+{
+  ELIM (strlen (a5_5[0]) == 0);
+  ELIM (strlen (a5_5[1]) == 1);
+  ELIM (strlen (a5_5[2]) == 2);
+  ELIM (strlen (a5_5[3]) == 3);
+  ELIM (strlen (a5_5[4]) == 4);
+
+  ELIM (strlen (b5_5[0]) == 0);
+  ELIM (strlen (b5_5[1]) == 1);
+  ELIM (strlen (b5_5[2]) == 2);
+  ELIM (strlen (b5_5[3]) == 3);
+  ELIM (strlen (b5_5[4]) == 4);
+
+  ELIM (strlen (a3_6_17[0][0]) == 0);
+  ELIM (strlen (a3_6_17[0][1]) == 1);
+  ELIM (strlen (a3_6_17[0][2]) == 2);
+  ELIM (strlen (a3_6_17[0][3]) == 3);
+  ELIM (strlen (a3_6_17[0][4]) == 4);
+  ELIM (strlen (a3_6_17[0][5]) == 5);
+  ELIM (strlen (a3_6_17[1][0]) == 6);
+  ELIM (strlen (a3_6_17[1][1]) == 7);
+  ELIM (strlen (a3_6_17[1][2]) == 8);
+  ELIM (strlen (a3_6_17[1][3]) == 9);
+  ELIM (strlen (a3_6_17[1][4]) == 10);
+  ELIM (strlen (a3_6_17[1][5]) == 11);
+  ELIM (strlen (a3_6_17[2][0]) == 12);
+  ELIM (strlen (a3_6_17[2][1]) == 13);
+  ELIM (strlen (a3_6_17[2][2]) == 14);
+  ELIM (strlen (a3_6_17[2][3]) == 15);
+  ELIM (strlen (a3_6_17[2][4]) == 16);
+  ELIM (strlen (a3_6_17[2][5]) == 17);
+}
+
+struct MemberStrings
+{
+  char a5_5[5][5];
+  const char* a3_6_17[3][6];
+};
+
+const struct MemberStrings ma[] = {
+  {
+    { "", "1", "12", "123", "1234" },
+    {
+      { S (0), S (1), S (2), S (3), S (4), S (5) },
+      { S (6), S (7), S (8), S (9), S (10), S (11) },
+      { S (12), S (13), S (14), S (15), S (16), S (17) },
+    }
+  },
+
+  {
+    { "1234", "123", "12", "1", "" },
+    {
+      { S (17), S (16), S (15), S (14), S (13), S (12) },
+      { S (11), S (10), S (9), S (8), S (7) , S (6) },
+      { S (5), S (4), S (3), S (2), S (1), S (0) },
+    }
+  }
+};
+
+void test_member_arrays (void)
+{
+  ELIM (strlen (ma[0].a5_5[0]) == 0);
+  ELIM (strlen (ma[0].a5_5[1]) == 1);
+  ELIM (strlen (ma[0].a5_5[2]) == 2);
+  ELIM (strlen (ma[0].a5_5[3]) == 3);
+  ELIM (strlen (ma[0].a5_5[4]) == 4);
+
+  ELIM (strlen (ma[0].a3_6_17[0][0]) == 0);
+  ELIM (strlen (ma[0].a3_6_17[0][1]) == 1);
+  ELIM (strlen (ma[0].a3_6_17[0][2]) == 2);
+  ELIM (strlen (ma[0].a3_6_17[0][3]) == 3);
+  ELIM (strlen (ma[0].a3_6_17[0][4]) == 4);
+  ELIM (strlen (ma[0].a3_6_17[0][5]) == 5);
+  ELIM (strlen (ma[0].a3_6_17[1][0]) == 6);
+  ELIM (strlen (ma[0].a3_6_17[1][1]) == 7);
+  ELIM (strlen (ma[0].a3_6_17[1][2]) == 8);
+  ELIM (strlen (ma[0].a3_6_17[1][3]) == 9);
+  ELIM (strlen (ma[0].a3_6_17[1][4]) == 10);
+  ELIM (strlen (ma[0].a3_6_17[1][5]) == 11);
+  ELIM (strlen (ma[0].a3_6_17[2][0]) == 12);
+  ELIM (strlen (ma[0].a3_6_17[2][1]) == 13);
+  ELIM (strlen (ma[0].a3_6_17[2][2]) == 14);
+  ELIM (strlen (ma[0].a3_6_17[2][3]) == 15);
+  ELIM (strlen (ma[0].a3_6_17[2][4]) == 16);
+  ELIM (strlen (ma[0].a3_6_17[2][5]) == 17);
+
+  ELIM (strlen (ma[1].a5_5[0]) == 4);
+  ELIM (strlen (ma[1].a5_5[1]) == 3);
+  ELIM (strlen (ma[1].a5_5[2]) == 2);
+  ELIM (strlen (ma[1].a5_5[3]) == 1);
+  ELIM (strlen (ma[1].a5_5[4]) == 0);
+
+  ELIM (strlen (ma[1].a3_6_17[0][0]) == 17);
+  ELIM (strlen (ma[1].a3_6_17[0][1]) == 16);
+  ELIM (strlen (ma[1].a3_6_17[0][2]) == 15);
+  ELIM (strlen (ma[1].a3_6_17[0][3]) == 14);
+  ELIM (strlen (ma[1].a3_6_17[0][4]) == 13);
+  ELIM (strlen (ma[1].a3_6_17[0][5]) == 12);
+  ELIM (strlen (ma[1].a3_6_17[1][0]) == 11);
+  ELIM (strlen (ma[1].a3_6_17[1][1]) == 10);
+  ELIM (strlen (ma[1].a3_6_17[1][2]) == 9);
+  ELIM (strlen (ma[1].a3_6_17[1][3]) == 8);
+  ELIM (strlen (ma[1].a3_6_17[1][4]) == 7);
+  ELIM (strlen (ma[1].a3_6_17[1][5]) == 6);
+  ELIM (strlen (ma[1].a3_6_17[2][0]) == 5);
+  ELIM (strlen (ma[1].a3_6_17[2][1]) == 4);
+  ELIM (strlen (ma[1].a3_6_17[2][2]) == 3);
+  ELIM (strlen (ma[1].a3_6_17[2][3]) == 2);
+  ELIM (strlen (ma[1].a3_6_17[2][4]) == 1);
+  ELIM (strlen (ma[1].a3_6_17[2][5]) == 0);
+}
+
+/* { dg-final { scan-tree-dump-times "_not_eliminated" 0 "optimized" } } */

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] fold strlen of constant aggregates (PR 83693)
  2018-01-08  2:11 [PATCH] fold strlen of constant aggregates (PR 83693) Martin Sebor
@ 2018-01-08 11:41 ` Richard Biener
  2018-01-09  1:20   ` Martin Sebor
  0 siblings, 1 reply; 7+ messages in thread
From: Richard Biener @ 2018-01-08 11:41 UTC (permalink / raw)
  To: Martin Sebor; +Cc: Gcc Patch List

On Mon, Jan 8, 2018 at 3:11 AM, Martin Sebor <msebor@gmail.com> wrote:
> GCC is able to fold references to members of global aggregate
> constants in many expressions but it doesn't known how do it
> for strlen() arguments.  As a result, strlen calls with such
> arguments such the one below are not optimized:
>
>   const struct { char a[4]; } s = { "123" };
>
>   void f (void)
>   {
>     if (s.a[3]) abort ();   // folded/eliminated
>   }
>
>   void g (void)
>   {
>     if (strlen (s.a) != 3) abort ();   // not folded
>   }
>
> The attached patch enables gimple_fold_builtin_strlen() to extract
> constant strings from aggregate initializers, analogously to how
> it extracts data of other types.

Hmm.  You do

diff --git a/gcc/gimple-fold.c b/gcc/gimple-fold.c
index fe3e0b4..f277324 100644
--- a/gcc/gimple-fold.c
+++ b/gcc/gimple-fold.c
@@ -3517,8 +3517,14 @@ gimple_fold_builtin_strlen (gimple_stmt_iterator *gsi)
   wide_int minlen;
   wide_int maxlen;

+  /* Try to extract a constant from an object's CONSTRUCTOR first.  */
+  tree arg = gimple_call_arg (stmt, 0);
+  if (TREE_CODE (arg) == ADDR_EXPR)
+    if (tree str = fold_const_aggregate_ref (TREE_OPERAND (arg, 0)))
+      arg = str;
+

(patch is not against trunk?) but then fold_const_aggregate_ref of, say, &s.a[2]
will simply return '3' which then yields to a bougs result?

So I'm not sure this flys as-is or at least needs a comment how such
simplification
will end up _not_ disturbing the following code (maybe by noticing '3'
aka an INTEGER_CST
isn't a valid string and thus not folding).

Richard.

> Martin

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] fold strlen of constant aggregates (PR 83693)
  2018-01-08 11:41 ` Richard Biener
@ 2018-01-09  1:20   ` Martin Sebor
  2018-01-09 21:43     ` Martin Sebor
  0 siblings, 1 reply; 7+ messages in thread
From: Martin Sebor @ 2018-01-09  1:20 UTC (permalink / raw)
  To: Richard Biener; +Cc: Gcc Patch List

[-- Attachment #1: Type: text/plain, Size: 2533 bytes --]

On 01/08/2018 04:40 AM, Richard Biener wrote:
> On Mon, Jan 8, 2018 at 3:11 AM, Martin Sebor <msebor@gmail.com> wrote:
>> GCC is able to fold references to members of global aggregate
>> constants in many expressions but it doesn't known how do it
>> for strlen() arguments.  As a result, strlen calls with such
>> arguments such the one below are not optimized:
>>
>>   const struct { char a[4]; } s = { "123" };
>>
>>   void f (void)
>>   {
>>     if (s.a[3]) abort ();   // folded/eliminated
>>   }
>>
>>   void g (void)
>>   {
>>     if (strlen (s.a) != 3) abort ();   // not folded
>>   }
>>
>> The attached patch enables gimple_fold_builtin_strlen() to extract
>> constant strings from aggregate initializers, analogously to how
>> it extracts data of other types.
>
> Hmm.  You do
>
> diff --git a/gcc/gimple-fold.c b/gcc/gimple-fold.c
> index fe3e0b4..f277324 100644
> --- a/gcc/gimple-fold.c
> +++ b/gcc/gimple-fold.c
> @@ -3517,8 +3517,14 @@ gimple_fold_builtin_strlen (gimple_stmt_iterator *gsi)
>    wide_int minlen;
>    wide_int maxlen;
>
> +  /* Try to extract a constant from an object's CONSTRUCTOR first.  */
> +  tree arg = gimple_call_arg (stmt, 0);
> +  if (TREE_CODE (arg) == ADDR_EXPR)
> +    if (tree str = fold_const_aggregate_ref (TREE_OPERAND (arg, 0)))
> +      arg = str;
> +
>
> (patch is not against trunk?)

I forgot to mention that the patch is on top of the one for PR
83671 (improve early strlen range folding).  But that's not
the case anymore with the updated patch.

> but then fold_const_aggregate_ref of, say, &s.a[2]
> will simply return '3' which then yields to a bougs result?
>
> So I'm not sure this flys as-is or at least needs a comment how such
> simplification
> will end up _not_ disturbing the following code (maybe by noticing '3'
> aka an INTEGER_CST
> isn't a valid string and thus not folding).

After sleeping on it I realized that although enhancing
gimple_fold_builtin_strlen is an improvement, it only benefits
straight calls to strlen and nothing else.  Calls to strcmp,
sprintf, or strcpy (and along with it the rest of the strlen
pass) are still expanded as if the argument were unknown.  To
improve even those callers, the folding needs to be done at
a lower level (otherwise they'd all have to duplicate the same
code as gimple_fold_builtin_strlen).  With that in mind I've
moved the logic to string_constant() so all of those clients
benefit.

Retested on x86_64-linux.  Out of paranoia I also built and
tested the top of Glibc trunk with no unusual failures.

Martin

[-- Attachment #2: gcc-83693.diff --]
[-- Type: text/x-patch, Size: 22957 bytes --]

PR tree-optimization/83693 - missing strlen optimization for array of arrays

gcc/ChangeLog:

	PR tree-optimization/83693
	* expr.c (array_ctor_elt): New function.
	(string_constant): Call it.  Handle initializers of arrays of arrays.

gcc/testsuite/ChangeLog:

	PR tree-optimization/83693
	* gcc.dg/strcmp-2.c: New test.
	* gcc.dg/strlenopt-42.c: New test.
	* gcc.dg/strlenopt-43.c: New test.

diff --git a/gcc/expr.c b/gcc/expr.c
index cd1e57d..d0126c8 100644
--- a/gcc/expr.c
+++ b/gcc/expr.c
@@ -62,7 +62,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "rtl-chkp.h"
 #include "ccmp.h"
 #include "rtx-vector-builder.h"
-
+#include "gimple-fold.h"
 
 /* If this is nonzero, we do not bother generating VOLATILE
    around volatile memory references, and we are willing to
@@ -11343,6 +11343,31 @@ is_aligning_offset (const_tree offset, const_tree exp)
   return TREE_CODE (offset) == ADDR_EXPR && TREE_OPERAND (offset, 0) == exp;
 }
 \f
+/* Return initializer element IDX for the array CONSTRUCTOR initializer
+   INIT or an empty string constant if no such element exists.  */
+
+static tree
+array_ctor_elt (tree init, tree idx)
+{
+  HOST_WIDE_INT i = tree_to_uhwi (idx);
+
+  if (i < CONSTRUCTOR_NELTS (init)
+      && tree_int_cst_equal (CONSTRUCTOR_ELT (init, i)->index, idx))
+    return CONSTRUCTOR_ELT (init, i)->value;
+
+  tree index, value;
+  FOR_EACH_CONSTRUCTOR_ELT (CONSTRUCTOR_ELTS (init), i, index, value)
+    {
+      if (tree_int_cst_equal (index, idx))
+	return value;
+    }
+
+  /* Callers expect a STRING_CST with an ARRAY_TYPE and build_string
+     returnd only the former, with no type.  */
+  init = build_string_literal (1, "");
+  return TREE_OPERAND (TREE_OPERAND (init, 0), 0);
+}
+
 /* Return the tree node if an ARG corresponds to a string constant or zero
    if it doesn't.  If we return nonzero, set *PTR_OFFSET to the offset
    in bytes within the string that ARG is accessing.  The type of the
@@ -11351,54 +11376,79 @@ is_aligning_offset (const_tree offset, const_tree exp)
 tree
 string_constant (tree arg, tree *ptr_offset)
 {
-  tree array, offset, lower_bound;
+  tree array, offset = NULL_TREE;
+
+  tree orig_arg = arg;
   STRIP_NOPS (arg);
 
   if (TREE_CODE (arg) == ADDR_EXPR)
     {
-      if (TREE_CODE (TREE_OPERAND (arg, 0)) == STRING_CST)
+      tree oper = TREE_OPERAND (arg, 0);
+
+      if (TREE_CODE (oper) == STRING_CST)
 	{
 	  *ptr_offset = size_zero_node;
-	  return TREE_OPERAND (arg, 0);
+	  return oper;
 	}
-      else if (TREE_CODE (TREE_OPERAND (arg, 0)) == VAR_DECL)
+      else if (TREE_CODE (oper) == VAR_DECL)
 	{
-	  array = TREE_OPERAND (arg, 0);
+	  array = oper;
 	  offset = size_zero_node;
 	}
-      else if (TREE_CODE (TREE_OPERAND (arg, 0)) == ARRAY_REF)
+      else if (TREE_CODE (oper) == ARRAY_REF)
 	{
-	  array = TREE_OPERAND (TREE_OPERAND (arg, 0), 0);
-	  offset = TREE_OPERAND (TREE_OPERAND (arg, 0), 1);
+	  array = TREE_OPERAND (oper, 0);
+	  offset = TREE_OPERAND (oper, 1);
+
+	  /* Get the initializer for the referenced expression.
+	     If the original expression is a cast of the beginning
+	     of an array (OFFSET == 0), use it because it may refer
+	     to the first element of a multidimensional array of
+	     char; otherise, use ARRAY to obtain the initializer.  */
+	  tree ref = (orig_arg == arg || !integer_zerop (offset)
+		      ? array : orig_arg);
+	  if (tree init = fold_const_aggregate_ref (ref))
+	    {
+	      if (TREE_CODE (init) == CONSTRUCTOR)
+		if ((init = array_ctor_elt (init, offset)) != NULL_TREE)
+		  offset = size_zero_node;
+
+	      if (init && TREE_CODE (init) == STRING_CST)
+		{
+		  *ptr_offset = fold_convert (sizetype, offset);
+		  return init;
+		}
+	    }
+
 	  if (TREE_CODE (array) != STRING_CST && !VAR_P (array))
-	    return 0;
+	    return NULL_TREE;
 
 	  /* Check if the array has a nonzero lower bound.  */
-	  lower_bound = array_ref_low_bound (TREE_OPERAND (arg, 0));
+	  tree lower_bound = array_ref_low_bound (oper);
 	  if (!integer_zerop (lower_bound))
 	    {
 	      /* If the offset and base aren't both constants, return 0.  */
 	      if (TREE_CODE (lower_bound) != INTEGER_CST)
-	        return 0;
+	        return NULL_TREE;
 	      if (TREE_CODE (offset) != INTEGER_CST)
-		return 0;
+		return NULL_TREE;
 	      /* Adjust offset by the lower bound.  */
 	      offset = size_diffop (fold_convert (sizetype, offset),
 				    fold_convert (sizetype, lower_bound));
 	    }
 	}
-      else if (TREE_CODE (TREE_OPERAND (arg, 0)) == MEM_REF)
+      else if (TREE_CODE (oper) == MEM_REF)
 	{
-	  array = TREE_OPERAND (TREE_OPERAND (arg, 0), 0);
-	  offset = TREE_OPERAND (TREE_OPERAND (arg, 0), 1);
+	  array = TREE_OPERAND (oper, 0);
+	  offset = TREE_OPERAND (oper, 1);
 	  if (TREE_CODE (array) != ADDR_EXPR)
-	    return 0;
+	    return NULL_TREE;
 	  array = TREE_OPERAND (array, 0);
 	  if (TREE_CODE (array) != STRING_CST && !VAR_P (array))
-	    return 0;
+	    return NULL_TREE;
 	}
       else
-	return 0;
+	return NULL_TREE;
     }
   else if (TREE_CODE (arg) == PLUS_EXPR || TREE_CODE (arg) == POINTER_PLUS_EXPR)
     {
@@ -11423,10 +11473,15 @@ string_constant (tree arg, tree *ptr_offset)
 	  offset = arg0;
 	}
       else
-	return 0;
+	return NULL_TREE;
+    }
+  else if (TREE_CODE (arg) == STRING_CST)
+    {
+      *ptr_offset = size_zero_node;
+      return arg;
     }
   else
-    return 0;
+    return NULL_TREE;
 
   if (TREE_CODE (array) == STRING_CST)
     {
@@ -11439,17 +11494,24 @@ string_constant (tree arg, tree *ptr_offset)
       tree init = ctor_for_folding (array);
 
       /* Variables initialized to string literals can be handled too.  */
-      if (init == error_mark_node
-	  || !init
-	  || TREE_CODE (init) != STRING_CST)
-	return 0;
+      if (!init || init == error_mark_node)
+	return NULL_TREE;
+
+      if (offset && TREE_CODE (init) == CONSTRUCTOR)
+	{
+	  init = array_ctor_elt (init, offset);
+	  offset = size_zero_node;
+	}
+
+      if (!init || TREE_CODE (init) != STRING_CST)
+	return NULL_TREE;
 
       /* Avoid const char foo[4] = "abcde";  */
       if (DECL_SIZE_UNIT (array) == NULL_TREE
 	  || TREE_CODE (DECL_SIZE_UNIT (array)) != INTEGER_CST
 	  || (length = TREE_STRING_LENGTH (init)) <= 0
 	  || compare_tree_int (DECL_SIZE_UNIT (array), length) < 0)
-	return 0;
+	return NULL_TREE;
 
       /* If variable is bigger than the string literal, OFFSET must be constant
 	 and inside of the bounds of the string literal.  */
@@ -11457,13 +11519,13 @@ string_constant (tree arg, tree *ptr_offset)
       if (compare_tree_int (DECL_SIZE_UNIT (array), length) > 0
 	  && (! tree_fits_uhwi_p (offset)
 	      || compare_tree_int (offset, length) >= 0))
-	return 0;
+	return NULL_TREE;
 
       *ptr_offset = offset;
       return init;
     }
 
-  return 0;
+  return NULL_TREE;
 }
 \f
 /* Generate code to calculate OPS, and exploded expression
diff --git a/gcc/testsuite/gcc.dg/strcmp-2.c b/gcc/testsuite/gcc.dg/strcmp-2.c
new file mode 100644
index 0000000..8bbdeab
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/strcmp-2.c
@@ -0,0 +1,106 @@
+/* PR tree-optimization/83693 - missing strlen optimization for array
+   of arrays
+   Test to verify that strcmp() calls with arguments that are arrays
+   of arrays, are folded as if they were string literals.
+   { dg-do compile }
+   { dg-options "-O2 -fdump-tree-optimized" } */
+
+extern int strcmp (const char*, const char*);
+
+#define CAT(x, y) x ## y
+#define CONCAT(x, y) CAT (x, y)
+#define FAILNAME(name) CONCAT (call_ ## name ##_on_line_, __LINE__)
+
+#define FAIL(name) do {				\
+    extern void FAILNAME (name) (void);		\
+    FAILNAME (name)();				\
+  } while (0)
+
+/* Macro to emit a call to function named
+     call_in_true_branch_not_eliminated_on_line_NNN()
+   for each call that's expected to be eliminated.  The dg-final
+   scan-tree-dump-time directive at the bottom of the test verifies
+   that no such call appears in output.  */
+#define ELIM(expr)					\
+  if (!(expr)) FAIL (in_true_branch_not_eliminated);	\
+  else (void)0
+
+/* Macro to emit a call to a function named
+     call_made_in_{true,false}_branch_on_line_NNN()
+   for each call that's expected to be retained.  The dg-final
+   scan-tree-dump-time directive at the bottom of the test verifies
+   that the expected number of both kinds of calls appears in output
+   (a pair for each line with the invocation of the KEEP() macro.  */
+#define KEEP(expr)				\
+  if (expr)					\
+    FAIL (made_in_true_branch);			\
+  else						\
+    FAIL (made_in_false_branch)
+
+#define S4 "1234"
+#define S6 "123456"
+#define S8 "12345678"
+
+const struct {
+  char a[2][3][10];
+} arr[][3] = {
+  [1][1] = { .a[1][2] = S8 },
+  [1][0] = { .a[1][1] = S6 },
+  [0][0] = { .a[0][1] = S4 }
+};
+
+void test_strcmp (void)
+{
+  ELIM (0 == strcmp (arr[0][0].a[0][0], ""));
+  ELIM (0 == strcmp (arr[0][0].a[0][1], S4));
+  ELIM (0 == strcmp (arr[0][0].a[0][2], ""));
+  ELIM (0 == strcmp (arr[0][0].a[1][0], ""));
+  ELIM (0 == strcmp (arr[0][0].a[1][1], ""));
+  ELIM (0 == strcmp (arr[0][0].a[1][2], ""));
+
+  ELIM (0 == strcmp (arr[0][1].a[0][0], ""));
+  ELIM (0 == strcmp (arr[0][1].a[0][1], ""));
+  ELIM (0 == strcmp (arr[0][1].a[0][2], ""));
+  ELIM (0 == strcmp (arr[0][1].a[1][0], ""));
+  ELIM (0 == strcmp (arr[0][1].a[1][1], ""));
+  ELIM (0 == strcmp (arr[0][1].a[1][2], ""));
+
+  ELIM (0 == strcmp (arr[1][0].a[0][0], ""));
+  ELIM (0 == strcmp (arr[1][0].a[0][1], ""));
+  ELIM (0 == strcmp (arr[1][0].a[0][2], ""));
+  ELIM (0 == strcmp (arr[1][0].a[1][0], ""));
+  ELIM (0 == strcmp (arr[1][0].a[1][1], S6));
+  ELIM (0 == strcmp (arr[1][0].a[1][2], ""));
+
+  ELIM (0 == strcmp (arr[1][1].a[0][0], ""));
+  ELIM (0 == strcmp (arr[1][1].a[0][1], ""));
+  ELIM (0 == strcmp (arr[1][1].a[0][2], ""));
+  ELIM (0 == strcmp (arr[1][1].a[1][0], ""));
+  ELIM (0 == strcmp (arr[1][1].a[1][1], ""));
+  ELIM (0 == strcmp (arr[1][1].a[1][2], S8));
+}
+
+#line 1000
+
+char str6[] = S6;
+
+const struct {
+  const char *p[2][3];
+} ptr[][2] = {
+  [1][1] = { .p[1][2] = S8 },
+  [1][0] = { .p[1][1] = S6 },
+  [0][1] = { .p[1][2] = str6 },
+  [0][0] = { .p[0][0] = S4 }
+};
+
+void keep_strcmp_ptr (void)
+{
+  ELIM (0 == strcmp (ptr[0][0].p[0][0], S4));
+  KEEP (0 == strcmp (ptr[0][1].p[1][2], S6));
+  ELIM (0 == strcmp (ptr[1][0].p[1][1], S6));
+  ELIM (0 == strcmp (ptr[1][1].p[1][2], S8));
+}
+
+/* { dg-final { scan-tree-dump-times "_not_eliminated" 0 "optimized" } }
+   { dg-final { scan-tree-dump-times "call_made_in_false_branch_on_line_1\[0-9\]\[0-9\]\[0-9\]" 1 "optimized" } }
+   { dg-final { scan-tree-dump-times "call_made_in_true_branch_on_line_1\[0-9\]\[0-9\]\[0-9\]" 1 "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/strlenopt-42.c b/gcc/testsuite/gcc.dg/strlenopt-42.c
new file mode 100644
index 0000000..9d11fd2
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/strlenopt-42.c
@@ -0,0 +1,248 @@
+/* PR tree-optimization/83693 - missing strlen optimization for array
+   of arrays
+   { dg-do compile }
+   { dg-options "-O1 -fdump-tree-optimized" } */
+
+#include "strlenopt.h"
+
+#define DIFF_MAX __PTRDIFF_MAX__
+
+#define CAT(x, y) x ## y
+#define CONCAT(x, y) CAT (x, y)
+#define FAILNAME(name) CONCAT (call_ ## name ##_on_line_, __LINE__)
+
+#define FAIL(name) do {				\
+    extern void FAILNAME (name) (void);		\
+    FAILNAME (name)();				\
+  } while (0)
+
+/* Macro to emit a call to function named
+     call_in_true_branch_not_eliminated_on_line_NNN()
+   for each call that's expected to be eliminated.  The dg-final
+   scan-tree-dump-time directive at the bottom of the test verifies
+   that no such call appears in output.  */
+#define ELIM(expr)					\
+  if (!(expr)) FAIL (in_true_branch_not_eliminated);	\
+  else (void)0
+
+/* Macro to emit a call to a function named
+     call_made_in_{true,false}_branch_on_line_NNN()
+   for each call that's expected to be retained.  The dg-final
+   scan-tree-dump-time directive at the bottom of the test verifies
+   that the expected number of both kinds of calls appears in output
+   (a pair for each line with the invocation of the KEEP() macro.  */
+#define KEEP(expr)				\
+  if (expr)					\
+    FAIL (made_in_true_branch);			\
+  else						\
+    FAIL (made_in_false_branch)
+
+
+#define STR "0123456789abcdefghijklmnopqrtsuvwxyz"
+#define S(N) (STR + sizeof (STR) - N - 1)
+
+const char a5_5[5][5] = {
+  "", "1", "12", "123", "1234"
+};
+
+const char b5_5[5][5] = {
+  [1] = "1", [0] = "", [3] = "123", [4] = "1234", [2] = "12"
+};
+
+const char c5_5[5][5] = { [3] = "123" };
+const char d5_5_5[5][5][5] = { [2][3] = "123" };
+
+const char* const a3_6_17[3][6] = {
+  { S (0), S (1), S (2), S (3), S (4), S (5) },
+  { S (6), S (7), S (8), S (9), S (10), S (11) },
+  { S (12), S (13), S (14), S (15), S (16), S (17) },
+};
+
+void elim_arrays (void)
+{
+  ELIM (strlen (a5_5[0]) == 0);
+  ELIM (strlen (a5_5[1]) == 1);
+  ELIM (strlen (a5_5[2]) == 2);
+  ELIM (strlen (a5_5[3]) == 3);
+  ELIM (strlen (a5_5[4]) == 4);
+
+  ELIM (strlen (b5_5[0]) == 0);
+  ELIM (strlen (b5_5[1]) == 1);
+  ELIM (strlen (b5_5[2]) == 2);
+  ELIM (strlen (b5_5[3]) == 3);
+  ELIM (strlen (b5_5[4]) == 4);
+
+  ELIM (strlen (c5_5[0]) == 0);
+  ELIM (strlen (c5_5[1]) == 0);
+  ELIM (strlen (c5_5[2]) == 0);
+  ELIM (strlen (c5_5[3]) == 3);
+  ELIM (strlen (c5_5[4]) == 0);
+
+  ELIM (strlen (d5_5_5[0][0]) == 0);
+  ELIM (strlen (d5_5_5[0][1]) == 0);
+  ELIM (strlen (d5_5_5[0][2]) == 0);
+  ELIM (strlen (d5_5_5[0][3]) == 0);
+  ELIM (strlen (d5_5_5[0][4]) == 0);
+
+  ELIM (strlen (d5_5_5[1][0]) == 0);
+  ELIM (strlen (d5_5_5[1][1]) == 0);
+  ELIM (strlen (d5_5_5[1][2]) == 0);
+  ELIM (strlen (d5_5_5[1][3]) == 0);
+  ELIM (strlen (d5_5_5[1][4]) == 0);
+
+  ELIM (strlen (d5_5_5[2][0]) == 0);
+  ELIM (strlen (d5_5_5[2][1]) == 0);
+  ELIM (strlen (d5_5_5[2][2]) == 0);
+  ELIM (strlen (d5_5_5[2][3]) == 3);
+  ELIM (strlen (d5_5_5[2][4]) == 0);
+
+  ELIM (strlen (d5_5_5[3][0]) == 0);
+  ELIM (strlen (d5_5_5[3][1]) == 0);
+  ELIM (strlen (d5_5_5[3][2]) == 0);
+  ELIM (strlen (d5_5_5[3][3]) == 0);
+  ELIM (strlen (d5_5_5[3][4]) == 0);
+
+  ELIM (strlen (d5_5_5[4][0]) == 0);
+  ELIM (strlen (d5_5_5[4][1]) == 0);
+  ELIM (strlen (d5_5_5[4][2]) == 0);
+  ELIM (strlen (d5_5_5[4][3]) == 0);
+  ELIM (strlen (d5_5_5[4][4]) == 0);
+
+  ELIM (strlen (a3_6_17[0][0]) == 0);
+  ELIM (strlen (a3_6_17[0][1]) == 1);
+  ELIM (strlen (a3_6_17[0][2]) == 2);
+  ELIM (strlen (a3_6_17[0][3]) == 3);
+  ELIM (strlen (a3_6_17[0][4]) == 4);
+  ELIM (strlen (a3_6_17[0][5]) == 5);
+  ELIM (strlen (a3_6_17[1][0]) == 6);
+  ELIM (strlen (a3_6_17[1][1]) == 7);
+  ELIM (strlen (a3_6_17[1][2]) == 8);
+  ELIM (strlen (a3_6_17[1][3]) == 9);
+  ELIM (strlen (a3_6_17[1][4]) == 10);
+  ELIM (strlen (a3_6_17[1][5]) == 11);
+  ELIM (strlen (a3_6_17[2][0]) == 12);
+  ELIM (strlen (a3_6_17[2][1]) == 13);
+  ELIM (strlen (a3_6_17[2][2]) == 14);
+  ELIM (strlen (a3_6_17[2][3]) == 15);
+  ELIM (strlen (a3_6_17[2][4]) == 16);
+  ELIM (strlen (a3_6_17[2][5]) == 17);
+}
+
+struct MemberStrings
+{
+  char a5_5[5][5];
+  const char* a3_6_17[3][6];
+};
+
+const struct MemberStrings ma[] = {
+  {
+    { "", "1", "12", "123", "1234" },
+    {
+      { S (0), S (1), S (2), S (3), S (4), S (5) },
+      { S (6), S (7), S (8), S (9), S (10), S (11) },
+      { S (12), S (13), S (14), S (15), S (16), S (17) },
+    }
+  },
+
+  {
+    { "1234", "123", "12", "1", "" },
+    {
+      { S (17), S (16), S (15), S (14), S (13), S (12) },
+      { S (11), S (10), S (9), S (8), S (7) , S (6) },
+      { S (5), S (4), S (3), S (2), S (1), S (0) },
+    }
+  },
+
+  {
+    { [3] = "123" },
+    .a3_6_17 [1][3] = S (13)
+  }
+};
+
+void elim_member_arrays (void)
+{
+  ELIM (strlen (ma[0].a5_5[0]) == 0);
+  ELIM (strlen (ma[0].a5_5[1]) == 1);
+  ELIM (strlen (ma[0].a5_5[2]) == 2);
+  ELIM (strlen (ma[0].a5_5[3]) == 3);
+  ELIM (strlen (ma[0].a5_5[4]) == 4);
+
+  ELIM (strlen (ma[0].a3_6_17[0][0]) == 0);
+  ELIM (strlen (ma[0].a3_6_17[0][1]) == 1);
+  ELIM (strlen (ma[0].a3_6_17[0][2]) == 2);
+  ELIM (strlen (ma[0].a3_6_17[0][3]) == 3);
+  ELIM (strlen (ma[0].a3_6_17[0][4]) == 4);
+  ELIM (strlen (ma[0].a3_6_17[0][5]) == 5);
+  ELIM (strlen (ma[0].a3_6_17[1][0]) == 6);
+  ELIM (strlen (ma[0].a3_6_17[1][1]) == 7);
+  ELIM (strlen (ma[0].a3_6_17[1][2]) == 8);
+  ELIM (strlen (ma[0].a3_6_17[1][3]) == 9);
+  ELIM (strlen (ma[0].a3_6_17[1][4]) == 10);
+  ELIM (strlen (ma[0].a3_6_17[1][5]) == 11);
+  ELIM (strlen (ma[0].a3_6_17[2][0]) == 12);
+  ELIM (strlen (ma[0].a3_6_17[2][1]) == 13);
+  ELIM (strlen (ma[0].a3_6_17[2][2]) == 14);
+  ELIM (strlen (ma[0].a3_6_17[2][3]) == 15);
+  ELIM (strlen (ma[0].a3_6_17[2][4]) == 16);
+  ELIM (strlen (ma[0].a3_6_17[2][5]) == 17);
+
+  ELIM (strlen (ma[1].a5_5[0]) == 4);
+  ELIM (strlen (ma[1].a5_5[1]) == 3);
+  ELIM (strlen (ma[1].a5_5[2]) == 2);
+  ELIM (strlen (ma[1].a5_5[3]) == 1);
+  ELIM (strlen (ma[1].a5_5[4]) == 0);
+
+  ELIM (strlen (ma[1].a3_6_17[0][0]) == 17);
+  ELIM (strlen (ma[1].a3_6_17[0][1]) == 16);
+  ELIM (strlen (ma[1].a3_6_17[0][2]) == 15);
+  ELIM (strlen (ma[1].a3_6_17[0][3]) == 14);
+  ELIM (strlen (ma[1].a3_6_17[0][4]) == 13);
+  ELIM (strlen (ma[1].a3_6_17[0][5]) == 12);
+  ELIM (strlen (ma[1].a3_6_17[1][0]) == 11);
+  ELIM (strlen (ma[1].a3_6_17[1][1]) == 10);
+  ELIM (strlen (ma[1].a3_6_17[1][2]) == 9);
+  ELIM (strlen (ma[1].a3_6_17[1][3]) == 8);
+  ELIM (strlen (ma[1].a3_6_17[1][4]) == 7);
+  ELIM (strlen (ma[1].a3_6_17[1][5]) == 6);
+  ELIM (strlen (ma[1].a3_6_17[2][0]) == 5);
+  ELIM (strlen (ma[1].a3_6_17[2][1]) == 4);
+  ELIM (strlen (ma[1].a3_6_17[2][2]) == 3);
+  ELIM (strlen (ma[1].a3_6_17[2][3]) == 2);
+  ELIM (strlen (ma[1].a3_6_17[2][4]) == 1);
+  ELIM (strlen (ma[1].a3_6_17[2][5]) == 0);
+
+  ELIM (strlen (ma[2].a5_5[0]) == 0);
+  ELIM (strlen (ma[2].a5_5[1]) == 0);
+  ELIM (strlen (ma[2].a5_5[2]) == 0);
+  ELIM (strlen (ma[2].a5_5[3]) == 3);
+  ELIM (strlen (ma[2].a5_5[4]) == 0);
+
+  ELIM (strlen (ma[2].a3_6_17[1][3]) == 13);
+}
+
+
+#line 10000
+
+char modstr[5] = "1234";
+
+const char* const xa[][3] = {
+  { "", "1", "12" },
+  { "123", modstr, "12345" }
+};
+
+void keep_arrays (void)
+{
+  ELIM (strlen (xa[0][0]) == 0);
+  ELIM (strlen (xa[0][1]) == 1);
+  ELIM (strlen (xa[0][2]) == 2);
+  ELIM (strlen (xa[1][0]) == 3);
+  /* The following refers to an initialized non-const character array
+     so the strlen() expression must not be eliminated.  */
+  KEEP (strlen (xa[1][1]) == 4);
+  ELIM (strlen (xa[1][2]) == 5);
+}
+
+/* { dg-final { scan-tree-dump-times "_not_eliminated" 0 "optimized" } }
+   { dg-final { scan-tree-dump-times "call_made_in_true_branch_on_line_1\[0-9\]\[0-9\]\[0-9\]" 1 "optimized" } }
+   { dg-final { scan-tree-dump-times "call_made_in_false_branch_on_line_1\[0-9\]\[0-9\]\[0-9\]" 1 "optimized" } } */
+
diff --git a/gcc/testsuite/gcc.dg/strlenopt-43.c b/gcc/testsuite/gcc.dg/strlenopt-43.c
new file mode 100644
index 0000000..93e2c1c
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/strlenopt-43.c
@@ -0,0 +1,157 @@
+/* PR tree-optimization/83693 - missing strlen optimization for array
+   of arrays
+   Test to verify that strlen() calls with arguments that are copies
+   of constant arrays of arrays, made either by strcpy() or sprintf(),
+   are folded as if the copies were made from string literals.
+   { dg-do compile }
+   { dg-options "-O2 -fdump-tree-optimized" } */
+
+#include "strlenopt.h"
+
+extern int snprintf (char*, size_t, const char*, ...);
+
+#define CAT(x, y) x ## y
+#define CONCAT(x, y) CAT (x, y)
+#define FAILNAME(name) CONCAT (call_ ## name ##_on_line_, __LINE__)
+
+#define FAIL(name) do {				\
+    extern void FAILNAME (name) (void);		\
+    FAILNAME (name)();				\
+  } while (0)
+
+/* Macro to emit a call to function named
+     call_in_true_branch_not_eliminated_on_line_NNN()
+   for each call that's expected to be eliminated.  The dg-final
+   scan-tree-dump-time directive at the bottom of the test verifies
+   that no such call appears in output.  */
+#define ELIM(expr)					\
+  if (!(expr)) FAIL (in_true_branch_not_eliminated);	\
+  else (void)0
+
+/* Macro to emit a call to a function named
+     call_made_in_{true,false}_branch_on_line_NNN()
+   for each call that's expected to be retained.  The dg-final
+   scan-tree-dump-time directive at the bottom of the test verifies
+   that the expected number of both kinds of calls appears in output
+   (a pair for each line with the invocation of the KEEP() macro.  */
+#define KEEP(expr)				\
+  if (expr)					\
+    FAIL (made_in_true_branch);			\
+  else						\
+    FAIL (made_in_false_branch)
+
+
+const struct {
+  char a[2][3][10];
+} arr[][3] = {
+  [1][1] = { .a[1][2] = "12345678" },
+  [0][0] = { .a[0][0] = "1234" }
+};
+
+void elim_memcpy_array (void)
+{
+  char d[10];
+
+  memcpy (d, arr[0][0].a[0][0], 9);
+  ELIM (strlen (d) == 4);
+
+  memcpy (d, arr[0][0].a[0][1], 9);
+  ELIM (strlen (d) == 0);
+
+  memcpy (d, arr[0][0].a[1][0], 9);
+  ELIM (strlen (d) == 0);
+
+  memcpy (d, arr[1][1].a[0][0], 9);
+  ELIM (strlen (d) == 0);
+
+  memcpy (d, arr[1][1].a[0][1], 9);
+  ELIM (strlen (d) == 0);
+
+  memcpy (d, arr[1][1].a[1][0], 9);
+  ELIM (strlen (d) == 0);
+
+  memcpy (d, arr[1][1].a[1][1], 9);
+  ELIM (strlen (d) == 0);
+
+  memcpy (d, arr[1][1].a[1][2], 9);
+  ELIM (strlen (d) == 8);
+}
+
+void elim_strcpy_array (void)
+{
+  char d[10];
+
+  strcpy (d, arr[0][0].a[0][0]);
+  ELIM (strlen (d) == 4);
+
+  strcpy (d, arr[0][0].a[0][1]);
+  ELIM (strlen (d) == 0);
+
+  strcpy (d, arr[0][0].a[1][0]);
+  ELIM (strlen (d) == 0);
+
+  strcpy (d, arr[1][1].a[0][0]);
+  ELIM (strlen (d) == 0);
+
+  strcpy (d, arr[1][1].a[0][1]);
+  ELIM (strlen (d) == 0);
+
+  strcpy (d, arr[1][1].a[1][0]);
+  ELIM (strlen (d) == 0);
+
+  strcpy (d, arr[1][1].a[1][1]);
+  ELIM (strlen (d) == 0);
+
+  strcpy (d, arr[1][1].a[1][2]);
+  ELIM (strlen (d) == 8);
+}
+
+void elim_snprintf_array (void)
+{
+  int n;
+  n = snprintf (0, 0, "%s", arr[0][0].a[0][0]);
+  ELIM (n == 4);
+
+  n = snprintf (0, 0, "%s5", arr[0][0].a[0][0]);
+  ELIM (n == 5);
+
+  n = snprintf (0, 0, "%s", arr[0][0].a[0][1]);
+  ELIM (n == 0);
+
+  n = snprintf (0, 0, "%s56", arr[0][0].a[0][1]);
+  ELIM (n == 2);
+}
+
+
+#line 1000
+
+char str[] = "666";
+
+const struct {
+  char *p[2][3];
+} ptr[][2] = {
+  [1][1] = { .p[1][2] = "12345678" },
+  [0][1] = { .p[1][2] = str },
+  [0][0] = { .p[0][0] = "1234" }
+};
+
+void keep_strcpy_ptr (void)
+{
+  char d[10];
+
+  strcpy (d, ptr[0][1].p[1][2]);
+  KEEP (strlen (d) == 3);
+
+  strcpy (d, ptr[1][1].p[1][2]);
+  ELIM (strlen (d) == 8);
+}
+
+void keep_snprintf_ptr (void)
+{
+  int n = snprintf (0, 0, "%s", ptr[0][1].p[1][2]);
+  KEEP (n == 3);
+}
+
+/* { dg-final { scan-tree-dump-times "_not_eliminated" 0 "optimized" } }
+   { dg-final { scan-tree-dump-times "call_made_in_false_branch_on_line_1\[0-9\]\[0-9\]\[0-9\]" 2 "optimized" } }
+   { dg-final { scan-tree-dump-times "call_made_in_true_branch_on_line_1\[0-9\]\[0-9\]\[0-9\]" 2 "optimized" } } */

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] fold strlen of constant aggregates (PR 83693)
  2018-01-09  1:20   ` Martin Sebor
@ 2018-01-09 21:43     ` Martin Sebor
  2018-01-12 22:05       ` Martin Sebor
  2018-04-30 17:24       ` Jeff Law
  0 siblings, 2 replies; 7+ messages in thread
From: Martin Sebor @ 2018-01-09 21:43 UTC (permalink / raw)
  To: Richard Biener; +Cc: Gcc Patch List

[-- Attachment #1: Type: text/plain, Size: 1392 bytes --]

I found a few problems in the previous revision:

1) It didn't handle the simple case of member arrays of const
    struct objects (only member arrays of  const arrays of structs
    were handled).
2) The array_ctor_elt() function returned a narrow empty string
    for an uninitialized CONSTRUCTOR element of any character type
    when it should return the same string in the expected character
    type (char, wchar_t, etc.)
3) The string_constant() function would in some cases use a byte
    offset to get the initializer from a CONSTRUCTOR instead of
    an array index.

The attached version 3 of the patch corrects these issues.
Retested on x86_64 and with the Glibc ToT.

> After sleeping on it I realized that although enhancing
> gimple_fold_builtin_strlen is an improvement, it only benefits
> straight calls to strlen and nothing else.  Calls to strcmp,
> sprintf, or strcpy (and along with it the rest of the strlen
> pass) are still expanded as if the argument were unknown.  To
> improve even those callers, the folding needs to be done at
> a lower level (otherwise they'd all have to duplicate the same
> code as gimple_fold_builtin_strlen).  With that in mind I've
> moved the logic to string_constant() so all of those clients
> benefit.
>
> Retested on x86_64-linux.  Out of paranoia I also built and
> tested the top of Glibc trunk with no unusual failures.
>
> Martin


[-- Attachment #2: gcc-83693.diff --]
[-- Type: text/x-patch, Size: 27950 bytes --]

PR tree-optimization/83693 - missing strlen optimization for array of arrays

gcc/ChangeLog:

	PR tree-optimization/83693
	* expr.c (array_ctor_elt): New function.
	(string_constant): Call it.  Handle initializers of arrays of arrays.

gcc/testsuite/ChangeLog:

	PR tree-optimization/83693
	* gcc.dg/strcmp-2.c: New test.
	* gcc.dg/strlenopt-42.c: New test.
	* gcc.dg/strlenopt-43.c: New test.

diff --git a/gcc/expr.c b/gcc/expr.c
index cd1e57d..75110e5 100644
--- a/gcc/expr.c
+++ b/gcc/expr.c
@@ -62,7 +62,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "rtl-chkp.h"
 #include "ccmp.h"
 #include "rtx-vector-builder.h"
-
+#include "gimple-fold.h"
 
 /* If this is nonzero, we do not bother generating VOLATILE
    around volatile memory references, and we are willing to
@@ -11343,6 +11343,50 @@ is_aligning_offset (const_tree offset, const_tree exp)
   return TREE_CODE (offset) == ADDR_EXPR && TREE_OPERAND (offset, 0) == exp;
 }
 \f
+/* Return initializer element IDX for the array CONSTRUCTOR initializer
+   INIT or an empty string constant with type CHARTYPE if no such element
+   exists.  If IDX is null, simply return an empty string.  If IDX is not
+   constant, return NULL_TREE.  A helper of string_constant.  */
+
+static tree
+array_ctor_elt (tree chartype, tree init, tree idx)
+{
+  if (idx)
+    {
+      if (!tree_fits_uhwi_p (idx))
+	return NULL_TREE;
+
+      HOST_WIDE_INT i = tree_to_uhwi (idx);
+
+      if (i < CONSTRUCTOR_NELTS (init)
+	  && tree_int_cst_equal (CONSTRUCTOR_ELT (init, i)->index, idx))
+	return CONSTRUCTOR_ELT (init, i)->value;
+
+      tree index, value;
+      FOR_EACH_CONSTRUCTOR_ELT (CONSTRUCTOR_ELTS (init), i, index, value)
+	{
+	  if (tree_int_cst_equal (index, idx))
+	    return value;
+	}
+    }
+
+  /* Build and return a STRING_CST representing the empty string with
+     CHARTYPE.  Make sure the string representation has enough zero
+     bytes for CHARTYPE.  */
+  const char nuls[16] = "";
+  unsigned elemsize = tree_to_uhwi (TYPE_SIZE_UNIT(chartype));
+  tree str = build_string (elemsize, nuls);
+  tree elemtype = build_qualified_type (chartype, TYPE_QUAL_CONST);
+  tree indextype = build_index_type (size_zero_node);
+  tree arraytype = build_array_type (elemtype, indextype);
+  TREE_TYPE (str) = arraytype;
+  TREE_CONSTANT (str) = true;
+  TREE_READONLY (str) = true;
+  TREE_STATIC (str) = true;
+
+  return str;
+}
+
 /* Return the tree node if an ARG corresponds to a string constant or zero
    if it doesn't.  If we return nonzero, set *PTR_OFFSET to the offset
    in bytes within the string that ARG is accessing.  The type of the
@@ -11351,54 +11395,109 @@ is_aligning_offset (const_tree offset, const_tree exp)
 tree
 string_constant (tree arg, tree *ptr_offset)
 {
-  tree array, offset, lower_bound;
+  tree orig_arg = arg;
+  /* The type of the string element.  It can be any character type,
+     including char and wchar_t.  */
+  tree chartype = TREE_TYPE (orig_arg);
+  if (TREE_CODE (chartype) == POINTER_TYPE)
+    chartype = TREE_TYPE (chartype);
+  if (TREE_CODE (chartype) == ARRAY_TYPE)
+    chartype = TREE_TYPE (chartype);
+
+  /* Bail if ARG doesn't refer to a narrow or wide character string.  */
+  if (!INTEGRAL_TYPE_P (chartype))
+    return NULL_TREE;
+
   STRIP_NOPS (arg);
 
+  tree array, offset = NULL_TREE, idx = NULL_TREE;
+
   if (TREE_CODE (arg) == ADDR_EXPR)
     {
-      if (TREE_CODE (TREE_OPERAND (arg, 0)) == STRING_CST)
+      tree oper = TREE_OPERAND (arg, 0);
+
+      if (TREE_CODE (oper) == STRING_CST)
 	{
 	  *ptr_offset = size_zero_node;
-	  return TREE_OPERAND (arg, 0);
+	  return oper;
 	}
-      else if (TREE_CODE (TREE_OPERAND (arg, 0)) == VAR_DECL)
+      else if (TREE_CODE (oper) == VAR_DECL)
 	{
-	  array = TREE_OPERAND (arg, 0);
+	  array = oper;
 	  offset = size_zero_node;
 	}
-      else if (TREE_CODE (TREE_OPERAND (arg, 0)) == ARRAY_REF)
+      else if (TREE_CODE (oper) == ARRAY_REF)
 	{
-	  array = TREE_OPERAND (TREE_OPERAND (arg, 0), 0);
-	  offset = TREE_OPERAND (TREE_OPERAND (arg, 0), 1);
+	  array = TREE_OPERAND (oper, 0);
+	  offset = TREE_OPERAND (oper, 1);
+	  idx = offset;
+
+	  /* Get the initializer for the referenced expression.
+	     If the original expression is a cast of the beginning
+	     of an array (OFFSET == 0), use it because it may refer
+	     to the first element of a multidimensional array of
+	     char; otherise, use ARRAY to obtain the initializer.  */
+	  tree ref = (orig_arg == arg || !integer_zerop (offset)
+		      ? array : orig_arg);
+	  if (tree init = fold_const_aggregate_ref (ref))
+	    {
+	      if (TREE_CODE (init) == CONSTRUCTOR)
+		if ((init = array_ctor_elt (chartype, init, idx))
+		    != NULL_TREE)
+		  offset = size_zero_node;
+
+	      if (init && TREE_CODE (init) == STRING_CST)
+		{
+		  *ptr_offset = fold_convert (sizetype, offset);
+		  return init;
+		}
+	    }
+
 	  if (TREE_CODE (array) != STRING_CST && !VAR_P (array))
-	    return 0;
+	    return NULL_TREE;
 
 	  /* Check if the array has a nonzero lower bound.  */
-	  lower_bound = array_ref_low_bound (TREE_OPERAND (arg, 0));
+	  tree lower_bound = array_ref_low_bound (oper);
 	  if (!integer_zerop (lower_bound))
 	    {
 	      /* If the offset and base aren't both constants, return 0.  */
 	      if (TREE_CODE (lower_bound) != INTEGER_CST)
-	        return 0;
+	        return NULL_TREE;
 	      if (TREE_CODE (offset) != INTEGER_CST)
-		return 0;
+		return NULL_TREE;
 	      /* Adjust offset by the lower bound.  */
 	      offset = size_diffop (fold_convert (sizetype, offset),
 				    fold_convert (sizetype, lower_bound));
 	    }
 	}
-      else if (TREE_CODE (TREE_OPERAND (arg, 0)) == MEM_REF)
+      else if (TREE_CODE (oper) == COMPONENT_REF)
 	{
-	  array = TREE_OPERAND (TREE_OPERAND (arg, 0), 0);
-	  offset = TREE_OPERAND (TREE_OPERAND (arg, 0), 1);
+	  if (tree init = fold_const_aggregate_ref (oper))
+	    {
+	      if (TREE_CODE (init) == CONSTRUCTOR)
+		init = array_ctor_elt (chartype, init, NULL_TREE);
+
+	      if (init && TREE_CODE (init) == STRING_CST)
+		{
+		  *ptr_offset = size_zero_node;
+		  return init;
+		}
+	    }
+
+	  return NULL_TREE;
+	}
+      else if (TREE_CODE (oper) == MEM_REF)
+	{
+	  array = TREE_OPERAND (oper, 0);
+	  offset = TREE_OPERAND (oper, 1);
 	  if (TREE_CODE (array) != ADDR_EXPR)
-	    return 0;
+	    return NULL_TREE;
 	  array = TREE_OPERAND (array, 0);
 	  if (TREE_CODE (array) != STRING_CST && !VAR_P (array))
-	    return 0;
+	    return NULL_TREE;
 	}
       else
-	return 0;
+	return NULL_TREE;
     }
   else if (TREE_CODE (arg) == PLUS_EXPR || TREE_CODE (arg) == POINTER_PLUS_EXPR)
     {
@@ -11423,10 +11522,15 @@ string_constant (tree arg, tree *ptr_offset)
 	  offset = arg0;
 	}
       else
-	return 0;
+	return NULL_TREE;
+    }
+  else if (TREE_CODE (arg) == STRING_CST)
+    {
+      *ptr_offset = size_zero_node;
+      return arg;
     }
   else
-    return 0;
+    return NULL_TREE;
 
   if (TREE_CODE (array) == STRING_CST)
     {
@@ -11439,17 +11543,24 @@ string_constant (tree arg, tree *ptr_offset)
       tree init = ctor_for_folding (array);
 
       /* Variables initialized to string literals can be handled too.  */
-      if (init == error_mark_node
-	  || !init
-	  || TREE_CODE (init) != STRING_CST)
-	return 0;
+      if (!init || init == error_mark_node)
+	return NULL_TREE;
+
+      if (idx && TREE_CODE (init) == CONSTRUCTOR)
+	{
+	  init = array_ctor_elt (chartype, init, idx);
+	  offset = size_zero_node;
+	}
+
+      if (!init || TREE_CODE (init) != STRING_CST)
+	return NULL_TREE;
 
       /* Avoid const char foo[4] = "abcde";  */
       if (DECL_SIZE_UNIT (array) == NULL_TREE
 	  || TREE_CODE (DECL_SIZE_UNIT (array)) != INTEGER_CST
 	  || (length = TREE_STRING_LENGTH (init)) <= 0
 	  || compare_tree_int (DECL_SIZE_UNIT (array), length) < 0)
-	return 0;
+	return NULL_TREE;
 
       /* If variable is bigger than the string literal, OFFSET must be constant
 	 and inside of the bounds of the string literal.  */
@@ -11457,13 +11568,13 @@ string_constant (tree arg, tree *ptr_offset)
       if (compare_tree_int (DECL_SIZE_UNIT (array), length) > 0
 	  && (! tree_fits_uhwi_p (offset)
 	      || compare_tree_int (offset, length) >= 0))
-	return 0;
+	return NULL_TREE;
 
       *ptr_offset = offset;
       return init;
     }
 
-  return 0;
+  return NULL_TREE;
 }
 \f
 /* Generate code to calculate OPS, and exploded expression
diff --git a/gcc/testsuite/gcc.dg/strcmp-2.c b/gcc/testsuite/gcc.dg/strcmp-2.c
new file mode 100644
index 0000000..8bbdeab
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/strcmp-2.c
@@ -0,0 +1,106 @@
+/* PR tree-optimization/83693 - missing strlen optimization for array
+   of arrays
+   Test to verify that strcmp() calls with arguments that are arrays
+   of arrays, are folded as if they were string literals.
+   { dg-do compile }
+   { dg-options "-O2 -fdump-tree-optimized" } */
+
+extern int strcmp (const char*, const char*);
+
+#define CAT(x, y) x ## y
+#define CONCAT(x, y) CAT (x, y)
+#define FAILNAME(name) CONCAT (call_ ## name ##_on_line_, __LINE__)
+
+#define FAIL(name) do {				\
+    extern void FAILNAME (name) (void);		\
+    FAILNAME (name)();				\
+  } while (0)
+
+/* Macro to emit a call to function named
+     call_in_true_branch_not_eliminated_on_line_NNN()
+   for each call that's expected to be eliminated.  The dg-final
+   scan-tree-dump-time directive at the bottom of the test verifies
+   that no such call appears in output.  */
+#define ELIM(expr)					\
+  if (!(expr)) FAIL (in_true_branch_not_eliminated);	\
+  else (void)0
+
+/* Macro to emit a call to a function named
+     call_made_in_{true,false}_branch_on_line_NNN()
+   for each call that's expected to be retained.  The dg-final
+   scan-tree-dump-time directive at the bottom of the test verifies
+   that the expected number of both kinds of calls appears in output
+   (a pair for each line with the invocation of the KEEP() macro.  */
+#define KEEP(expr)				\
+  if (expr)					\
+    FAIL (made_in_true_branch);			\
+  else						\
+    FAIL (made_in_false_branch)
+
+#define S4 "1234"
+#define S6 "123456"
+#define S8 "12345678"
+
+const struct {
+  char a[2][3][10];
+} arr[][3] = {
+  [1][1] = { .a[1][2] = S8 },
+  [1][0] = { .a[1][1] = S6 },
+  [0][0] = { .a[0][1] = S4 }
+};
+
+void test_strcmp (void)
+{
+  ELIM (0 == strcmp (arr[0][0].a[0][0], ""));
+  ELIM (0 == strcmp (arr[0][0].a[0][1], S4));
+  ELIM (0 == strcmp (arr[0][0].a[0][2], ""));
+  ELIM (0 == strcmp (arr[0][0].a[1][0], ""));
+  ELIM (0 == strcmp (arr[0][0].a[1][1], ""));
+  ELIM (0 == strcmp (arr[0][0].a[1][2], ""));
+
+  ELIM (0 == strcmp (arr[0][1].a[0][0], ""));
+  ELIM (0 == strcmp (arr[0][1].a[0][1], ""));
+  ELIM (0 == strcmp (arr[0][1].a[0][2], ""));
+  ELIM (0 == strcmp (arr[0][1].a[1][0], ""));
+  ELIM (0 == strcmp (arr[0][1].a[1][1], ""));
+  ELIM (0 == strcmp (arr[0][1].a[1][2], ""));
+
+  ELIM (0 == strcmp (arr[1][0].a[0][0], ""));
+  ELIM (0 == strcmp (arr[1][0].a[0][1], ""));
+  ELIM (0 == strcmp (arr[1][0].a[0][2], ""));
+  ELIM (0 == strcmp (arr[1][0].a[1][0], ""));
+  ELIM (0 == strcmp (arr[1][0].a[1][1], S6));
+  ELIM (0 == strcmp (arr[1][0].a[1][2], ""));
+
+  ELIM (0 == strcmp (arr[1][1].a[0][0], ""));
+  ELIM (0 == strcmp (arr[1][1].a[0][1], ""));
+  ELIM (0 == strcmp (arr[1][1].a[0][2], ""));
+  ELIM (0 == strcmp (arr[1][1].a[1][0], ""));
+  ELIM (0 == strcmp (arr[1][1].a[1][1], ""));
+  ELIM (0 == strcmp (arr[1][1].a[1][2], S8));
+}
+
+#line 1000
+
+char str6[] = S6;
+
+const struct {
+  const char *p[2][3];
+} ptr[][2] = {
+  [1][1] = { .p[1][2] = S8 },
+  [1][0] = { .p[1][1] = S6 },
+  [0][1] = { .p[1][2] = str6 },
+  [0][0] = { .p[0][0] = S4 }
+};
+
+void keep_strcmp_ptr (void)
+{
+  ELIM (0 == strcmp (ptr[0][0].p[0][0], S4));
+  KEEP (0 == strcmp (ptr[0][1].p[1][2], S6));
+  ELIM (0 == strcmp (ptr[1][0].p[1][1], S6));
+  ELIM (0 == strcmp (ptr[1][1].p[1][2], S8));
+}
+
+/* { dg-final { scan-tree-dump-times "_not_eliminated" 0 "optimized" } }
+   { dg-final { scan-tree-dump-times "call_made_in_false_branch_on_line_1\[0-9\]\[0-9\]\[0-9\]" 1 "optimized" } }
+   { dg-final { scan-tree-dump-times "call_made_in_true_branch_on_line_1\[0-9\]\[0-9\]\[0-9\]" 1 "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/strlenopt-42.c b/gcc/testsuite/gcc.dg/strlenopt-42.c
new file mode 100644
index 0000000..8b8749c
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/strlenopt-42.c
@@ -0,0 +1,293 @@
+/* PR tree-optimization/83693 - missing strlen optimization for array
+   of arrays
+   { dg-do compile }
+   { dg-options "-O1 -fdump-tree-optimized" } */
+
+#include "strlenopt.h"
+
+#define DIFF_MAX __PTRDIFF_MAX__
+
+#define CAT(x, y) x ## y
+#define CONCAT(x, y) CAT (x, y)
+#define FAILNAME(name) CONCAT (call_ ## name ##_on_line_, __LINE__)
+
+#define FAIL(name) do {				\
+    extern void FAILNAME (name) (void);		\
+    FAILNAME (name)();				\
+  } while (0)
+
+/* Macro to emit a call to function named
+     call_in_true_branch_not_eliminated_on_line_NNN()
+   for each call that's expected to be eliminated.  The dg-final
+   scan-tree-dump-time directive at the bottom of the test verifies
+   that no such call appears in output.  */
+#define ELIM(expr)					\
+  if (!(expr)) FAIL (in_true_branch_not_eliminated);	\
+  else (void)0
+
+/* Macro to emit a call to a function named
+     call_made_in_{true,false}_branch_on_line_NNN()
+   for each call that's expected to be retained.  The dg-final
+   scan-tree-dump-time directive at the bottom of the test verifies
+   that the expected number of both kinds of calls appears in output
+   (a pair for each line with the invocation of the KEEP() macro.  */
+#define KEEP(expr)				\
+  if (expr)					\
+    FAIL (made_in_true_branch);			\
+  else						\
+    FAIL (made_in_false_branch)
+
+
+#define STR "0123456789abcdefghijklmnopqrtsuvwxyz"
+#define S(N) (STR + sizeof (STR) - N - 1)
+
+const char a5_5[5][5] = {
+  "", "1", "12", "123", "1234"
+};
+
+const char b5_5[5][5] = {
+  [1] = "1", [0] = "", [3] = "123", [4] = "1234", [2] = "12"
+};
+
+const char c5_5[5][5] = { [3] = "123" };
+const char d5_5_5[5][5][5] = { [2][3] = "123" };
+
+const char* const a3_6_17[3][6] = {
+  { S (0), S (1), S (2), S (3), S (4), S (5) },
+  { S (6), S (7), S (8), S (9), S (10), S (11) },
+  { S (12), S (13), S (14), S (15), S (16), S (17) },
+};
+
+void elim_arrays (void)
+{
+  ELIM (strlen (a5_5[0]) == 0);
+  ELIM (strlen (a5_5[1]) == 1);
+  ELIM (strlen (a5_5[2]) == 2);
+  ELIM (strlen (a5_5[3]) == 3);
+  ELIM (strlen (a5_5[4]) == 4);
+
+  ELIM (strlen (b5_5[0]) == 0);
+  ELIM (strlen (b5_5[1]) == 1);
+  ELIM (strlen (b5_5[2]) == 2);
+  ELIM (strlen (b5_5[3]) == 3);
+  ELIM (strlen (b5_5[4]) == 4);
+
+  ELIM (strlen (c5_5[0]) == 0);
+  ELIM (strlen (c5_5[1]) == 0);
+  ELIM (strlen (c5_5[2]) == 0);
+  ELIM (strlen (c5_5[3]) == 3);
+  ELIM (strlen (c5_5[4]) == 0);
+
+  ELIM (strlen (d5_5_5[0][0]) == 0);
+  ELIM (strlen (d5_5_5[0][1]) == 0);
+  ELIM (strlen (d5_5_5[0][2]) == 0);
+  ELIM (strlen (d5_5_5[0][3]) == 0);
+  ELIM (strlen (d5_5_5[0][4]) == 0);
+
+  ELIM (strlen (d5_5_5[1][0]) == 0);
+  ELIM (strlen (d5_5_5[1][1]) == 0);
+  ELIM (strlen (d5_5_5[1][2]) == 0);
+  ELIM (strlen (d5_5_5[1][3]) == 0);
+  ELIM (strlen (d5_5_5[1][4]) == 0);
+
+  ELIM (strlen (d5_5_5[2][0]) == 0);
+  ELIM (strlen (d5_5_5[2][1]) == 0);
+  ELIM (strlen (d5_5_5[2][2]) == 0);
+  ELIM (strlen (d5_5_5[2][3]) == 3);
+  ELIM (strlen (d5_5_5[2][4]) == 0);
+
+  ELIM (strlen (d5_5_5[3][0]) == 0);
+  ELIM (strlen (d5_5_5[3][1]) == 0);
+  ELIM (strlen (d5_5_5[3][2]) == 0);
+  ELIM (strlen (d5_5_5[3][3]) == 0);
+  ELIM (strlen (d5_5_5[3][4]) == 0);
+
+  ELIM (strlen (d5_5_5[4][0]) == 0);
+  ELIM (strlen (d5_5_5[4][1]) == 0);
+  ELIM (strlen (d5_5_5[4][2]) == 0);
+  ELIM (strlen (d5_5_5[4][3]) == 0);
+  ELIM (strlen (d5_5_5[4][4]) == 0);
+
+  ELIM (strlen (a3_6_17[0][0]) == 0);
+  ELIM (strlen (a3_6_17[0][1]) == 1);
+  ELIM (strlen (a3_6_17[0][2]) == 2);
+  ELIM (strlen (a3_6_17[0][3]) == 3);
+  ELIM (strlen (a3_6_17[0][4]) == 4);
+  ELIM (strlen (a3_6_17[0][5]) == 5);
+  ELIM (strlen (a3_6_17[1][0]) == 6);
+  ELIM (strlen (a3_6_17[1][1]) == 7);
+  ELIM (strlen (a3_6_17[1][2]) == 8);
+  ELIM (strlen (a3_6_17[1][3]) == 9);
+  ELIM (strlen (a3_6_17[1][4]) == 10);
+  ELIM (strlen (a3_6_17[1][5]) == 11);
+  ELIM (strlen (a3_6_17[2][0]) == 12);
+  ELIM (strlen (a3_6_17[2][1]) == 13);
+  ELIM (strlen (a3_6_17[2][2]) == 14);
+  ELIM (strlen (a3_6_17[2][3]) == 15);
+  ELIM (strlen (a3_6_17[2][4]) == 16);
+  ELIM (strlen (a3_6_17[2][5]) == 17);
+}
+
+const struct {
+  char a[10];
+  int i;
+} ms10 [] = {
+  { "123456789", 9 },
+  { "12345678", 8 },
+  { "1234567", 7 },
+  { "123456", 6 },
+  { "12345", 5 },
+  { "1234", 4 },
+  { "123", 3 },
+  { "12", 2 },
+  { "1", 1 },
+  { "", 0 },
+};
+
+struct MemberStringArrays
+{
+  char a5_5[5][5];
+  const char* a3_6_17[3][6];
+};
+
+const struct MemberStringArrays ms = {
+  .a5_5[1] = "1234",
+  .a5_5[3] = "1",
+  .a3_6_17[1][2] = S (12)
+};
+
+void elim_object_member_arrays (void)
+{
+  ELIM (strlen (ms10[0].a) == ms10[0].i);
+  ELIM (strlen (ms10[1].a) == ms10[1].i);
+  ELIM (strlen (ms10[2].a) == ms10[2].i);
+  ELIM (strlen (ms10[3].a) == ms10[3].i);
+  ELIM (strlen (ms10[4].a) == ms10[4].i);
+  ELIM (strlen (ms10[5].a) == ms10[5].i);
+  ELIM (strlen (ms10[6].a) == ms10[6].i);
+  ELIM (strlen (ms10[7].a) == ms10[7].i);
+  ELIM (strlen (ms10[8].a) == ms10[8].i);
+  ELIM (strlen (ms10[9].a) == ms10[9].i);
+
+  ELIM (strlen (ms.a5_5[0]) == 0);
+  ELIM (strlen (ms.a5_5[1]) == 4);
+  ELIM (strlen (ms.a5_5[2]) == 0);
+  ELIM (strlen (ms.a5_5[3]) == 1);
+  ELIM (strlen (ms.a5_5[4]) == 0);
+
+  ELIM (strlen (ms.a3_6_17[1][2]) == 12);
+}
+
+
+const struct MemberStringArrays ma[] = {
+  {
+    { "", "1", "12", "123", "1234" },
+    {
+      { S (0), S (1), S (2), S (3), S (4), S (5) },
+      { S (6), S (7), S (8), S (9), S (10), S (11) },
+      { S (12), S (13), S (14), S (15), S (16), S (17) },
+    }
+  },
+
+  {
+    { "1234", "123", "12", "1", "" },
+    {
+      { S (17), S (16), S (15), S (14), S (13), S (12) },
+      { S (11), S (10), S (9), S (8), S (7) , S (6) },
+      { S (5), S (4), S (3), S (2), S (1), S (0) },
+    }
+  },
+
+  {
+    { [3] = "123" },
+    .a3_6_17 [1][3] = S (13)
+  }
+};
+
+void elim_array_member_arrays (void)
+{
+  ELIM (strlen (ma[0].a5_5[0]) == 0);
+  ELIM (strlen (ma[0].a5_5[1]) == 1);
+  ELIM (strlen (ma[0].a5_5[2]) == 2);
+  ELIM (strlen (ma[0].a5_5[3]) == 3);
+  ELIM (strlen (ma[0].a5_5[4]) == 4);
+
+  ELIM (strlen (ma[0].a3_6_17[0][0]) == 0);
+  ELIM (strlen (ma[0].a3_6_17[0][1]) == 1);
+  ELIM (strlen (ma[0].a3_6_17[0][2]) == 2);
+  ELIM (strlen (ma[0].a3_6_17[0][3]) == 3);
+  ELIM (strlen (ma[0].a3_6_17[0][4]) == 4);
+  ELIM (strlen (ma[0].a3_6_17[0][5]) == 5);
+  ELIM (strlen (ma[0].a3_6_17[1][0]) == 6);
+  ELIM (strlen (ma[0].a3_6_17[1][1]) == 7);
+  ELIM (strlen (ma[0].a3_6_17[1][2]) == 8);
+  ELIM (strlen (ma[0].a3_6_17[1][3]) == 9);
+  ELIM (strlen (ma[0].a3_6_17[1][4]) == 10);
+  ELIM (strlen (ma[0].a3_6_17[1][5]) == 11);
+  ELIM (strlen (ma[0].a3_6_17[2][0]) == 12);
+  ELIM (strlen (ma[0].a3_6_17[2][1]) == 13);
+  ELIM (strlen (ma[0].a3_6_17[2][2]) == 14);
+  ELIM (strlen (ma[0].a3_6_17[2][3]) == 15);
+  ELIM (strlen (ma[0].a3_6_17[2][4]) == 16);
+  ELIM (strlen (ma[0].a3_6_17[2][5]) == 17);
+
+  ELIM (strlen (ma[1].a5_5[0]) == 4);
+  ELIM (strlen (ma[1].a5_5[1]) == 3);
+  ELIM (strlen (ma[1].a5_5[2]) == 2);
+  ELIM (strlen (ma[1].a5_5[3]) == 1);
+  ELIM (strlen (ma[1].a5_5[4]) == 0);
+
+  ELIM (strlen (ma[1].a3_6_17[0][0]) == 17);
+  ELIM (strlen (ma[1].a3_6_17[0][1]) == 16);
+  ELIM (strlen (ma[1].a3_6_17[0][2]) == 15);
+  ELIM (strlen (ma[1].a3_6_17[0][3]) == 14);
+  ELIM (strlen (ma[1].a3_6_17[0][4]) == 13);
+  ELIM (strlen (ma[1].a3_6_17[0][5]) == 12);
+  ELIM (strlen (ma[1].a3_6_17[1][0]) == 11);
+  ELIM (strlen (ma[1].a3_6_17[1][1]) == 10);
+  ELIM (strlen (ma[1].a3_6_17[1][2]) == 9);
+  ELIM (strlen (ma[1].a3_6_17[1][3]) == 8);
+  ELIM (strlen (ma[1].a3_6_17[1][4]) == 7);
+  ELIM (strlen (ma[1].a3_6_17[1][5]) == 6);
+  ELIM (strlen (ma[1].a3_6_17[2][0]) == 5);
+  ELIM (strlen (ma[1].a3_6_17[2][1]) == 4);
+  ELIM (strlen (ma[1].a3_6_17[2][2]) == 3);
+  ELIM (strlen (ma[1].a3_6_17[2][3]) == 2);
+  ELIM (strlen (ma[1].a3_6_17[2][4]) == 1);
+  ELIM (strlen (ma[1].a3_6_17[2][5]) == 0);
+
+  ELIM (strlen (ma[2].a5_5[0]) == 0);
+  ELIM (strlen (ma[2].a5_5[1]) == 0);
+  ELIM (strlen (ma[2].a5_5[2]) == 0);
+  ELIM (strlen (ma[2].a5_5[3]) == 3);
+  ELIM (strlen (ma[2].a5_5[4]) == 0);
+
+  ELIM (strlen (ma[2].a3_6_17[1][3]) == 13);
+}
+
+
+#line 10000
+
+char modstr[5] = "1234";
+
+const char* const xa[][3] = {
+  { "", "1", "12" },
+  { "123", modstr, "12345" }
+};
+
+void keep_arrays (void)
+{
+  ELIM (strlen (xa[0][0]) == 0);
+  ELIM (strlen (xa[0][1]) == 1);
+  ELIM (strlen (xa[0][2]) == 2);
+  ELIM (strlen (xa[1][0]) == 3);
+  /* The following refers to an initialized non-const character array
+     so the strlen() expression must not be eliminated.  */
+  KEEP (strlen (xa[1][1]) == 4);
+  ELIM (strlen (xa[1][2]) == 5);
+}
+
+/* { dg-final { scan-tree-dump-times "_not_eliminated" 0 "optimized" } }
+   { dg-final { scan-tree-dump-times "call_made_in_true_branch_on_line_1\[0-9\]\[0-9\]\[0-9\]" 1 "optimized" } }
+   { dg-final { scan-tree-dump-times "call_made_in_false_branch_on_line_1\[0-9\]\[0-9\]\[0-9\]" 1 "optimized" } } */
+
diff --git a/gcc/testsuite/gcc.dg/strlenopt-43.c b/gcc/testsuite/gcc.dg/strlenopt-43.c
new file mode 100644
index 0000000..d1b37d4
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/strlenopt-43.c
@@ -0,0 +1,257 @@
+/* PR tree-optimization/83693 - missing strlen optimization for array
+   of arrays
+   Test to verify that strlen() calls with arguments that are copies
+   of constant arrays of arrays, made either by strcpy() or sprintf(),
+   are folded as if the copies were made from string literals.
+   { dg-do compile }
+   { dg-options "-O2 -fdump-tree-optimized" } */
+
+#include "strlenopt.h"
+
+extern int snprintf (char*, size_t, const char*, ...);
+
+#define CAT(x, y) x ## y
+#define CONCAT(x, y) CAT (x, y)
+#define FAILNAME(name) CONCAT (call_ ## name ##_on_line_, __LINE__)
+
+#define FAIL(name) do {				\
+    extern void FAILNAME (name) (void);		\
+    FAILNAME (name)();				\
+  } while (0)
+
+/* Macro to emit a call to function named
+     call_in_true_branch_not_eliminated_on_line_NNN()
+   for each call that's expected to be eliminated.  The dg-final
+   scan-tree-dump-time directive at the bottom of the test verifies
+   that no such call appears in output.  */
+#define ELIM(expr)					\
+  if (!(expr)) FAIL (in_true_branch_not_eliminated);	\
+  else (void)0
+
+/* Macro to emit a call to a function named
+     call_made_in_{true,false}_branch_on_line_NNN()
+   for each call that's expected to be retained.  The dg-final
+   scan-tree-dump-time directive at the bottom of the test verifies
+   that the expected number of both kinds of calls appears in output
+   (a pair for each line with the invocation of the KEEP() macro.  */
+#define KEEP(expr)				\
+  if (expr)					\
+    FAIL (made_in_true_branch);			\
+  else						\
+    FAIL (made_in_false_branch)
+
+
+struct MemArray {
+  char a[2][3][10];
+};
+
+const struct MemArray obj = {
+  .a[1][2] = "12345678",
+  .a[0][1] = "12345"
+};
+
+const struct MemArray arr[][3] = {
+  [1][1] = { .a[1][2] = "12345678" },
+  [0][0] = { .a[0][0] = "1234" }
+};
+
+void elim_memcpy_obj (void)
+{
+  char d[10];
+
+  memcpy (d, obj.a[0][0], 4);
+  ELIM (strlen (d) == 0);
+
+  memcpy (d, obj.a[0][1], 6);
+  ELIM (strlen (d) == 5);
+
+  memcpy (d, obj.a[0][2], 6);
+  ELIM (strlen (d) == 0);
+
+  memcpy (d, obj.a[1][0], 7);
+  ELIM (strlen (d) == 0);
+
+  memcpy (d, obj.a[1][1], 8);
+  ELIM (strlen (d) == 0);
+
+  memcpy (d, obj.a[1][2], 9);
+  ELIM (strlen (d) == 8);
+}
+
+void elim_memcpy_array (void)
+{
+  char d[10];
+
+  memcpy (d, arr[0][0].a[0][0], 9);
+  ELIM (strlen (d) == 4);
+
+  memcpy (d, arr[0][0].a[0][1], 9);
+  ELIM (strlen (d) == 0);
+
+  memcpy (d, arr[0][0].a[1][0], 9);
+  ELIM (strlen (d) == 0);
+
+  memcpy (d, arr[1][1].a[0][0], 9);
+  ELIM (strlen (d) == 0);
+
+  memcpy (d, arr[1][1].a[0][1], 9);
+  ELIM (strlen (d) == 0);
+
+  memcpy (d, arr[1][1].a[1][0], 9);
+  ELIM (strlen (d) == 0);
+
+  memcpy (d, arr[1][1].a[1][1], 9);
+  ELIM (strlen (d) == 0);
+
+  memcpy (d, arr[1][1].a[1][2], 9);
+  ELIM (strlen (d) == 8);
+}
+
+void elim_strcpy_array (void)
+{
+  char d[10];
+
+  strcpy (d, arr[0][0].a[0][0]);
+  ELIM (strlen (d) == 4);
+
+  strcpy (d, arr[0][0].a[0][1]);
+  ELIM (strlen (d) == 0);
+
+  strcpy (d, arr[0][0].a[1][0]);
+  ELIM (strlen (d) == 0);
+
+  strcpy (d, arr[1][1].a[0][0]);
+  ELIM (strlen (d) == 0);
+
+  strcpy (d, arr[1][1].a[0][1]);
+  ELIM (strlen (d) == 0);
+
+  strcpy (d, arr[1][1].a[1][0]);
+  ELIM (strlen (d) == 0);
+
+  strcpy (d, arr[1][1].a[1][1]);
+  ELIM (strlen (d) == 0);
+
+  strcpy (d, arr[1][1].a[1][2]);
+  ELIM (strlen (d) == 8);
+}
+
+void elim_snprintf_array (void)
+{
+  int n;
+  n = snprintf (0, 0, "%s", arr[0][0].a[0][0]);
+  ELIM (n == 4);
+
+  n = snprintf (0, 0, "%s5", arr[0][0].a[0][0]);
+  ELIM (n == 5);
+
+  n = snprintf (0, 0, "%s", arr[0][0].a[0][1]);
+  ELIM (n == 0);
+
+  n = snprintf (0, 0, "%s56", arr[0][0].a[0][1]);
+  ELIM (n == 2);
+}
+
+#line 1000
+
+char str[] = "666";
+
+const struct {
+  char *p[2][3];
+} ptr[][2] = {
+  [1][1] = { .p[1][2] = "12345678" },
+  [0][1] = { .p[1][2] = str },
+  [0][0] = { .p[0][0] = "1234" }
+};
+
+void keep_strcpy_ptr (void)
+{
+  char d[10];
+
+  strcpy (d, ptr[0][1].p[1][2]);
+  KEEP (strlen (d) == 3);
+
+  strcpy (d, ptr[1][1].p[1][2]);
+  ELIM (strlen (d) == 8);
+}
+
+void keep_snprintf_ptr (void)
+{
+  int n = snprintf (0, 0, "%s", ptr[0][1].p[1][2]);
+  KEEP (n == 3);
+}
+
+__WCHAR_TYPE__ wstr[] = L"666";
+
+const struct {
+  __WCHAR_TYPE__ a5[5];
+  __WCHAR_TYPE__ a5_5[5][5];
+  __WCHAR_TYPE__ *p5[5];
+} warr[3] = {
+  [1] = {
+    .a5 = L"1234",
+    .a5_5 = { L"", L"1", L"12", L"123", L"1234" },
+    .p5 = { L"1234", wstr, L"12", L"1" }
+  }
+};
+
+
+void keep_snprintf_wide (void)
+{
+  ELIM (0 == snprintf (0, 0, "%ls", warr[0].a5));
+  ELIM (0 == snprintf (0, 0, "%ls", warr[2].a5));
+
+  /* A non-empty wide string could, in theory, convert into no
+     bytes on output, but no known encoding exists where L"1234"
+     will convert into more than 4 * MB_CHAR_MAX which is (in
+     all known environments) 24.  */
+  int n = snprintf (0, 0, "%ls", warr[1].a5);
+  KEEP (n == 4);
+  ELIM (n < 25);
+
+  ELIM (0 == snprintf (0, 0, "%ls", warr[1].a5_5[0]));
+
+  n = snprintf (0, 0, "%ls", warr[1].a5_5[1]);
+  KEEP (n == 1);
+  ELIM (n < 7);
+
+  n = snprintf (0, 0, "%ls", warr[1].a5_5[2]);
+  KEEP (n == 2);
+  ELIM (n < 13);
+
+  n = snprintf (0, 0, "%ls", warr[1].a5_5[3]);
+  KEEP (n == 3);
+  ELIM (n < 19);
+
+  n = snprintf (0, 0, "%ls", warr[1].a5_5[4]);
+  KEEP (n == 4);
+  ELIM (n < 25);
+
+  n = snprintf (0, 0, "%ls", warr[1].p5[0]);
+  KEEP (n == 4);
+  ELIM (n < 25);
+
+  /* warr[1].p5[1] points to the non-const wstr which could have
+     changed after it was initialized.  */
+  n = snprintf (0, 0, "%ls", warr[1].p5[1]);
+  KEEP (n == 4);
+  KEEP (n < 19);
+  KEEP (n > 19);
+
+  n = snprintf (0, 0, "%ls", warr[1].p5[2]);
+  KEEP (n == 2);
+  ELIM (n < 13);
+
+  n = snprintf (0, 0, "%ls", warr[1].p5[3]);
+  KEEP (n == 1);
+  ELIM (n < 7);
+
+  /* The following is undefined because warr[1].p5[4] is not initialized
+     but the test verifies it's handled gracefully (i.e., doesn't ICE).  */
+  if (wstr[1] && 0 == snprintf (0, 0, "%ls", warr[1].p5[4]))
+    wstr[0] = 0;
+}
+
+/* { dg-final { scan-tree-dump-times "_not_eliminated" 0 "optimized" } }
+   { dg-final { scan-tree-dump-times "call_made_in_false_branch_on_line_1\[0-9\]\[0-9\]\[0-9\]" 13 "optimized" } }
+   { dg-final { scan-tree-dump-times "call_made_in_true_branch_on_line_1\[0-9\]\[0-9\]\[0-9\]" 13 "optimized" } } */

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] fold strlen of constant aggregates (PR 83693)
  2018-01-09 21:43     ` Martin Sebor
@ 2018-01-12 22:05       ` Martin Sebor
  2018-04-30 17:24       ` Jeff Law
  1 sibling, 0 replies; 7+ messages in thread
From: Martin Sebor @ 2018-01-12 22:05 UTC (permalink / raw)
  To: Richard Biener, Gcc Patch List

Ping in advance of stage 3 closing:
   https://gcc.gnu.org/ml/gcc-patches/2018-01/msg00680.html

(As the majority of the strlen issues I raised recently, this
one also came out of my testing of the -Wrray-bounds and
-Wrestrict warnings newly enhanced in GCC 8.)

On 01/09/2018 02:41 PM, Martin Sebor wrote:
> I found a few problems in the previous revision:
>
> 1) It didn't handle the simple case of member arrays of const
>    struct objects (only member arrays of  const arrays of structs
>    were handled).
> 2) The array_ctor_elt() function returned a narrow empty string
>    for an uninitialized CONSTRUCTOR element of any character type
>    when it should return the same string in the expected character
>    type (char, wchar_t, etc.)
> 3) The string_constant() function would in some cases use a byte
>    offset to get the initializer from a CONSTRUCTOR instead of
>    an array index.
>
> The attached version 3 of the patch corrects these issues.
> Retested on x86_64 and with the Glibc ToT.
>
>> After sleeping on it I realized that although enhancing
>> gimple_fold_builtin_strlen is an improvement, it only benefits
>> straight calls to strlen and nothing else.  Calls to strcmp,
>> sprintf, or strcpy (and along with it the rest of the strlen
>> pass) are still expanded as if the argument were unknown.  To
>> improve even those callers, the folding needs to be done at
>> a lower level (otherwise they'd all have to duplicate the same
>> code as gimple_fold_builtin_strlen).  With that in mind I've
>> moved the logic to string_constant() so all of those clients
>> benefit.
>>
>> Retested on x86_64-linux.  Out of paranoia I also built and
>> tested the top of Glibc trunk with no unusual failures.
>>
>> Martin
>

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] fold strlen of constant aggregates (PR 83693)
  2018-01-09 21:43     ` Martin Sebor
  2018-01-12 22:05       ` Martin Sebor
@ 2018-04-30 17:24       ` Jeff Law
  2018-05-02 10:00         ` Richard Biener
  1 sibling, 1 reply; 7+ messages in thread
From: Jeff Law @ 2018-04-30 17:24 UTC (permalink / raw)
  To: Martin Sebor, Richard Biener; +Cc: Gcc Patch List

On 01/09/2018 02:41 PM, Martin Sebor wrote:
> I found a few problems in the previous revision:
> 
> 1) It didn't handle the simple case of member arrays of const
>    struct objects (only member arrays of  const arrays of structs
>    were handled).
> 2) The array_ctor_elt() function returned a narrow empty string
>    for an uninitialized CONSTRUCTOR element of any character type
>    when it should return the same string in the expected character
>    type (char, wchar_t, etc.)
> 3) The string_constant() function would in some cases use a byte
>    offset to get the initializer from a CONSTRUCTOR instead of
>    an array index.
> 
> The attached version 3 of the patch corrects these issues.
> Retested on x86_64 and with the Glibc ToT.
> 
>> After sleeping on it I realized that although enhancing
>> gimple_fold_builtin_strlen is an improvement, it only benefits
>> straight calls to strlen and nothing else.  Calls to strcmp,
>> sprintf, or strcpy (and along with it the rest of the strlen
>> pass) are still expanded as if the argument were unknown.  To
>> improve even those callers, the folding needs to be done at
>> a lower level (otherwise they'd all have to duplicate the same
>> code as gimple_fold_builtin_strlen).  With that in mind I've
>> moved the logic to string_constant() so all of those clients
>> benefit.
>>
>> Retested on x86_64-linux.  Out of paranoia I also built and
>> tested the top of Glibc trunk with no unusual failures.
>>
>> Martin
> 
> 
> gcc-83693.diff
> 
> 
> PR tree-optimization/83693 - missing strlen optimization for array of arrays
> 
> gcc/ChangeLog:
> 
> 	PR tree-optimization/83693
> 	* expr.c (array_ctor_elt): New function.
> 	(string_constant): Call it.  Handle initializers of arrays of arrays.
> 
> gcc/testsuite/ChangeLog:
> 
> 	PR tree-optimization/83693
> 	* gcc.dg/strcmp-2.c: New test.
> 	* gcc.dg/strlenopt-42.c: New test.
> 	* gcc.dg/strlenopt-43.c: New test.
> 
> diff --git a/gcc/expr.c b/gcc/expr.c
> index cd1e57d..75110e5 100644
> --- a/gcc/expr.c
> +++ b/gcc/expr.c
> @@ -62,7 +62,7 @@ along with GCC; see the file COPYING3.  If not see
>  #include "rtl-chkp.h"
>  #include "ccmp.h"
>  #include "rtx-vector-builder.h"
> -
> +#include "gimple-fold.h"
>  
>  /* If this is nonzero, we do not bother generating VOLATILE
>     around volatile memory references, and we are willing to
> @@ -11343,6 +11343,50 @@ is_aligning_offset (const_tree offset, const_tree exp)
>    return TREE_CODE (offset) == ADDR_EXPR && TREE_OPERAND (offset, 0) == exp;
>  }
>  
> +/* Return initializer element IDX for the array CONSTRUCTOR initializer
> +   INIT or an empty string constant with type CHARTYPE if no such element
> +   exists.  If IDX is null, simply return an empty string.  If IDX is not
> +   constant, return NULL_TREE.  A helper of string_constant.  */
> +
> +static tree
> +array_ctor_elt (tree chartype, tree init, tree idx)
> +{
> +  if (idx)
> +    {
> +      if (!tree_fits_uhwi_p (idx))
> +	return NULL_TREE;
> +
> +      HOST_WIDE_INT i = tree_to_uhwi (idx);
> +
> +      if (i < CONSTRUCTOR_NELTS (init)
> +	  && tree_int_cst_equal (CONSTRUCTOR_ELT (init, i)->index, idx))
> +	return CONSTRUCTOR_ELT (init, i)->value;
> +
> +      tree index, value;
> +      FOR_EACH_CONSTRUCTOR_ELT (CONSTRUCTOR_ELTS (init), i, index, value)
> +	{
> +	  if (tree_int_cst_equal (index, idx))
> +	    return value;
> +	}
So you first look at IDX and if it patches you return the appropriate value.

Else you iterate from the start of the constructor list through the
elements until you find a match.

Would a binary search between 0..IDX be better here?  Do we have any
imposed ordering on the elements?

> +    }
> +
> +  /* Build and return a STRING_CST representing the empty string with
> +     CHARTYPE.  Make sure the string representation has enough zero
> +     bytes for CHARTYPE.  */
> +  const char nuls[16] = "";
> +  unsigned elemsize = tree_to_uhwi (TYPE_SIZE_UNIT(chartype));
> +  tree str = build_string (elemsize, nuls);
> +  tree elemtype = build_qualified_type (chartype, TYPE_QUAL_CONST);
> +  tree indextype = build_index_type (size_zero_node);
> +  tree arraytype = build_array_type (elemtype, indextype);
> +  TREE_TYPE (str) = arraytype;
> +  TREE_CONSTANT (str) = true;
> +  TREE_READONLY (str) = true;
> +  TREE_STATIC (str) = true;
I'm a but surprised we don't have a suitable string constant lying
around, but I don't see one.


So really the only concern is the compile-time cost of array_ctor_elt.
If we know anything about the ordering of elements within the
constructor, then we could do a lot better WRT the compile-time cost.

jeff

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] fold strlen of constant aggregates (PR 83693)
  2018-04-30 17:24       ` Jeff Law
@ 2018-05-02 10:00         ` Richard Biener
  0 siblings, 0 replies; 7+ messages in thread
From: Richard Biener @ 2018-05-02 10:00 UTC (permalink / raw)
  To: Jeff Law; +Cc: Martin Sebor, Gcc Patch List

On Mon, Apr 30, 2018 at 7:16 PM, Jeff Law <law@redhat.com> wrote:
> On 01/09/2018 02:41 PM, Martin Sebor wrote:
>> I found a few problems in the previous revision:
>>
>> 1) It didn't handle the simple case of member arrays of const
>>    struct objects (only member arrays of  const arrays of structs
>>    were handled).
>> 2) The array_ctor_elt() function returned a narrow empty string
>>    for an uninitialized CONSTRUCTOR element of any character type
>>    when it should return the same string in the expected character
>>    type (char, wchar_t, etc.)
>> 3) The string_constant() function would in some cases use a byte
>>    offset to get the initializer from a CONSTRUCTOR instead of
>>    an array index.
>>
>> The attached version 3 of the patch corrects these issues.
>> Retested on x86_64 and with the Glibc ToT.
>>
>>> After sleeping on it I realized that although enhancing
>>> gimple_fold_builtin_strlen is an improvement, it only benefits
>>> straight calls to strlen and nothing else.  Calls to strcmp,
>>> sprintf, or strcpy (and along with it the rest of the strlen
>>> pass) are still expanded as if the argument were unknown.  To
>>> improve even those callers, the folding needs to be done at
>>> a lower level (otherwise they'd all have to duplicate the same
>>> code as gimple_fold_builtin_strlen).  With that in mind I've
>>> moved the logic to string_constant() so all of those clients
>>> benefit.
>>>
>>> Retested on x86_64-linux.  Out of paranoia I also built and
>>> tested the top of Glibc trunk with no unusual failures.
>>>
>>> Martin
>>
>>
>> gcc-83693.diff
>>
>>
>> PR tree-optimization/83693 - missing strlen optimization for array of arrays
>>
>> gcc/ChangeLog:
>>
>>       PR tree-optimization/83693
>>       * expr.c (array_ctor_elt): New function.
>>       (string_constant): Call it.  Handle initializers of arrays of arrays.
>>
>> gcc/testsuite/ChangeLog:
>>
>>       PR tree-optimization/83693
>>       * gcc.dg/strcmp-2.c: New test.
>>       * gcc.dg/strlenopt-42.c: New test.
>>       * gcc.dg/strlenopt-43.c: New test.
>>
>> diff --git a/gcc/expr.c b/gcc/expr.c
>> index cd1e57d..75110e5 100644
>> --- a/gcc/expr.c
>> +++ b/gcc/expr.c
>> @@ -62,7 +62,7 @@ along with GCC; see the file COPYING3.  If not see
>>  #include "rtl-chkp.h"
>>  #include "ccmp.h"
>>  #include "rtx-vector-builder.h"
>> -
>> +#include "gimple-fold.h"
>>
>>  /* If this is nonzero, we do not bother generating VOLATILE
>>     around volatile memory references, and we are willing to
>> @@ -11343,6 +11343,50 @@ is_aligning_offset (const_tree offset, const_tree exp)
>>    return TREE_CODE (offset) == ADDR_EXPR && TREE_OPERAND (offset, 0) == exp;
>>  }
>>
>> +/* Return initializer element IDX for the array CONSTRUCTOR initializer
>> +   INIT or an empty string constant with type CHARTYPE if no such element
>> +   exists.  If IDX is null, simply return an empty string.  If IDX is not
>> +   constant, return NULL_TREE.  A helper of string_constant.  */
>> +
>> +static tree
>> +array_ctor_elt (tree chartype, tree init, tree idx)
>> +{
>> +  if (idx)
>> +    {
>> +      if (!tree_fits_uhwi_p (idx))
>> +     return NULL_TREE;
>> +
>> +      HOST_WIDE_INT i = tree_to_uhwi (idx);
>> +
>> +      if (i < CONSTRUCTOR_NELTS (init)
>> +       && tree_int_cst_equal (CONSTRUCTOR_ELT (init, i)->index, idx))
>> +     return CONSTRUCTOR_ELT (init, i)->value;
>> +
>> +      tree index, value;
>> +      FOR_EACH_CONSTRUCTOR_ELT (CONSTRUCTOR_ELTS (init), i, index, value)
>> +     {
>> +       if (tree_int_cst_equal (index, idx))
>> +         return value;
>> +     }
> So you first look at IDX and if it patches you return the appropriate value.
>
> Else you iterate from the start of the constructor list through the
> elements until you find a match.
>
> Would a binary search between 0..IDX be better here?  Do we have any
> imposed ordering on the elements?

There is also get_array_ctor_element_at_index you can simply use.

>> +    }
>> +
>> +  /* Build and return a STRING_CST representing the empty string with
>> +     CHARTYPE.  Make sure the string representation has enough zero
>> +     bytes for CHARTYPE.  */
>> +  const char nuls[16] = "";
>> +  unsigned elemsize = tree_to_uhwi (TYPE_SIZE_UNIT(chartype));
>> +  tree str = build_string (elemsize, nuls);
>> +  tree elemtype = build_qualified_type (chartype, TYPE_QUAL_CONST);
>> +  tree indextype = build_index_type (size_zero_node);
>> +  tree arraytype = build_array_type (elemtype, indextype);
>> +  TREE_TYPE (str) = arraytype;
>> +  TREE_CONSTANT (str) = true;
>> +  TREE_READONLY (str) = true;
>> +  TREE_STATIC (str) = true;
> I'm a but surprised we don't have a suitable string constant lying
> around, but I don't see one.

It's ugly.  Is it really necessary to do this?

Richard.

>
> So really the only concern is the compile-time cost of array_ctor_elt.
> If we know anything about the ordering of elements within the
> constructor, then we could do a lot better WRT the compile-time cost.
>
> jeff

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2018-05-02 10:00 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-01-08  2:11 [PATCH] fold strlen of constant aggregates (PR 83693) Martin Sebor
2018-01-08 11:41 ` Richard Biener
2018-01-09  1:20   ` Martin Sebor
2018-01-09 21:43     ` Martin Sebor
2018-01-12 22:05       ` Martin Sebor
2018-04-30 17:24       ` Jeff Law
2018-05-02 10:00         ` Richard Biener

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).