public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] enhance strlen to understand MEM_REF and partial overlaps (PR 86042, 86043)
@ 2018-06-07 15:57 Martin Sebor
  2018-06-29 17:06 ` Jeff Law
  0 siblings, 1 reply; 8+ messages in thread
From: Martin Sebor @ 2018-06-07 15:57 UTC (permalink / raw)
  To: Gcc Patch List

[-- Attachment #1: Type: text/plain, Size: 1057 bytes --]

The attached patch enhances the strlen pass to more consistently
deal with MEM_REF assignments (PR 86042) and to track string
lengths across calls to memcpy that overwrite parts of a string
with sequences of non-nul characters (PR 86043).

Fixes for both bugs rely on changes to the same code so I chose
to include them in the same patch.

To fix PR 86042 the patch extends handle_char_store() to deal with
more forms of multi-character assignments from MEM_REF (originally
introduced in r256180).  To handle assignments from strings of
multiple nuls the patch also extends the initializer_zerop()
function to understand MEM_REFs of the form:

    MEM[(char * {ref-all})&a] = MEM[(char * {ref-all})"..."];

The solution for PR 86043 consists of two parts: the extension
above which lets handle_char_store() recognize assignments of
sequences of non-null characters that overwrite some portion of
the leading non-zero characters in the destination and avoid
discarding the destination information, and a similar extension
to handle_builtin_memcpy().

Martin

[-- Attachment #2: gcc-86042.diff --]
[-- Type: text/x-patch, Size: 16115 bytes --]

PR tree-optimization/86042 - missing strlen optimization after second strcpy

gcc/ChangeLog:

	PR tree-optimization/86042
	* tree-ssa-strlen.c (handle_builtin_memcpy): Handle strict overlaps.
	(get_string_cst_length): Rename...
	(get_min_string_length): ...to this.  Add argument.
	(handle_char_store): Extend to handle multi-character stores by
	MEM_REF.
	* tree.c (initializer_zerop): Use new argument.  Handle MEM_REF.
	* tree.h (initializer_zerop): Add argument.

gcc/testsuite/ChangeLog:

	PR tree-optimization/86042
	* gcc.dg/strlenopt-44.c: New test.

diff --git a/gcc/testsuite/gcc.dg/strlenopt-44.c b/gcc/testsuite/gcc.dg/strlenopt-44.c
new file mode 100644
index 0000000..3479746
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/strlenopt-44.c
@@ -0,0 +1,109 @@
+/* PR tree-optimization/86042 - missing strlen optimization after second strcpy
+   { dg-do compile }
+   { dg-options "-O2 -Wall -fdump-tree-optimized" } */
+
+#include "strlenopt.h"
+
+#define CAT(x, y) x ## y
+#define CONCAT(x, y) CAT (x, y)
+#define FAILNAME(name) CONCAT (call_ ## name ##_on_line_, __LINE__)
+
+#define FAIL(name) do {				\
+    extern void FAILNAME (name) (void);		\
+    FAILNAME (name)();				\
+  } while (0)
+
+/* Macro to emit a call to funcation named
+     call_in_true_branch_not_eliminated_on_line_NNN()
+   for each call that's expected to be eliminated.  The dg-final
+   scan-tree-dump-time directive at the bottom of the test verifies
+   that no such call appears in output.  */
+#define ELIM(expr) \
+  if (!(expr)) FAIL (in_true_branch_not_eliminated); else (void)0
+
+void elim_after_duplicate_strcpy (void)
+{
+#define T(N, assign, off, str, r0, r1)		\
+  do {						\
+    char a[N];					\
+    strcpy (a, assign);				\
+    unsigned n0 = strlen (a);			\
+    strcpy (a + off, str);			\
+    unsigned n1 = strlen (a);			\
+    ELIM (n0 == r0 && n1 == r1);		\
+  } while (0)
+
+  T (2, "",   0, "1",   0, 1);
+
+  T (2, "1",  0, "2",   1, 1);
+  T (2, "1",  1, "",    1, 1);
+
+  T (3, "\0", 0, "1",   0, 1);
+  T (3, "1",  1, "2",   1, 2);
+
+  T (3, "12", 0, "23",  2, 2);
+  T (3, "12", 1, "3",   2, 2);
+  T (3, "12", 2, "",    2, 2);
+
+  T (4, "1",   1, "23",  1, 3);
+  T (4, "12",  1, "23",  2, 3);
+
+  T (4, "123", 0, "234", 3, 3);
+  T (4, "123", 1, "34",  3, 3);
+  T (4, "123", 2, "4",   3, 3);
+  T (4, "123", 3, "",    3, 3);
+
+  T (5, "1234", 0, "1",    4, 1);
+  T (5, "1234", 0, "12",   4, 2);
+  T (5, "1234", 0, "123",  4, 3);
+  T (5, "1234", 0, "1234", 4, 4);
+
+  T (5, "123",  1, "234", 3, 4);
+  T (6, "123",  2, "234", 3, 5);
+}
+
+void elim_after_init_memcpy (void)
+{
+#undef T
+#define T(init, off, str, n, res)		\
+  do {						\
+    char a[] = init;				\
+    memcpy (a + off, str, n);			\
+    ELIM (strlen (a) == res);			\
+  } while (0)
+
+  T ("\0",   0, "1",    2, 1);
+  T ("\0\0", 0, "12",   3, 2);
+
+#define INIT { '1', '2', '3', '4' }
+  T (INIT,   0, "",     1, 0);
+  T (INIT,   0, "1",    2, 1);
+  T (INIT,   0, "12",   3, 2);
+  T (INIT,   0, "123",  4, 3);
+
+  T ("1234", 0, "2",    1, 4);
+  T ("1234", 0, "23",   2, 4);
+  T ("1234", 0, "234",  3, 4);
+  T ("1234", 0, "2345", 4, 4);
+  T ("1234", 0, "2345", 5, 4);
+
+  T ("1234", 1, "2",    1, 4);
+  T ("1234", 1, "23",   2, 4);
+  T ("1234", 1, "234",  3, 4);
+  T ("1234", 1, "234",  4, 4);
+
+  T ("1234", 2, "3",    1, 4);
+  T ("1234", 2, "3",    2, 3);
+  T ("1234", 2, "34",   2, 4);
+  T ("1234", 2, "34",   3, 4);
+
+  T ("12\00034", 0, "1", 1, 2);
+  T ("12\00034", 0, "1", 2, 1);
+
+  T ("AB\000CD", 0, "ab", 2, 2);
+  T ("AB\000CD", 0, "ab", 3, 2);
+  T ("AB\000CD", 0, "ab\000c", 4, 2);
+}
+
+/* { dg-final { scan-tree-dump-times "strlen" 0 "optimized" } }
+   { dg-final { scan-tree-dump-times "call_in_true_branch_not_eliminated" 0 "optimized" } } */
diff --git a/gcc/tree-ssa-strlen.c b/gcc/tree-ssa-strlen.c
index 7a89174..b0b3c6e 100644
--- a/gcc/tree-ssa-strlen.c
+++ b/gcc/tree-ssa-strlen.c
@@ -2255,6 +2255,16 @@ handle_builtin_memcpy (enum built_in_function bcode, gimple_stmt_iterator *gsi)
       full_string_p = clen > nonzero_chars;
     }
 
+  if (!full_string_p
+      && olddsi
+      && tree_int_cst_le (newlen, olddsi->nonzero_chars))
+    {
+      /* The SRC substring being written strictly overlaps
+	 a subsequence of the existing string OLDDSI.  */
+      newlen = olddsi->nonzero_chars;
+      full_string_p = olddsi->full_string_p;
+    }
+
   if (olddsi != NULL && TREE_CODE (len) == SSA_NAME)
     adjust_last_stmt (olddsi, stmt, false);
 
@@ -3087,11 +3097,25 @@ handle_pointer_plus (gimple_stmt_iterator *gsi)
 }
 
 /* If RHS, either directly or indirectly, refers to a string of constant
-   length, return it.  Otherwise return a negative value.  */
+   length, return it.  Otherwise, if it refers to a character constant,
+   return 1 if the constant is non-zero and 0 if it is nul.  Otherwise,
+   return a negative value.  */
 
 static HOST_WIDE_INT
-get_string_cst_length (tree rhs)
+get_min_string_length (tree rhs, bool *full_string_p)
 {
+  if (TREE_CODE (rhs) == INTEGER_CST)
+    {
+      if (integer_zerop (rhs))
+	{
+	  *full_string_p = true;
+	  return 0;
+	}
+
+      *full_string_p = false;
+      return 1;
+    }
+
   if (TREE_CODE (rhs) == MEM_REF
       && integer_zerop (TREE_OPERAND (rhs, 1)))
     {
@@ -3108,9 +3132,11 @@ get_string_cst_length (tree rhs)
 		{
 		  strinfo *si = get_strinfo (idx);
 		  if (si
-		      && si->full_string_p
 		      && tree_fits_shwi_p (si->nonzero_chars))
-		    return tree_to_shwi (si->nonzero_chars);
+		    {
+		      *full_string_p = si->full_string_p;
+		      return tree_to_shwi (si->nonzero_chars);
+		    }
 		}
 	    }
 	}
@@ -3121,12 +3147,17 @@ get_string_cst_length (tree rhs)
     rhs = DECL_INITIAL (rhs);
 
   if (rhs && TREE_CODE (rhs) == STRING_CST)
-    return strlen (TREE_STRING_POINTER (rhs));
+    {
+      *full_string_p = true;
+      return strlen (TREE_STRING_POINTER (rhs));
+    }
 
   return -1;
 }
 
-/* Handle a single character store.  */
+/* Handle a single or multiple character store either by single
+   character assignment or by multi-character assignment from
+   MEM_REF.  */
 
 static bool
 handle_char_store (gimple_stmt_iterator *gsi)
@@ -3164,10 +3195,13 @@ handle_char_store (gimple_stmt_iterator *gsi)
 	si = get_strinfo (idx);
     }
 
-  bool storing_zero_p = initializer_zerop (rhs);
-  bool storing_nonzero_p = (!storing_zero_p
-			    && TREE_CODE (rhs) == INTEGER_CST
-			    && integer_nonzerop (rhs));
+  /* STORING_NONZERO_P is true when not all stored characters are zero.
+     STORING_ALL_ZEROS_P is true when  all stored characters are zero.
+     Both are false when it's impossible to determine which is true.  */
+  bool storing_nonzero_p;
+  bool storing_all_zeros_p = initializer_zerop (rhs, &storing_nonzero_p);
+  bool full_string_p = storing_all_zeros_p;
+
   /* Set to the length of the string being assigned if known.  */
   HOST_WIDE_INT rhslen;
 
@@ -3175,7 +3209,7 @@ handle_char_store (gimple_stmt_iterator *gsi)
     {
       int cmp = compare_nonzero_chars (si, offset);
       gcc_assert (offset == 0 || cmp >= 0);
-      if (storing_zero_p && cmp == 0 && si->full_string_p)
+      if (storing_all_zeros_p && cmp == 0 && si->full_string_p)
 	{
 	  /* When overwriting a '\0' with a '\0', the store can be removed
 	     if we know it has been stored in the current function.  */
@@ -3218,17 +3252,25 @@ handle_char_store (gimple_stmt_iterator *gsi)
 	  gsi_next (gsi);
 	  return false;
 	}
-      else if (storing_zero_p || storing_nonzero_p || (offset != 0 && cmp > 0))
+      else if (storing_all_zeros_p || storing_nonzero_p || (offset != 0 && cmp > 0))
 	{
-	  /* When storing_nonzero_p, we know that the string now starts
-	     with OFFSET + 1 nonzero characters, but don't know whether
-	     there's a following nul terminator.
+	  /* When storing_nonzero_p, we know that the string will start
+	     with at least OFFSET + 1 nonzero characters.  If storing
+	     a single character, set si->NONZERO_CHARS to it.  If
+	     storing multiple characters, try to determine the number
+	     of leading non-zero characters and set si->NONZERO_CHARS
+	     to it instead.
 
-	     When storing_zero_p, we know that the string is now OFFSET
+	     When storing_all_zeros_p, we know that the string is now OFFSET
 	     characters long.
 
 	     Otherwise, we're storing an unknown value at offset OFFSET,
 	     so need to clip the nonzero_chars to OFFSET.  */
+	  bool full_string_p = storing_all_zeros_p;
+	  HOST_WIDE_INT len = (storing_nonzero_p
+			       ? get_min_string_length (rhs, &full_string_p)
+			       : 1);
+
 	  location_t loc = gimple_location (stmt);
 	  tree oldlen = si->nonzero_chars;
 	  if (cmp == 0 && si->full_string_p)
@@ -3238,11 +3280,11 @@ handle_char_store (gimple_stmt_iterator *gsi)
 	    adjust_last_stmt (si, stmt, false);
 	  si = unshare_strinfo (si);
 	  if (storing_nonzero_p)
-	    si->nonzero_chars = build_int_cst (size_type_node, offset + 1);
+	    si->nonzero_chars = build_int_cst (size_type_node, offset + len);
 	  else
 	    si->nonzero_chars = build_int_cst (size_type_node, offset);
-	  si->full_string_p = storing_zero_p;
-	  if (storing_zero_p
+	  si->full_string_p = full_string_p;
+	  if (storing_all_zeros_p
 	      && ssaname
 	      && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (ssaname))
 	    si->endptr = ssaname;
@@ -3262,7 +3304,7 @@ handle_char_store (gimple_stmt_iterator *gsi)
 	    si->prev = 0;
 	}
     }
-  else if (idx == 0 && (storing_zero_p || storing_nonzero_p))
+  else if (idx == 0 && (storing_all_zeros_p || storing_nonzero_p))
     {
       if (ssaname)
 	idx = new_stridx (ssaname);
@@ -3271,10 +3313,17 @@ handle_char_store (gimple_stmt_iterator *gsi)
       if (idx != 0)
 	{
 	  tree ptr = (ssaname ? ssaname : build_fold_addr_expr (lhs));
-	  tree len = storing_nonzero_p ? size_one_node : size_zero_node;
-	  si = new_strinfo (ptr, idx, len, storing_zero_p);
+	  HOST_WIDE_INT slen = (storing_all_zeros_p
+				? 0
+				: (storing_nonzero_p
+				   ? get_min_string_length (rhs, &full_string_p)
+				   : -1));
+	  tree len = (slen <= 0
+		      ? size_zero_node
+		      : build_int_cst (size_type_node, slen));
+	  si = new_strinfo (ptr, idx, len, slen >= 0 && full_string_p);
 	  set_strinfo (idx, si);
-	  if (storing_zero_p
+	  if (storing_all_zeros_p
 	      && ssaname
 	      && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (ssaname))
 	    si->endptr = ssaname;
@@ -3283,7 +3332,7 @@ handle_char_store (gimple_stmt_iterator *gsi)
 	}
     }
   else if (idx == 0
-	   && (rhslen = get_string_cst_length (gimple_assign_rhs1 (stmt))) >= 0
+	   && (rhslen = get_min_string_length (rhs, &full_string_p)) >= 0
 	   && ssaname == NULL_TREE
 	   && TREE_CODE (TREE_TYPE (lhs)) == ARRAY_TYPE)
     {
@@ -3294,14 +3343,15 @@ handle_char_store (gimple_stmt_iterator *gsi)
 	  if (idx != 0)
 	    {
 	      si = new_strinfo (build_fold_addr_expr (lhs), idx,
-				build_int_cst (size_type_node, rhslen), true);
+				build_int_cst (size_type_node, rhslen),
+				full_string_p);
 	      set_strinfo (idx, si);
 	      si->dont_invalidate = true;
 	    }
 	}
     }
 
-  if (si != NULL && offset == 0 && storing_zero_p)
+  if (si != NULL && offset == 0 && storing_all_zeros_p)
     {
       /* Allow adjust_last_stmt to remove it if the stored '\0'
 	 is immediately overwritten.  */
diff --git a/gcc/tree.c b/gcc/tree.c
index 2c75953..54c7c6a 100644
--- a/gcc/tree.c
+++ b/gcc/tree.c
@@ -10632,61 +10632,124 @@ vector_cst_elt (const_tree t, unsigned int i)
 }
 
 /* Given an initializer INIT, return TRUE if INIT is zero or some
-   aggregate of zeros.  Otherwise return FALSE.  */
+   aggregate of zeros.  Otherwise return FALSE.  If NONZERO is not
+   null, set *NONZERO if and only if INIT is known not to be all
+   zeros.  The combination of return value of false and *NONZERO
+   false implies that INIT may but need not be all zeros.  Other
+   combinations indicate definitive answers.  */
+
 bool
-initializer_zerop (const_tree init)
+initializer_zerop (const_tree init, bool *nonzero /* = NULL */)
 {
-  tree elt;
+  bool dummy;
+  if (!nonzero)
+    nonzero = &dummy;
+
+  /* Conservatively clear NONZERO and set it only if INIT is definitely
+     not all zero.  */
+  *nonzero = false;
 
   STRIP_NOPS (init);
 
+  unsigned HOST_WIDE_INT off = 0;
+
   switch (TREE_CODE (init))
     {
     case INTEGER_CST:
-      return integer_zerop (init);
+      if (integer_zerop (init))
+	return true;
+
+      *nonzero = true;
+      return false;
 
     case REAL_CST:
       /* ??? Note that this is not correct for C4X float formats.  There,
 	 a bit pattern of all zeros is 1.0; 0.0 is encoded with the most
 	 negative exponent.  */
-      return real_zerop (init)
-	&& ! REAL_VALUE_MINUS_ZERO (TREE_REAL_CST (init));
+      if (real_zerop (init)
+	  && !REAL_VALUE_MINUS_ZERO (TREE_REAL_CST (init)))
+	return true;
+
+      *nonzero = true;
+      return false;
 
     case FIXED_CST:
-      return fixed_zerop (init);
+      if (fixed_zerop (init))
+	return true;
+
+      *nonzero = true;
+      return false;
 
     case COMPLEX_CST:
-      return integer_zerop (init)
-	|| (real_zerop (init)
-	    && ! REAL_VALUE_MINUS_ZERO (TREE_REAL_CST (TREE_REALPART (init)))
-	    && ! REAL_VALUE_MINUS_ZERO (TREE_REAL_CST (TREE_IMAGPART (init))));
+      if (integer_zerop (init)
+	  || (real_zerop (init)
+	      && !REAL_VALUE_MINUS_ZERO (TREE_REAL_CST (TREE_REALPART (init)))
+	      && !REAL_VALUE_MINUS_ZERO (TREE_REAL_CST (TREE_IMAGPART (init)))))
+	return true;
+
+      *nonzero = true;
+      return false;
 
     case VECTOR_CST:
-      return (VECTOR_CST_NPATTERNS (init) == 1
-	      && VECTOR_CST_DUPLICATE_P (init)
-	      && initializer_zerop (VECTOR_CST_ENCODED_ELT (init, 0)));
+      if (VECTOR_CST_NPATTERNS (init) == 1
+	  && VECTOR_CST_DUPLICATE_P (init)
+	  && initializer_zerop (VECTOR_CST_ENCODED_ELT (init, 0)))
+	return true;
+
+      *nonzero = true;
+      return false;
 
     case CONSTRUCTOR:
       {
-	unsigned HOST_WIDE_INT idx;
-
 	if (TREE_CLOBBER_P (init))
 	  return false;
+
+	unsigned HOST_WIDE_INT idx;
+	tree elt;
+
 	FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (init), idx, elt)
-	  if (!initializer_zerop (elt))
+	  if (!initializer_zerop (elt, nonzero))
 	    return false;
+	
 	return true;
       }
 
+    case MEM_REF:
+      {
+	tree arg = TREE_OPERAND (init, 0);
+	if (TREE_CODE (arg) != ADDR_EXPR)
+	  return false;
+	tree offset = TREE_OPERAND (init, 1);
+	if (TREE_CODE (offset) != INTEGER_CST
+	    || !tree_fits_uhwi_p (offset))
+	  return false;
+	off = tree_to_uhwi (offset);
+	if (INT_MAX < off)
+	  return false;
+	arg = TREE_OPERAND (arg, 0);
+	if (TREE_CODE (arg) != STRING_CST)
+	  return false;
+	init = arg;
+      }
+      /* Fall through.  */
+
     case STRING_CST:
       {
-	int i;
+	gcc_assert (off <= INT_MAX);
+
+	int i = off;
+	int n = TREE_STRING_LENGTH (init);
+	if (n <= i)
+	  return false;
 
 	/* We need to loop through all elements to handle cases like
 	   "\0" and "\0foobar".  */
-	for (i = 0; i < TREE_STRING_LENGTH (init); ++i)
+	for (i = 0; i < n; ++i)
 	  if (TREE_STRING_POINTER (init)[i] != '\0')
-	    return false;
+	    {
+	      *nonzero = true;
+	      return false;
+	    }
 
 	return true;
       }
diff --git a/gcc/tree.h b/gcc/tree.h
index ef8bff40..e71be71 100644
--- a/gcc/tree.h
+++ b/gcc/tree.h
@@ -4452,9 +4452,13 @@ extern int list_length (const_tree);
 extern tree first_field (const_tree);
 
 /* Given an initializer INIT, return TRUE if INIT is zero or some
-   aggregate of zeros.  Otherwise return FALSE.  */
+   aggregate of zeros.  Otherwise return FALSE.  If NONZERO is not
+   null, set *NONZERO if and only if INIT is known not to be all
+   zeros.  The combination of return value of false and *NONZERO
+   false implies that INIT may but need not be all zeros.  Other
+   combinations indicate definitive answers.  */
 
-extern bool initializer_zerop (const_tree);
+extern bool initializer_zerop (const_tree, bool * = NULL);
 
 extern wide_int vector_cst_int_elt (const_tree, unsigned int);
 extern tree vector_cst_elt (const_tree, unsigned int);


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] enhance strlen to understand MEM_REF and partial overlaps (PR 86042, 86043)
  2018-06-07 15:57 [PATCH] enhance strlen to understand MEM_REF and partial overlaps (PR 86042, 86043) Martin Sebor
@ 2018-06-29 17:06 ` Jeff Law
  2018-07-26 16:50   ` Martin Sebor
  0 siblings, 1 reply; 8+ messages in thread
From: Jeff Law @ 2018-06-29 17:06 UTC (permalink / raw)
  To: Martin Sebor, Gcc Patch List

On 06/07/2018 09:57 AM, Martin Sebor wrote:
> The attached patch enhances the strlen pass to more consistently
> deal with MEM_REF assignments (PR 86042) and to track string
> lengths across calls to memcpy that overwrite parts of a string
> with sequences of non-nul characters (PR 86043).
> 
> Fixes for both bugs rely on changes to the same code so I chose
> to include them in the same patch.
> 
> To fix PR 86042 the patch extends handle_char_store() to deal with
> more forms of multi-character assignments from MEM_REF (originally
> introduced in r256180).  To handle assignments from strings of
> multiple nuls the patch also extends the initializer_zerop()
> function to understand MEM_REFs of the form:
> 
>    MEM[(char * {ref-all})&a] = MEM[(char * {ref-all})"..."];
> 
> The solution for PR 86043 consists of two parts: the extension
> above which lets handle_char_store() recognize assignments of
> sequences of non-null characters that overwrite some portion of
> the leading non-zero characters in the destination and avoid
> discarding the destination information, and a similar extension
> to handle_builtin_memcpy().
> 
> Martin
> 
> gcc-86042.diff
> 
> 
> PR tree-optimization/86042 - missing strlen optimization after second strcpy
> 
> gcc/ChangeLog:
> 
> 	PR tree-optimization/86042
> 	* tree-ssa-strlen.c (handle_builtin_memcpy): Handle strict overlaps.
> 	(get_string_cst_length): Rename...
> 	(get_min_string_length): ...to this.  Add argument.
> 	(handle_char_store): Extend to handle multi-character stores by
> 	MEM_REF.
> 	* tree.c (initializer_zerop): Use new argument.  Handle MEM_REF.
> 	* tree.h (initializer_zerop): Add argument.
> 
> gcc/testsuite/ChangeLog:
> 
> 	PR tree-optimization/86042
> 	* gcc.dg/strlenopt-44.c: New test.
OK.

Thanks,
jeff

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] enhance strlen to understand MEM_REF and partial overlaps (PR 86042, 86043)
  2018-06-29 17:06 ` Jeff Law
@ 2018-07-26 16:50   ` Martin Sebor
  2018-07-27  9:05     ` Eric Botcazou
  0 siblings, 1 reply; 8+ messages in thread
From: Martin Sebor @ 2018-07-26 16:50 UTC (permalink / raw)
  To: Jeff Law, Gcc Patch List

On 06/29/2018 11:05 AM, Jeff Law wrote:
> On 06/07/2018 09:57 AM, Martin Sebor wrote:
>> The attached patch enhances the strlen pass to more consistently
>> deal with MEM_REF assignments (PR 86042) and to track string
>> lengths across calls to memcpy that overwrite parts of a string
>> with sequences of non-nul characters (PR 86043).
>>
>> Fixes for both bugs rely on changes to the same code so I chose
>> to include them in the same patch.
>>
>> To fix PR 86042 the patch extends handle_char_store() to deal with
>> more forms of multi-character assignments from MEM_REF (originally
>> introduced in r256180).  To handle assignments from strings of
>> multiple nuls the patch also extends the initializer_zerop()
>> function to understand MEM_REFs of the form:
>>
>>    MEM[(char * {ref-all})&a] = MEM[(char * {ref-all})"..."];
>>
>> The solution for PR 86043 consists of two parts: the extension
>> above which lets handle_char_store() recognize assignments of
>> sequences of non-null characters that overwrite some portion of
>> the leading non-zero characters in the destination and avoid
>> discarding the destination information, and a similar extension
>> to handle_builtin_memcpy().
>>
>> Martin
>>
>> gcc-86042.diff
>>
>>
>> PR tree-optimization/86042 - missing strlen optimization after second strcpy
>>
>> gcc/ChangeLog:
>>
>> 	PR tree-optimization/86042
>> 	* tree-ssa-strlen.c (handle_builtin_memcpy): Handle strict overlaps.
>> 	(get_string_cst_length): Rename...
>> 	(get_min_string_length): ...to this.  Add argument.
>> 	(handle_char_store): Extend to handle multi-character stores by
>> 	MEM_REF.
>> 	* tree.c (initializer_zerop): Use new argument.  Handle MEM_REF.
>> 	* tree.h (initializer_zerop): Add argument.
>>
>> gcc/testsuite/ChangeLog:
>>
>> 	PR tree-optimization/86042
>> 	* gcc.dg/strlenopt-44.c: New test.
> OK.

I missed your approval and didn't get to committing the patch
until today.  While retesting it on top of fresh trunk I noticed
a few test failures due to other recent strlen changes.  I made
adjustments to the patch to avoid most of them and opened bug
86688 for one that I think needs a separate code change and
xfailed the test cases until the bug gets resolved.

Martin

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] enhance strlen to understand MEM_REF and partial overlaps (PR 86042, 86043)
  2018-07-26 16:50   ` Martin Sebor
@ 2018-07-27  9:05     ` Eric Botcazou
  2018-07-27 15:07       ` Martin Sebor
  0 siblings, 1 reply; 8+ messages in thread
From: Eric Botcazou @ 2018-07-27  9:05 UTC (permalink / raw)
  To: gcc-patches, Martin Sebor; +Cc: Jeff Law

> I missed your approval and didn't get to committing the patch
> until today.  While retesting it on top of fresh trunk I noticed
> a few test failures due to other recent strlen changes.  I made
> adjustments to the patch to avoid most of them and opened bug
> 86688 for one that I think needs a separate code change and
> xfailed the test cases until the bug gets resolved.

But it has introduced a couple of regressions in the ACATS testsuite:

FAIL:   c52103c
FAIL:   cde0001

                === acats Summary ===
# of expected passes            2318
# of unexpected failures        2
Native configuration is x86_64-suse-linux-gnu


+===========================GNAT BUG DETECTED==============================+
| 9.0.0 20180727 (experimental) [trunk revision 263028] (x86_64-suse-linux) 
GCC error:|
| in handle_char_store, at tree-ssa-strlen.c:3332                          |
| Error detected around c52103c.adb:43:1                                   |

-- 
Eric Botcazou

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] enhance strlen to understand MEM_REF and partial overlaps (PR 86042, 86043)
  2018-07-27  9:05     ` Eric Botcazou
@ 2018-07-27 15:07       ` Martin Sebor
  2018-07-27 16:17         ` Eric Botcazou
  0 siblings, 1 reply; 8+ messages in thread
From: Martin Sebor @ 2018-07-27 15:07 UTC (permalink / raw)
  To: Eric Botcazou, gcc-patches; +Cc: Jeff Law

On 07/27/2018 03:05 AM, Eric Botcazou wrote:
>> I missed your approval and didn't get to committing the patch
>> until today.  While retesting it on top of fresh trunk I noticed
>> a few test failures due to other recent strlen changes.  I made
>> adjustments to the patch to avoid most of them and opened bug
>> 86688 for one that I think needs a separate code change and
>> xfailed the test cases until the bug gets resolved.
>
> But it has introduced a couple of regressions in the ACATS testsuite:
>
> FAIL:   c52103c
> FAIL:   cde0001
>
>                 === acats Summary ===
> # of expected passes            2318
> # of unexpected failures        2
> Native configuration is x86_64-suse-linux-gnu

Yes, I didn't see these failures, sorry.

FWIW, there are 128 failures in the GCC test suite on x86_64.
Many of these have been there for weeks (e.g., the lto failures
due to PR86004), even years (guality).  My script to compare
the results against a baseline uses the following regular
expression to extract the names of failing (and other
"interesting") tests.  As it happens, the Ada logs have
extra spaces before the test names that makes the script
exclude them from the list:

	    tlist=$(sed -n -e "s/^FAIL: \([^ ]*\) .*/\1/p" \
  			   -e "s/^XFAIL: \([^ ]*\) .*/\1/p" \
  			   -e "s/^UNRESOLVED: \([^ ]*\) .*/\1/p" \
			   -e "s/^XPASS: \([^ ]*\) .*/\1/p" \
			   test_summary.log \
			| sort -u)

Martin

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] enhance strlen to understand MEM_REF and partial overlaps (PR 86042, 86043)
  2018-07-27 15:07       ` Martin Sebor
@ 2018-07-27 16:17         ` Eric Botcazou
  2018-07-27 16:54           ` Martin Sebor
  0 siblings, 1 reply; 8+ messages in thread
From: Eric Botcazou @ 2018-07-27 16:17 UTC (permalink / raw)
  To: Martin Sebor; +Cc: gcc-patches, Jeff Law

> FWIW, there are 128 failures in the GCC test suite on x86_64.
> Many of these have been there for weeks (e.g., the lto failures
> due to PR86004), even years (guality).  My script to compare
> the results against a baseline uses the following regular
> expression to extract the names of failing (and other
> "interesting") tests. 

Why not just use "make mail-report.log" ?

-- 
Eric Botcazou

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] enhance strlen to understand MEM_REF and partial overlaps (PR 86042, 86043)
  2018-07-27 16:17         ` Eric Botcazou
@ 2018-07-27 16:54           ` Martin Sebor
  2018-07-27 21:31             ` Eric Botcazou
  0 siblings, 1 reply; 8+ messages in thread
From: Martin Sebor @ 2018-07-27 16:54 UTC (permalink / raw)
  To: Eric Botcazou; +Cc: gcc-patches, Jeff Law

[-- Attachment #1: Type: text/plain, Size: 1543 bytes --]

On 07/27/2018 10:17 AM, Eric Botcazou wrote:
>> FWIW, there are 128 failures in the GCC test suite on x86_64.
>> Many of these have been there for weeks (e.g., the lto failures
>> due to PR86004), even years (guality).  My script to compare
>> the results against a baseline uses the following regular
>> expression to extract the names of failing (and other
>> "interesting") tests.
>
> Why not just use "make mail-report.log" ?

My script downloads the latest result posted to gcc-testresults
for the target I'm building for, iterates over the interesting
test results in my build logs, compares them to the baseline,
and prints a condensed report like the one in the attachment.
This helps me decide what failures to look into and which ones
are probably safe to ignore.  The tests marked with '!' are
regressions in my run vs the baseline.  I only look at those,
and only those I don't recognize (which admittedly is error-prone).

The others are either the same failures or improvements.
(The numbers in parentheses show the difference between the number
of times the test was seen to fail in the baseline vs in my run.)

What I was trying to highlight is that rolling my own solution like
this makes missing regressions more likely than having a shared
solution would.  It's a separate issue that some of these failures
continue to persist despite having the appearance of regressions
the LTO failures), and another problem are the failures in
the guality tests.  These also make it more likely that
a regression will be missed.

Martin

[-- Attachment #2: gcc-86042-summary.log --]
[-- Type: text/x-log, Size: 2281 bytes --]

!  FAIL: gcc.dg/attr-nonstring-2.c (4: +4)
   FAIL: gcc.dg/guality/pr54200.c (1)
   FAIL: gcc.dg/guality/pr54519-1.c (4: -12)
   FAIL: gcc.dg/guality/pr54519-2.c (2: -2)
   FAIL: gcc.dg/guality/pr54519-3.c (8: -8)
   FAIL: gcc.dg/guality/pr54519-4.c (2: -2)
   FAIL: gcc.dg/guality/pr54519-5.c (2: -2)
   FAIL: gcc.dg/guality/pr54970.c (29: -29)
   FAIL: gcc.dg/guality/pr56154-1.c (1: -1)
   FAIL: gcc.dg/guality/pr59776.c (8: -8)
   FAIL: gcc.dg/guality/pr68860-1.c (24: -24)
   FAIL: gcc.dg/guality/sra-1.c (6: -6)
   FAIL: gcc.dg/guality/vla-1.c (8: -8)
   FAIL: g++.dg/guality/pr55665.C (1: -1)
!  FAIL: g++.dg/lto/20091002-1 (1: +1)
!  FAIL: g++.dg/lto/pr64043 (1: +1)
!  FAIL: g++.dg/lto/pr65193 (1: +1)
!  FAIL: g++.dg/lto/pr65302 (1: +1)
!  FAIL: g++.dg/lto/pr65316 (1: +1)
!  FAIL: g++.dg/lto/pr65549 (2: +2)
!  FAIL: g++.dg/lto/pr66180 (1: +1)
!  FAIL: g++.dg/lto/pr66705 (1: +1)
!  FAIL: g++.dg/lto/pr68057 (4: +4)
!  FAIL: g++.dg/lto/pr69077 (1: +1)
!  FAIL: g++.dg/lto/pr69133 (1: +1)
!  FAIL: g++.dg/lto/pr69137 (1: +1)
!  FAIL: g++.dg/lto/pr79000 (1: +1)
!  FAIL: g++.dg/lto/pr81940 (1: +1)
!  FAIL: g++.dg/lto/pr85176 (1: +1)
   FAIL: g++.dg/pr80481.C (3)
   FAIL: g++.dg/pr83239.C (2)
!  FAIL: gfortran.dg/derived_constructor_comps_5.f90 (6: +6)
!  FAIL: gfortran.dg/lto/pr79108 (1: +1)
!  FAIL: ./index0-out.go (1: +1)
! UNRESOLVED: gcc.dg/tree-prof/crossmodule-indircall-1a.c (1: +1)
! UNRESOLVED: gcc.dg/tree-prof/pr59003.c (1: +1)
! UNRESOLVED: gcc.dg/tree-prof/wcoverage-mismatch.c (2: +2)
! UNRESOLVED: g++.dg/bprob/g++-bprob-2.C (2: +2)
! UNRESOLVED: gfortran.dg/derived_constructor_comps_5.f90 (3: +3)
  UNRESOLVED: libgomp.oacc-c++/non-scalar-data.C (1: -1)
  XPASS: gcc.dg/guality/example.c (3: -7)
  XPASS: gcc.dg/guality/guality.c (8: -8)
  XPASS: gcc.dg/guality/inline-params.c (5: -5)
  XPASS: gcc.dg/guality/pr41353-1.c (2: -3)
  XPASS: gcc.dg/guality/pr54970.c (5: -5)
warnings found:
 360   Wabi
   7   Wcast-function-type
   4   Wcpp
   6   Wdeprecated-copy
   5   Wformat-truncation=
   8   Wimplicit-fallthrough=
  14   Wincompatible-pointer-types
  49   Wmaybe-uninitialized
  43   Wmisleading-indentation
  12   Wnonnull
   8   Wsign-compare
   4   Wstringop-truncation
  19   Wuninitialized
   2   Wunused-function
  20   Wunused-parameter

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] enhance strlen to understand MEM_REF and partial overlaps (PR 86042, 86043)
  2018-07-27 16:54           ` Martin Sebor
@ 2018-07-27 21:31             ` Eric Botcazou
  0 siblings, 0 replies; 8+ messages in thread
From: Eric Botcazou @ 2018-07-27 21:31 UTC (permalink / raw)
  To: Martin Sebor; +Cc: gcc-patches, Jeff Law

> What I was trying to highlight is that rolling my own solution like
> this makes missing regressions more likely than having a shared
> solution would.

But 'make mail-report.log' is precisely the shared solution, no need to 
reinvent the wheel here.

-- 
Eric Botcazou

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2018-07-27 21:31 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-06-07 15:57 [PATCH] enhance strlen to understand MEM_REF and partial overlaps (PR 86042, 86043) Martin Sebor
2018-06-29 17:06 ` Jeff Law
2018-07-26 16:50   ` Martin Sebor
2018-07-27  9:05     ` Eric Botcazou
2018-07-27 15:07       ` Martin Sebor
2018-07-27 16:17         ` Eric Botcazou
2018-07-27 16:54           ` Martin Sebor
2018-07-27 21:31             ` Eric Botcazou

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).