public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] convert braced initializers to strings (PR 71625)
@ 2018-07-30 23:51 Martin Sebor
  2018-07-31 13:39 ` Jason Merrill
  2018-08-06 16:41 ` Martin Sebor
  0 siblings, 2 replies; 31+ messages in thread
From: Martin Sebor @ 2018-07-30 23:51 UTC (permalink / raw)
  To: Gcc Patch List; +Cc: Jason Merrill, Joseph Myers

[-- Attachment #1: Type: text/plain, Size: 946 bytes --]

The middle-end contains code to determine the lengths of constant
character arrays initialized by string literals.  The code is used
in a number of optimizations and warnings.

However, the code is unable to deal with constant arrays initialized
using the braced initializer syntax, as in

   const char a[] = { '1', '2', '\0' };

The attached patch extends the C and C++ front-ends to convert such
initializers into a STRING_CST form.

The goal of this work is to both enable existing optimizations for
such arrays, and to help detect bugs due to using non-nul terminated
arrays where nul-terminated strings are expected.  The latter is
an extension of the GCC 8 _Wstringop-overflow and
-Wstringop-truncation warnings that help detect or prevent reading
past the end of dynamically created character arrays.  Future work
includes detecting potential past-the-end reads from uninitialized
local character arrays.

Tested on x86_64-linux.

Martin

[-- Attachment #2: gcc-71625.diff --]
[-- Type: text/x-patch, Size: 10206 bytes --]

PR tree-optimization/71625 - missing strlen optimization on different array initialization style

gcc/c/ChangeLog:

	PR tree-optimization/71625
	* c-parser.c (c_parser_declaration_or_fndef): Call
	convert_braced_list_to_string.

gcc/c-family/ChangeLog:

	PR tree-optimization/71625
	* c-common.c (convert_braced_list_to_string): New function.
	* c-common.h (convert_braced_list_to_string): Declare it.

gcc/cp/ChangeLog:

	PR tree-optimization/71625
	* parser.c (cp_parser_init_declarator):  Call
	convert_braced_list_to_string.

gcc/testsuite/ChangeLog:

	PR tree-optimization/71625
	* g++.dg/init/string2.C: New test.
	* g++.dg/init/string3.C: New test.
	* gcc.dg/strlenopt-55.c: New test.

diff --git a/gcc/c-family/c-common.c b/gcc/c-family/c-common.c
index 422d668..9a93175 100644
--- a/gcc/c-family/c-common.c
+++ b/gcc/c-family/c-common.c
@@ -8345,4 +8345,72 @@ maybe_add_include_fixit (rich_location *richloc, const char *header)
   free (text);
 }
 
+/* Attempt to convert a braced array initializer list CTOR into
+   a STRING_CST for convenience and efficiency.  When non-null,
+   use EVAL to attempt to evalue constants (used by C++).
+   MAXELTS gives the maximum number of elements to accept.
+   Return the converted string on success or null on failure.  */
+
+tree
+convert_braced_list_to_string (tree ctor, tree (*eval)(tree),
+			       unsigned HOST_WIDE_INT maxelts)
+{
+  unsigned HOST_WIDE_INT nelts = CONSTRUCTOR_NELTS (ctor);
+
+  auto_vec<char> str;
+  str.reserve (nelts + 1);
+
+  unsigned HOST_WIDE_INT i;
+  tree index, value;
+
+  FOR_EACH_CONSTRUCTOR_ELT (CONSTRUCTOR_ELTS (ctor), i, index, value)
+    {
+      unsigned HOST_WIDE_INT idx = index ? tree_to_uhwi (index) : i;
+
+      /* auto_vec is limited to UINT_MAX elements.  */
+      if (idx > UINT_MAX)
+	return NULL_TREE;
+
+      /* Attempt to evaluate constants.  */
+      if (eval)
+	value = eval (value);
+
+      /* Avoid non-constant initializers.  */
+     if (!tree_fits_uhwi_p (value))
+	return NULL_TREE;
+
+      /* Skip over embedded nuls.  */
+      unsigned val = tree_to_uhwi (value);
+      if (!val)
+	continue;
+
+      /* Bail if the CTOR has a block of more than 256 embedded nuls
+	 due to implicitly initialized elements.  */
+      unsigned nelts = (idx - str.length ()) + 1;
+      if (nelts > 256)
+	return NULL_TREE;
+
+      if (nelts > 1)
+	{
+	  str.reserve (idx);
+	  str.quick_grow_cleared (idx);
+	}
+
+      if (idx > maxelts)
+	return NULL_TREE;
+
+      str.safe_insert (idx, val);
+    }
+
+  /* Append a nul for the empty initializer { } and for the last
+     explicit initializer in the loop above that is a nul.  */
+  if (!nelts || str.length () < i)
+    str.safe_push (0);
+
+  /* Build a string literal but return the embedded STRING_CST.  */
+  tree res = build_string_literal (str.length (), str.begin ());
+  res = TREE_OPERAND (TREE_OPERAND (res, 0), 0);
+  return res;
+}
+
 #include "gt-c-family-c-common.h"
diff --git a/gcc/c-family/c-common.h b/gcc/c-family/c-common.h
index fcec95b..343a1ae 100644
--- a/gcc/c-family/c-common.h
+++ b/gcc/c-family/c-common.h
@@ -1331,6 +1331,8 @@ extern void maybe_add_include_fixit (rich_location *, const char *);
 extern void maybe_suggest_missing_token_insertion (rich_location *richloc,
 						   enum cpp_ttype token_type,
 						   location_t prev_token_loc);
+extern tree convert_braced_list_to_string (tree, tree (*)(tree) = NULL,
+					   unsigned HOST_WIDE_INT = -1);
 
 #if CHECKING_P
 namespace selftest {
diff --git a/gcc/c/c-parser.c b/gcc/c/c-parser.c
index 7a92628..e12d270 100644
--- a/gcc/c/c-parser.c
+++ b/gcc/c/c-parser.c
@@ -2126,6 +2126,23 @@ c_parser_declaration_or_fndef (c_parser *parser, bool fndef_ok,
 	      if (d != error_mark_node)
 		{
 		  maybe_warn_string_init (init_loc, TREE_TYPE (d), init);
+
+		  /* Convert a string CONSTRUCTOR into a STRING_CST.  */
+		  tree valtype = TREE_TYPE (init.value);
+		  if (TREE_CODE (init.value) == CONSTRUCTOR
+		      && TREE_CODE (valtype) == ARRAY_TYPE)
+		    {
+		      valtype = TREE_TYPE (valtype);
+		      if (TYPE_STRING_FLAG (valtype))
+			if (tree str
+			    = convert_braced_list_to_string (init.value))
+			  {
+			    /* Replace the initializer with the string
+			       constant.  The resu*/
+			    init.value = str;
+			  }
+		    }
+
 		  finish_decl (d, init_loc, init.value,
 			       init.original_type, asm_name);
 		}
diff --git a/gcc/cp/parser.c b/gcc/cp/parser.c
index d44a6b8..c35c2f1 100644
--- a/gcc/cp/parser.c
+++ b/gcc/cp/parser.c
@@ -19825,6 +19825,32 @@ cp_parser_init_declarator (cp_parser* parser,
 	    finish_lambda_scope ();
 	  if (initializer == error_mark_node)
 	    cp_parser_skip_to_end_of_statement (parser);
+	  else if (decl)
+	    {
+	      tree valtype = TREE_TYPE (decl);
+	      if (TREE_CODE (valtype) == ARRAY_TYPE
+		  && TYPE_STRING_FLAG (TREE_TYPE (valtype))
+		  && TYPE_MAIN_VARIANT (TREE_TYPE (valtype)) == char_type_node)
+		{
+		  /* If the array has an explicit bound, use it to
+		     constrain the size of the string.  */
+		  unsigned HOST_WIDE_INT maxelts = HOST_WIDE_INT_M1U;
+		  if (tree nelts = DECL_SIZE_UNIT (decl))
+		    if (tree_fits_uhwi_p (nelts))
+		      maxelts = tree_to_uhwi (nelts);
+
+		  /* Convert a string CONSTRUCTOR into a STRING_CST.  */
+		  if (TREE_CODE (initializer) == CONSTRUCTOR
+		      && TREE_TYPE (initializer) == init_list_type_node)
+		    {
+		      if (tree str
+			  = convert_braced_list_to_string (initializer,
+						   scalar_constant_value,
+						   maxelts))
+			initializer = str;
+		    }
+		}
+	    }
 	}
     }
 
diff --git a/gcc/testsuite/g++.dg/init/string2.C b/gcc/testsuite/g++.dg/init/string2.C
new file mode 100644
index 0000000..acb2f5b
--- /dev/null
+++ b/gcc/testsuite/g++.dg/init/string2.C
@@ -0,0 +1,49 @@
+// PR tree-optimization/71625 - missing strlen optimization on different
+// array initialization style
+//
+// Verify that strlen() calls with constant character array arguments
+// initialized with string constants are folded.  (This is a small
+// subset of pr63989).
+// { dg-do compile }
+// { dg-options "-O0 -fdump-tree-gimple" }
+
+const char a0[] = { 'a', 'b', 'c', '\0' };
+
+int len0 ()
+{
+  return __builtin_strlen (a0);
+}
+
+const char c = 0;
+const char a1[] = { 'a', 'b', 'c', c };
+
+int len1 ()
+{
+  return __builtin_strlen (a1);
+}
+
+#if 0
+
+// The following aren't handled.
+
+const char &cref = c;
+const char a2[] = { 'a', 'b', 'c', cref };
+
+int len2 ()
+{
+  return __builtin_strlen (a2);
+}
+
+
+const char* const cptr = &cref;
+const char a3[] = { 'a', 'b', 'c', *cptr };
+
+int len3 ()
+{
+  return __builtin_strlen (a3);
+}
+
+#endif
+
+// { dg-final { scan-tree-dump-times "strlen" 0 "gimple" } }
diff --git a/gcc/testsuite/g++.dg/init/string3.C b/gcc/testsuite/g++.dg/init/string3.C
new file mode 100644
index 0000000..f7c7f2f
--- /dev/null
+++ b/gcc/testsuite/g++.dg/init/string3.C
@@ -0,0 +1,37 @@
+// PR tree-optimization/71625 - missing strlen optimization on different
+// array initialization style
+//
+// Verify that strlen() call with a constant character array argument
+// initialized with non-constant elements isn't folded.  (This is a small
+// subset of pr63989).
+//
+// { dg-do compile }
+// { dg-options "-O2 -fdump-tree-optimized" }
+
+
+extern const char c;
+const char a0[] = { 'a', 'b', 'c', c };
+
+int len0 ()
+{
+  return __builtin_strlen (a0);
+}
+
+const char &ref = c;
+const char a1[] = { 'a', 'b', 'c', ref };
+
+int len1 ()
+{
+  return __builtin_strlen (a1);
+}
+
+const char* const ptr = &c;
+const char a2[] = { 'a', 'b', 'c', *ptr };
+
+int len2 ()
+{
+  return __builtin_strlen (a2);
+}
+
+// { dg-final { scan-tree-dump-times "strlen" 3 "optimized" } }
diff --git a/gcc/testsuite/gcc.dg/strlenopt-55.c b/gcc/testsuite/gcc.dg/strlenopt-55.c
new file mode 100644
index 0000000..68c7f1d
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/strlenopt-55.c
@@ -0,0 +1,83 @@
+/* PR tree-optimization/71625 - missing strlen optimization on different
+   array initialization style
+
+   Verify that strlen() of braced initialized array is folded
+   { dg-do compile }
+   { dg-options "-O0 -Wall -fdump-tree-gimple -fdump-tree-optimized" } */
+
+#include "strlenopt.h"
+
+const char a2_implicit[2] = { };
+const char a3_implicit[3] = { };
+
+const char a3_nul[3] = { 0 };
+const char a5_nul1[3] = { [1] = 0 };
+const char a7_nul2[3] = { [2] = 0 };
+
+const char ax_2_nul[] = { '1', '2', '\0' };
+const char ax_3_nul[] = { '1', '2', '3', '\0' };
+
+const char ax_3_des_nul[] = { [3] = 0, [2] = '3', [1] = '2', [0] = '1' };
+
+const char ax_3[] = { '1', '2', '3' };
+const char a3_3[3] = { '1', '2', '3' };
+
+const char a100_3[] = { '1', '2', '3', [100] = '\0' };
+
+#define CONCAT(x, y) x ## y
+#define CAT(x, y) CONCAT (x, y)
+#define FAILNAME(name) CAT (call_ ## name ##_on_line_, __LINE__)
+
+#define FAIL(name) do {				\
+    extern void FAILNAME (name) (void);		\
+    FAILNAME (name)();				\
+  } while (0)
+
+/* Macro to emit a call to funcation named
+   call_in_true_branch_not_eliminated_on_line_NNN()
+   for each call that's expected to be eliminated.  The dg-final
+   scan-tree-dump-time directive at the bottom of the test verifies
+   that no such call appears in output.  */
+#define ELIM(expr)							\
+  if (!(expr)) FAIL (in_true_branch_not_eliminated); else (void)0
+
+#define T(s, n) ELIM (strlen (s) == n)
+
+void test_nulstring (void)
+{
+  T (a2_implicit, 0);
+  T (a3_implicit, 0);
+
+  T (a3_nul, 0);
+  T (a5_nul1, 0);
+  T (a7_nul2, 0);
+
+  T (ax_2_nul, 2);
+  T (ax_3_nul, 3);
+  T (ax_3_des_nul, 3);
+
+  T (a100_3, 3);
+}
+
+/* Verify that excessively large initializers don't run out of
+   memory.  */
+
+const char large_string[] = { 'a', [__INT_MAX__] = '\0' };
+
+const int test_large_string (void)
+{
+  return large_string[0] + large_string[__INT_MAX__ - 1];
+}
+
+
+const char very_large_string[] = { 'a', [__LONG_MAX__ / 2] = '\0' };
+
+const int test_very_large_string (void)
+{
+  return very_large_string[0] + very_large_string[__LONG_MAX__ / 2 - 1];
+}
+
+
+/* { dg-final { scan-tree-dump-times "strlen" 0 "gimple" } }
+   { dg-final { scan-tree-dump-times "call_in_true_branch_not_eliminated" 0 "optimized" } } */

^ permalink raw reply	[flat|nested] 31+ messages in thread

end of thread, other threads:[~2018-08-17 17:22 UTC | newest]

Thread overview: 31+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-07-30 23:51 [PATCH] convert braced initializers to strings (PR 71625) Martin Sebor
2018-07-31 13:39 ` Jason Merrill
2018-07-31 14:49   ` Martin Sebor
2018-08-07  8:58     ` Jason Merrill
2018-08-07 23:04       ` Martin Sebor
2018-08-08 11:09         ` Jason Merrill
2018-08-09  0:17           ` Martin Sebor
2018-08-09 22:07             ` Joseph Myers
2018-08-13 10:36             ` Jason Merrill
2018-08-14 13:27             ` James Greenhalgh
2018-08-14 15:08               ` Martin Sebor
2018-08-14 15:24                 ` Martin Sebor
2018-08-15  2:34                   ` Martin Sebor
2018-08-15 10:29                     ` James Greenhalgh
2018-08-15 15:04                       ` Richard Biener
2018-08-15 15:51                       ` Martin Sebor
2018-08-17 17:19                         ` [PATCH] Fix poly types after PR tree-optimization/71625 strlen optimization Szabolcs Nagy
2018-08-17 17:22                           ` Kyrill Tkachov
2018-08-14 21:14               ` [PATCH] convert braced initializers to strings (PR 71625) Joseph Myers
2018-08-14 22:18                 ` Martin Sebor
2018-08-15 12:07                   ` Joseph Myers
2018-08-15 21:02                     ` Martin Sebor
2018-08-15 21:14                       ` Jeff Law
2018-08-15 21:34                       ` Jeff Law
2018-08-16 15:23                         ` Martin Sebor
2018-08-16 15:32                           ` Jeff Law
2018-08-06 16:41 ` Martin Sebor
2018-08-06 17:04   ` Joseph Myers
2018-08-07  2:02     ` Martin Sebor
2018-08-07 11:31       ` Joseph Myers
2018-08-07 23:05         ` Martin Sebor

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).