public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] convert braced initializers to strings (PR 71625)
@ 2018-07-30 23:51 Martin Sebor
  2018-07-31 13:39 ` Jason Merrill
  2018-08-06 16:41 ` Martin Sebor
  0 siblings, 2 replies; 45+ messages in thread
From: Martin Sebor @ 2018-07-30 23:51 UTC (permalink / raw)
  To: Gcc Patch List; +Cc: Jason Merrill, Joseph Myers

[-- Attachment #1: Type: text/plain, Size: 946 bytes --]

The middle-end contains code to determine the lengths of constant
character arrays initialized by string literals.  The code is used
in a number of optimizations and warnings.

However, the code is unable to deal with constant arrays initialized
using the braced initializer syntax, as in

   const char a[] = { '1', '2', '\0' };

The attached patch extends the C and C++ front-ends to convert such
initializers into a STRING_CST form.

The goal of this work is to both enable existing optimizations for
such arrays, and to help detect bugs due to using non-nul terminated
arrays where nul-terminated strings are expected.  The latter is
an extension of the GCC 8 _Wstringop-overflow and
-Wstringop-truncation warnings that help detect or prevent reading
past the end of dynamically created character arrays.  Future work
includes detecting potential past-the-end reads from uninitialized
local character arrays.

Tested on x86_64-linux.

Martin

[-- Attachment #2: gcc-71625.diff --]
[-- Type: text/x-patch, Size: 10206 bytes --]

PR tree-optimization/71625 - missing strlen optimization on different array initialization style

gcc/c/ChangeLog:

	PR tree-optimization/71625
	* c-parser.c (c_parser_declaration_or_fndef): Call
	convert_braced_list_to_string.

gcc/c-family/ChangeLog:

	PR tree-optimization/71625
	* c-common.c (convert_braced_list_to_string): New function.
	* c-common.h (convert_braced_list_to_string): Declare it.

gcc/cp/ChangeLog:

	PR tree-optimization/71625
	* parser.c (cp_parser_init_declarator):  Call
	convert_braced_list_to_string.

gcc/testsuite/ChangeLog:

	PR tree-optimization/71625
	* g++.dg/init/string2.C: New test.
	* g++.dg/init/string3.C: New test.
	* gcc.dg/strlenopt-55.c: New test.

diff --git a/gcc/c-family/c-common.c b/gcc/c-family/c-common.c
index 422d668..9a93175 100644
--- a/gcc/c-family/c-common.c
+++ b/gcc/c-family/c-common.c
@@ -8345,4 +8345,72 @@ maybe_add_include_fixit (rich_location *richloc, const char *header)
   free (text);
 }
 
+/* Attempt to convert a braced array initializer list CTOR into
+   a STRING_CST for convenience and efficiency.  When non-null,
+   use EVAL to attempt to evalue constants (used by C++).
+   MAXELTS gives the maximum number of elements to accept.
+   Return the converted string on success or null on failure.  */
+
+tree
+convert_braced_list_to_string (tree ctor, tree (*eval)(tree),
+			       unsigned HOST_WIDE_INT maxelts)
+{
+  unsigned HOST_WIDE_INT nelts = CONSTRUCTOR_NELTS (ctor);
+
+  auto_vec<char> str;
+  str.reserve (nelts + 1);
+
+  unsigned HOST_WIDE_INT i;
+  tree index, value;
+
+  FOR_EACH_CONSTRUCTOR_ELT (CONSTRUCTOR_ELTS (ctor), i, index, value)
+    {
+      unsigned HOST_WIDE_INT idx = index ? tree_to_uhwi (index) : i;
+
+      /* auto_vec is limited to UINT_MAX elements.  */
+      if (idx > UINT_MAX)
+	return NULL_TREE;
+
+      /* Attempt to evaluate constants.  */
+      if (eval)
+	value = eval (value);
+
+      /* Avoid non-constant initializers.  */
+     if (!tree_fits_uhwi_p (value))
+	return NULL_TREE;
+
+      /* Skip over embedded nuls.  */
+      unsigned val = tree_to_uhwi (value);
+      if (!val)
+	continue;
+
+      /* Bail if the CTOR has a block of more than 256 embedded nuls
+	 due to implicitly initialized elements.  */
+      unsigned nelts = (idx - str.length ()) + 1;
+      if (nelts > 256)
+	return NULL_TREE;
+
+      if (nelts > 1)
+	{
+	  str.reserve (idx);
+	  str.quick_grow_cleared (idx);
+	}
+
+      if (idx > maxelts)
+	return NULL_TREE;
+
+      str.safe_insert (idx, val);
+    }
+
+  /* Append a nul for the empty initializer { } and for the last
+     explicit initializer in the loop above that is a nul.  */
+  if (!nelts || str.length () < i)
+    str.safe_push (0);
+
+  /* Build a string literal but return the embedded STRING_CST.  */
+  tree res = build_string_literal (str.length (), str.begin ());
+  res = TREE_OPERAND (TREE_OPERAND (res, 0), 0);
+  return res;
+}
+
 #include "gt-c-family-c-common.h"
diff --git a/gcc/c-family/c-common.h b/gcc/c-family/c-common.h
index fcec95b..343a1ae 100644
--- a/gcc/c-family/c-common.h
+++ b/gcc/c-family/c-common.h
@@ -1331,6 +1331,8 @@ extern void maybe_add_include_fixit (rich_location *, const char *);
 extern void maybe_suggest_missing_token_insertion (rich_location *richloc,
 						   enum cpp_ttype token_type,
 						   location_t prev_token_loc);
+extern tree convert_braced_list_to_string (tree, tree (*)(tree) = NULL,
+					   unsigned HOST_WIDE_INT = -1);
 
 #if CHECKING_P
 namespace selftest {
diff --git a/gcc/c/c-parser.c b/gcc/c/c-parser.c
index 7a92628..e12d270 100644
--- a/gcc/c/c-parser.c
+++ b/gcc/c/c-parser.c
@@ -2126,6 +2126,23 @@ c_parser_declaration_or_fndef (c_parser *parser, bool fndef_ok,
 	      if (d != error_mark_node)
 		{
 		  maybe_warn_string_init (init_loc, TREE_TYPE (d), init);
+
+		  /* Convert a string CONSTRUCTOR into a STRING_CST.  */
+		  tree valtype = TREE_TYPE (init.value);
+		  if (TREE_CODE (init.value) == CONSTRUCTOR
+		      && TREE_CODE (valtype) == ARRAY_TYPE)
+		    {
+		      valtype = TREE_TYPE (valtype);
+		      if (TYPE_STRING_FLAG (valtype))
+			if (tree str
+			    = convert_braced_list_to_string (init.value))
+			  {
+			    /* Replace the initializer with the string
+			       constant.  The resu*/
+			    init.value = str;
+			  }
+		    }
+
 		  finish_decl (d, init_loc, init.value,
 			       init.original_type, asm_name);
 		}
diff --git a/gcc/cp/parser.c b/gcc/cp/parser.c
index d44a6b8..c35c2f1 100644
--- a/gcc/cp/parser.c
+++ b/gcc/cp/parser.c
@@ -19825,6 +19825,32 @@ cp_parser_init_declarator (cp_parser* parser,
 	    finish_lambda_scope ();
 	  if (initializer == error_mark_node)
 	    cp_parser_skip_to_end_of_statement (parser);
+	  else if (decl)
+	    {
+	      tree valtype = TREE_TYPE (decl);
+	      if (TREE_CODE (valtype) == ARRAY_TYPE
+		  && TYPE_STRING_FLAG (TREE_TYPE (valtype))
+		  && TYPE_MAIN_VARIANT (TREE_TYPE (valtype)) == char_type_node)
+		{
+		  /* If the array has an explicit bound, use it to
+		     constrain the size of the string.  */
+		  unsigned HOST_WIDE_INT maxelts = HOST_WIDE_INT_M1U;
+		  if (tree nelts = DECL_SIZE_UNIT (decl))
+		    if (tree_fits_uhwi_p (nelts))
+		      maxelts = tree_to_uhwi (nelts);
+
+		  /* Convert a string CONSTRUCTOR into a STRING_CST.  */
+		  if (TREE_CODE (initializer) == CONSTRUCTOR
+		      && TREE_TYPE (initializer) == init_list_type_node)
+		    {
+		      if (tree str
+			  = convert_braced_list_to_string (initializer,
+						   scalar_constant_value,
+						   maxelts))
+			initializer = str;
+		    }
+		}
+	    }
 	}
     }
 
diff --git a/gcc/testsuite/g++.dg/init/string2.C b/gcc/testsuite/g++.dg/init/string2.C
new file mode 100644
index 0000000..acb2f5b
--- /dev/null
+++ b/gcc/testsuite/g++.dg/init/string2.C
@@ -0,0 +1,49 @@
+// PR tree-optimization/71625 - missing strlen optimization on different
+// array initialization style
+//
+// Verify that strlen() calls with constant character array arguments
+// initialized with string constants are folded.  (This is a small
+// subset of pr63989).
+// { dg-do compile }
+// { dg-options "-O0 -fdump-tree-gimple" }
+
+const char a0[] = { 'a', 'b', 'c', '\0' };
+
+int len0 ()
+{
+  return __builtin_strlen (a0);
+}
+
+const char c = 0;
+const char a1[] = { 'a', 'b', 'c', c };
+
+int len1 ()
+{
+  return __builtin_strlen (a1);
+}
+
+#if 0
+
+// The following aren't handled.
+
+const char &cref = c;
+const char a2[] = { 'a', 'b', 'c', cref };
+
+int len2 ()
+{
+  return __builtin_strlen (a2);
+}
+
+
+const char* const cptr = &cref;
+const char a3[] = { 'a', 'b', 'c', *cptr };
+
+int len3 ()
+{
+  return __builtin_strlen (a3);
+}
+
+#endif
+
+// { dg-final { scan-tree-dump-times "strlen" 0 "gimple" } }
diff --git a/gcc/testsuite/g++.dg/init/string3.C b/gcc/testsuite/g++.dg/init/string3.C
new file mode 100644
index 0000000..f7c7f2f
--- /dev/null
+++ b/gcc/testsuite/g++.dg/init/string3.C
@@ -0,0 +1,37 @@
+// PR tree-optimization/71625 - missing strlen optimization on different
+// array initialization style
+//
+// Verify that strlen() call with a constant character array argument
+// initialized with non-constant elements isn't folded.  (This is a small
+// subset of pr63989).
+//
+// { dg-do compile }
+// { dg-options "-O2 -fdump-tree-optimized" }
+
+
+extern const char c;
+const char a0[] = { 'a', 'b', 'c', c };
+
+int len0 ()
+{
+  return __builtin_strlen (a0);
+}
+
+const char &ref = c;
+const char a1[] = { 'a', 'b', 'c', ref };
+
+int len1 ()
+{
+  return __builtin_strlen (a1);
+}
+
+const char* const ptr = &c;
+const char a2[] = { 'a', 'b', 'c', *ptr };
+
+int len2 ()
+{
+  return __builtin_strlen (a2);
+}
+
+// { dg-final { scan-tree-dump-times "strlen" 3 "optimized" } }
diff --git a/gcc/testsuite/gcc.dg/strlenopt-55.c b/gcc/testsuite/gcc.dg/strlenopt-55.c
new file mode 100644
index 0000000..68c7f1d
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/strlenopt-55.c
@@ -0,0 +1,83 @@
+/* PR tree-optimization/71625 - missing strlen optimization on different
+   array initialization style
+
+   Verify that strlen() of braced initialized array is folded
+   { dg-do compile }
+   { dg-options "-O0 -Wall -fdump-tree-gimple -fdump-tree-optimized" } */
+
+#include "strlenopt.h"
+
+const char a2_implicit[2] = { };
+const char a3_implicit[3] = { };
+
+const char a3_nul[3] = { 0 };
+const char a5_nul1[3] = { [1] = 0 };
+const char a7_nul2[3] = { [2] = 0 };
+
+const char ax_2_nul[] = { '1', '2', '\0' };
+const char ax_3_nul[] = { '1', '2', '3', '\0' };
+
+const char ax_3_des_nul[] = { [3] = 0, [2] = '3', [1] = '2', [0] = '1' };
+
+const char ax_3[] = { '1', '2', '3' };
+const char a3_3[3] = { '1', '2', '3' };
+
+const char a100_3[] = { '1', '2', '3', [100] = '\0' };
+
+#define CONCAT(x, y) x ## y
+#define CAT(x, y) CONCAT (x, y)
+#define FAILNAME(name) CAT (call_ ## name ##_on_line_, __LINE__)
+
+#define FAIL(name) do {				\
+    extern void FAILNAME (name) (void);		\
+    FAILNAME (name)();				\
+  } while (0)
+
+/* Macro to emit a call to funcation named
+   call_in_true_branch_not_eliminated_on_line_NNN()
+   for each call that's expected to be eliminated.  The dg-final
+   scan-tree-dump-time directive at the bottom of the test verifies
+   that no such call appears in output.  */
+#define ELIM(expr)							\
+  if (!(expr)) FAIL (in_true_branch_not_eliminated); else (void)0
+
+#define T(s, n) ELIM (strlen (s) == n)
+
+void test_nulstring (void)
+{
+  T (a2_implicit, 0);
+  T (a3_implicit, 0);
+
+  T (a3_nul, 0);
+  T (a5_nul1, 0);
+  T (a7_nul2, 0);
+
+  T (ax_2_nul, 2);
+  T (ax_3_nul, 3);
+  T (ax_3_des_nul, 3);
+
+  T (a100_3, 3);
+}
+
+/* Verify that excessively large initializers don't run out of
+   memory.  */
+
+const char large_string[] = { 'a', [__INT_MAX__] = '\0' };
+
+const int test_large_string (void)
+{
+  return large_string[0] + large_string[__INT_MAX__ - 1];
+}
+
+
+const char very_large_string[] = { 'a', [__LONG_MAX__ / 2] = '\0' };
+
+const int test_very_large_string (void)
+{
+  return very_large_string[0] + very_large_string[__LONG_MAX__ / 2 - 1];
+}
+
+
+/* { dg-final { scan-tree-dump-times "strlen" 0 "gimple" } }
+   { dg-final { scan-tree-dump-times "call_in_true_branch_not_eliminated" 0 "optimized" } } */

^ permalink raw reply	[flat|nested] 45+ messages in thread
* Re: [PATCH] convert braced initializers to strings (PR 71625)
@ 2018-08-08 17:33 Bernd Edlinger
  2018-08-08 18:34 ` Bernd Edlinger
  0 siblings, 1 reply; 45+ messages in thread
From: Bernd Edlinger @ 2018-08-08 17:33 UTC (permalink / raw)
  To: Martin Sebor, Jason Merrill; +Cc: gcc-patches, Joseph Myers

Hi Martin,

sorry, I hope you forgive me, when I add a few comments to your patch.

> +  unsigned HOST_WIDE_INT nelts = CONSTRUCTOR_NELTS (ctor);
> +  tree eltype = TREE_TYPE (type);
...
> +      /* Bail if the CTOR has a block of more than 256 embedded nuls
> +        due to implicitly initialized elements.  */
> +      unsigned nelts = (idx - str.length ()) + 1;
> +      if (nelts > 256)
> +       return NULL_TREE;

nelts shadows previous nelts.

> +  if (!nelts || str.length () < i)

I don't understand when is str.length () < i ?

> +    /* Append a nul for the empty initializer { } and for the last
> +       explicit initializer in the loop above that is a nul.  */
> +    str.safe_push (0);
> +
> +  /* Build a string literal but return the embedded STRING_CST.  */
> +  tree res = build_string_literal (str.length (), str.begin ());
> +  res = TREE_OPERAND (TREE_OPERAND (res, 0), 0);
> +  return res;
> +}


res has a different type than the object you initialize.
I think you should use:

tree res = build_string (str.length (), str.begin ());
TREE_TYPE (res) = type;
return res;


Bernd.

^ permalink raw reply	[flat|nested] 45+ messages in thread
* Re: [PATCH] convert braced initializers to strings (PR 71625)
@ 2018-08-16  6:37 Bernd Edlinger
  2018-08-17 22:26 ` Bernd Edlinger
  0 siblings, 1 reply; 45+ messages in thread
From: Bernd Edlinger @ 2018-08-16  6:37 UTC (permalink / raw)
  To: Jeff Law, Martin Sebor, Joseph S. Myers
  Cc: James Greenhalgh, Jason Merrill, GCC Patches, nd

Jeff Law wrote:
> I wonder if the change to how we set up the initializers is ultimately
> changing the section those go into and ultimately causing an overflow of
> the .sdata section.


Yes, that is definitely the case.
Due to the -fmerge-all-constants option used
named arrays with brace initializer look like string initializers
and can go into the merge section if there are no embedded nul chars.
But the string constants can now be huge.

See my other patch about string merging:
[PATCH] Handle not explicitly zero terminated strings in merge sections
https://gcc.gnu.org/ml/gcc-patches/2018-08/msg00481.html


Can this section overflow?


Bernd.

^ permalink raw reply	[flat|nested] 45+ messages in thread

end of thread, other threads:[~2018-08-18 17:09 UTC | newest]

Thread overview: 45+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-07-30 23:51 [PATCH] convert braced initializers to strings (PR 71625) Martin Sebor
2018-07-31 13:39 ` Jason Merrill
2018-07-31 14:49   ` Martin Sebor
2018-08-07  8:58     ` Jason Merrill
2018-08-07 23:04       ` Martin Sebor
2018-08-08 11:09         ` Jason Merrill
2018-08-09  0:17           ` Martin Sebor
2018-08-09 22:07             ` Joseph Myers
2018-08-13 10:36             ` Jason Merrill
2018-08-14 13:27             ` James Greenhalgh
2018-08-14 15:08               ` Martin Sebor
2018-08-14 15:24                 ` Martin Sebor
2018-08-15  2:34                   ` Martin Sebor
2018-08-15 10:29                     ` James Greenhalgh
2018-08-15 15:04                       ` Richard Biener
2018-08-15 15:51                       ` Martin Sebor
2018-08-17 17:19                         ` [PATCH] Fix poly types after PR tree-optimization/71625 strlen optimization Szabolcs Nagy
2018-08-17 17:22                           ` Kyrill Tkachov
2018-08-14 21:14               ` [PATCH] convert braced initializers to strings (PR 71625) Joseph Myers
2018-08-14 22:18                 ` Martin Sebor
2018-08-15 12:07                   ` Joseph Myers
2018-08-15 21:02                     ` Martin Sebor
2018-08-15 21:14                       ` Jeff Law
2018-08-15 21:34                       ` Jeff Law
2018-08-16 15:23                         ` Martin Sebor
2018-08-16 15:32                           ` Jeff Law
2018-08-06 16:41 ` Martin Sebor
2018-08-06 17:04   ` Joseph Myers
2018-08-07  2:02     ` Martin Sebor
2018-08-07 11:31       ` Joseph Myers
2018-08-07 23:05         ` Martin Sebor
2018-08-08 17:33 Bernd Edlinger
2018-08-08 18:34 ` Bernd Edlinger
2018-08-08 19:50   ` Martin Sebor
2018-08-08 20:35     ` Bernd Edlinger
2018-08-08 20:48     ` Joseph Myers
2018-08-09  0:23       ` Martin Sebor
2018-08-09 10:47         ` Joseph Myers
2018-08-16  6:37 Bernd Edlinger
2018-08-17 22:26 ` Bernd Edlinger
2018-08-18  4:13   ` Jeff Law
2018-08-18 10:40   ` Richard Sandiford
2018-08-18 14:25     ` Bernd Edlinger
2018-08-18 16:46       ` Richard Sandiford
2018-08-18 17:09         ` Bernd Edlinger

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).