public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] Convert character arrays to string csts
@ 2016-11-03 12:12 Martin Liška
  2016-11-03 12:47 ` Richard Biener
  2016-11-03 12:49 ` Bernd Schmidt
  0 siblings, 2 replies; 15+ messages in thread
From: Martin Liška @ 2016-11-03 12:12 UTC (permalink / raw)
  To: GCC Patches; +Cc: Jan Hubicka

[-- Attachment #1: Type: text/plain, Size: 396 bytes --]

Hello.

This is small follow-up of the patches I sent to string folding.
The patch transforms character array defined in an initializer to string
constant:

+const char global[] = {'a', 'b', 'c', 'd', '\0'};

Apart from that, it also enables string folding of local symbols like:
+  const char local[] = "abcd";

Patch can bootstrap on ppc64le-redhat-linux and survives regression tests.

Martin

[-- Attachment #2: 0001-Convert-character-arrays-to-string-csts.patch --]
[-- Type: text/x-patch, Size: 9105 bytes --]

From ecd2b614072f55e71896f7f5bf290072b3a4b04c Mon Sep 17 00:00:00 2001
From: marxin <mliska@suse.cz>
Date: Mon, 17 Oct 2016 15:24:46 +0200
Subject: [PATCH] Convert character arrays to string csts

gcc/testsuite/ChangeLog:

2016-10-24  Martin Liska  <mliska@suse.cz>

	* gcc.dg/tree-ssa/builtins-folding-gimple.c (main): Add new
	tests.

gcc/ChangeLog:

2016-10-24  Martin Liska  <mliska@suse.cz>

	* cgraphunit.c (varpool_node::finalize_decl): Convert ctors
	to string constants if possible.
	* gimplify.c (gimplify_decl_expr): Emit INIT_EXPR just if it
	cannot be converted to a string constant.
	(gimplify_init_constructor): Create string constant for local
	variables (if possible).
	* tree.c (build_string_cst_from_ctor): New function.
	(build_string_literal): Add new argument.
	(can_convert_ctor_to_string_cst): New function.
	* tree.h: Declare new functions.
	* varpool.c (ctor_for_folding): Support automatic variables.
---
 gcc/cgraphunit.c                                   |  6 ++
 gcc/gimplify.c                                     | 14 ++++-
 .../gcc.dg/tree-ssa/builtins-folding-gimple.c      |  7 +++
 gcc/tree.c                                         | 68 ++++++++++++++++++++--
 gcc/tree.h                                         |  6 +-
 gcc/varpool.c                                      |  8 ++-
 6 files changed, 100 insertions(+), 9 deletions(-)

diff --git a/gcc/cgraphunit.c b/gcc/cgraphunit.c
index e315a77..bc041d6 100644
--- a/gcc/cgraphunit.c
+++ b/gcc/cgraphunit.c
@@ -791,6 +791,12 @@ process_function_and_variable_attributes (cgraph_node *first,
 void
 varpool_node::finalize_decl (tree decl)
 {
+  tree init = DECL_INITIAL (decl);
+  if (init
+      && TREE_READONLY (decl)
+      && can_convert_ctor_to_string_cst (init))
+    DECL_INITIAL (decl) = build_string_cst_from_ctor (init);
+
   varpool_node *node = varpool_node::get_create (decl);
 
   gcc_assert (TREE_STATIC (decl) || DECL_EXTERNAL (decl));
diff --git a/gcc/gimplify.c b/gcc/gimplify.c
index 1531582..4520843 100644
--- a/gcc/gimplify.c
+++ b/gcc/gimplify.c
@@ -1495,7 +1495,8 @@ gimplify_decl_expr (tree *stmt_p, gimple_seq *seq_p)
 	{
 	  if (!TREE_STATIC (decl))
 	    {
-	      DECL_INITIAL (decl) = NULL_TREE;
+	      if (!TREE_READONLY (decl) || TREE_CODE (init) != STRING_CST)
+		DECL_INITIAL (decl) = NULL_TREE;
 	      init = build2 (INIT_EXPR, void_type_node, decl, init);
 	      gimplify_and_add (init, seq_p);
 	      ggc_free (init);
@@ -4438,6 +4439,17 @@ gimplify_init_constructor (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p,
 	    break;
 	  }
 
+	/* Replace a ctor with a string constant with possible.  */
+	if (TREE_READONLY (object)
+	    && VAR_P (object)
+	    && can_convert_ctor_to_string_cst (ctor))
+	  {
+	    ctor = build_string_cst_from_ctor (ctor);
+	    TREE_OPERAND (*expr_p, 1) = ctor;
+	    DECL_INITIAL (object) = ctor;
+	    break;
+	  }
+
 	/* Fetch information about the constructor to direct later processing.
 	   We might want to make static versions of it in various cases, and
 	   can only do so if it known to be a valid constant initializer.  */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/builtins-folding-gimple.c b/gcc/testsuite/gcc.dg/tree-ssa/builtins-folding-gimple.c
index 283bd1c..b2d1fd5 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/builtins-folding-gimple.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/builtins-folding-gimple.c
@@ -4,12 +4,15 @@
 char *buffer1;
 char *buffer2;
 
+const char global[] = {'a', 'b', 'c', 'd', '\0'};
+
 #define SIZE 1000
 
 int
 main (void)
 {
   const char* const foo1 = "hello world";
+  const char local[] = "abcd";
 
   buffer1 = __builtin_malloc (SIZE);
   __builtin_strcpy (buffer1, foo1);
@@ -45,6 +48,10 @@ main (void)
     __builtin_abort ();
   if (__builtin_memchr (foo1, null, 12) != foo1 + 11)
     __builtin_abort ();
+  if (__builtin_memchr (global, null, 5) == 0)
+    __builtin_abort ();
+  if (__builtin_memchr (local, null, 5) == 0)
+    __builtin_abort ();
 
   __builtin_memchr (foo1, x, 11);
   __builtin_memchr (buffer1, x, zero);
diff --git a/gcc/tree.c b/gcc/tree.c
index 56cc653..dcf767f 100644
--- a/gcc/tree.c
+++ b/gcc/tree.c
@@ -1784,6 +1784,26 @@ build_vector_from_val (tree vectype, tree sc)
     }
 }
 
+/* Build string constant from a constructor CTOR.  */
+
+tree
+build_string_cst_from_ctor (tree ctor)
+{
+  unsigned HOST_WIDE_INT idx;
+  tree value;
+  vec<constructor_elt, va_gc> *elts = CONSTRUCTOR_ELTS (ctor);
+  char *str = XNEWVEC (char, elts->length ());
+
+  FOR_EACH_CONSTRUCTOR_VALUE (elts, idx, value)
+    str[idx] = (char)tree_to_uhwi (value);
+
+  tree init = build_string_literal (elts->length (),
+				    ggc_alloc_string (str, elts->length ()),
+				    false);
+  free (str);
+  return init;
+}
+
 /* Something has messed with the elements of CONSTRUCTOR C after it was built;
    calculate TREE_CONSTANT and TREE_SIDE_EFFECTS.  */
 
@@ -11344,7 +11364,7 @@ maybe_build_call_expr_loc (location_t loc, combined_fn fn, tree type,
 /* Create a new constant string literal and return a char* pointer to it.
    The STRING_CST value is the LEN characters at STR.  */
 tree
-build_string_literal (int len, const char *str)
+build_string_literal (int len, const char *str, bool build_addr_expr)
 {
   tree t, elem, index, type;
 
@@ -11357,10 +11377,14 @@ build_string_literal (int len, const char *str)
   TREE_READONLY (t) = 1;
   TREE_STATIC (t) = 1;
 
-  type = build_pointer_type (elem);
-  t = build1 (ADDR_EXPR, type,
-	      build4 (ARRAY_REF, elem,
-		      t, integer_zero_node, NULL_TREE, NULL_TREE));
+  if (build_addr_expr)
+    {
+      type = build_pointer_type (elem);
+      t = build1 (ADDR_EXPR, type,
+		  build4 (ARRAY_REF, elem,
+			  t, integer_zero_node, NULL_TREE, NULL_TREE));
+    }
+
   return t;
 }
 
@@ -14217,6 +14241,40 @@ combined_fn_name (combined_fn fn)
     return internal_fn_name (as_internal_fn (fn));
 }
 
+/* Return TRUE when CTOR can be converted to a string constant.  */
+
+bool
+can_convert_ctor_to_string_cst (tree ctor)
+{
+  unsigned HOST_WIDE_INT idx;
+  tree value, key;
+
+  if (TREE_CODE (ctor) != CONSTRUCTOR
+      || TREE_CODE (TREE_TYPE (ctor)) != ARRAY_TYPE)
+    return false;
+
+  tree t = TREE_TYPE (TREE_TYPE (ctor));
+  if (TREE_CODE (TYPE_SIZE (t)) != INTEGER_CST
+      || tree_to_uhwi (TYPE_SIZE (t)) != BITS_PER_UNIT)
+    return false;
+
+  unsigned HOST_WIDE_INT elements = CONSTRUCTOR_NELTS (ctor);
+  FOR_EACH_CONSTRUCTOR_ELT (CONSTRUCTOR_ELTS (ctor), idx, key, value)
+    {
+      if (key == NULL_TREE
+	  || TREE_CODE (key) != INTEGER_CST
+	  || !tree_fits_uhwi_p (value)
+	  || !useless_type_conversion_p (TREE_TYPE (value), char_type_node))
+	return false;
+
+      /* Allow zero character just at the end of a string.  */
+      if (integer_zerop (value))
+	return idx == elements - 1;
+    }
+
+  return false;
+}
+
 #if CHECKING_P
 
 namespace selftest {
diff --git a/gcc/tree.h b/gcc/tree.h
index 531bc5e..ea5d184 100644
--- a/gcc/tree.h
+++ b/gcc/tree.h
@@ -3975,6 +3975,8 @@ extern tree build_vector_stat (tree, tree * MEM_STAT_DECL);
 #define build_vector(t,v) build_vector_stat (t, v MEM_STAT_INFO)
 extern tree build_vector_from_ctor (tree, vec<constructor_elt, va_gc> *);
 extern tree build_vector_from_val (tree, tree);
+extern tree build_string_cst_from_ctor (tree);
+extern tree build_vector_from_val (tree, tree);
 extern void recompute_constructor_flags (tree);
 extern void verify_constructor_flags (tree);
 extern tree build_constructor (tree, vec<constructor_elt, va_gc> *);
@@ -4022,7 +4024,8 @@ extern tree build_call_expr_internal_loc_array (location_t, enum internal_fn,
 						tree, int, const tree *);
 extern tree maybe_build_call_expr_loc (location_t, combined_fn, tree,
 				       int, ...);
-extern tree build_string_literal (int, const char *);
+extern tree build_string_literal (int len, const char *str,
+				  bool build_addr_expr = true);
 
 /* Construct various nodes representing data types.  */
 
@@ -4688,6 +4691,7 @@ extern void assign_assembler_name_if_neeeded (tree);
 extern void warn_deprecated_use (tree, tree);
 extern void cache_integer_cst (tree);
 extern const char *combined_fn_name (combined_fn);
+extern bool can_convert_ctor_to_string_cst (tree ctor);
 
 /* Compare and hash for any structure which begins with a canonical
    pointer.  Assumes all pointers are interchangeable, which is sort
diff --git a/gcc/varpool.c b/gcc/varpool.c
index 78969d2..6c9d067 100644
--- a/gcc/varpool.c
+++ b/gcc/varpool.c
@@ -412,11 +412,15 @@ ctor_for_folding (tree decl)
     return error_mark_node;
 
   /* Do not care about automatic variables.  Those are never initialized
-     anyway, because gimplifier exapnds the code.  */
+     anyway, because gimplifier expands the code.  */
   if (!TREE_STATIC (decl) && !DECL_EXTERNAL (decl))
     {
       gcc_assert (!TREE_PUBLIC (decl));
-      return error_mark_node;
+      if (!TREE_READONLY (decl) || TREE_THIS_VOLATILE (decl))
+	return error_mark_node;
+
+      tree ctor = DECL_INITIAL (decl);
+      return ctor == NULL_TREE ? error_mark_node : ctor;
     }
 
   gcc_assert (VAR_P (decl));
-- 
2.10.1


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] Convert character arrays to string csts
  2016-11-03 12:12 [PATCH] Convert character arrays to string csts Martin Liška
@ 2016-11-03 12:47 ` Richard Biener
  2016-11-04 13:32   ` Martin Liška
  2016-11-03 12:49 ` Bernd Schmidt
  1 sibling, 1 reply; 15+ messages in thread
From: Richard Biener @ 2016-11-03 12:47 UTC (permalink / raw)
  To: Martin Liška; +Cc: GCC Patches, Jan Hubicka

On Thu, Nov 3, 2016 at 1:12 PM, Martin Liška <mliska@suse.cz> wrote:
> Hello.
>
> This is small follow-up of the patches I sent to string folding.
> The patch transforms character array defined in an initializer to string
> constant:
>
> +const char global[] = {'a', 'b', 'c', 'd', '\0'};
>
> Apart from that, it also enables string folding of local symbols like:
> +  const char local[] = "abcd";
>
> Patch can bootstrap on ppc64le-redhat-linux and survives regression tests.

Hmm, does this handle

const char global[] = {'a', [4] = 'd', 'b', [3] = '\0'};

correctly?

I think you need to check that 'key' is increasing and there are no gaps
(ISTR constructor elements are sorted but gaps can still appear).

+         || !useless_type_conversion_p (TREE_TYPE (value), char_type_node))

please instead check the element type of the array constructor.  I'd also
use a stricter check like TYPE_MAIN_VARIANT (type) == char_type_node
to avoid changing non-strings like unsigned / signed char.

Finally I'm a bit worried about doing this for all 'char'
constructors.  Maybe we
should restrict this to those with ISPRINT () entries?

Richard.


> Martin

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] Convert character arrays to string csts
  2016-11-03 12:12 [PATCH] Convert character arrays to string csts Martin Liška
  2016-11-03 12:47 ` Richard Biener
@ 2016-11-03 12:49 ` Bernd Schmidt
  2016-11-03 13:01   ` Jan Hubicka
  1 sibling, 1 reply; 15+ messages in thread
From: Bernd Schmidt @ 2016-11-03 12:49 UTC (permalink / raw)
  To: Martin Liška, GCC Patches; +Cc: Jan Hubicka

On 11/03/2016 01:12 PM, Martin Liška wrote:
> +  tree init = DECL_INITIAL (decl);
> +  if (init
> +      && TREE_READONLY (decl)
> +      && can_convert_ctor_to_string_cst (init))
> +    DECL_INITIAL (decl) = build_string_cst_from_ctor (init);

I'd merge these two new functions since they're only ever called 
together. We'd then have something like

if (init && TREE_READONLY (decl))
   init = convert_ctor_to_string_cst (init);
if (init)
   DECL_INITIAL (decl) = init;

I'll defer to Jan on whether finalize_decl seems like a good place to do 
this.

> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/builtins-folding-gimple.c b/gcc/testsuite/gcc.dg/tree-ssa/builtins-folding-gimple.c
> index 283bd1c..b2d1fd5 100644
> --- a/gcc/testsuite/gcc.dg/tree-ssa/builtins-folding-gimple.c
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/builtins-folding-gimple.c
> @@ -4,12 +4,15 @@
>  char *buffer1;
>  char *buffer2;
>
> +const char global[] = {'a', 'b', 'c', 'd', '\0'};
> +
>  #define SIZE 1000
>
>  int
>  main (void)
>  {
>    const char* const foo1 = "hello world";
> +  const char local[] = "abcd";
>
>    buffer1 = __builtin_malloc (SIZE);
>    __builtin_strcpy (buffer1, foo1);
> @@ -45,6 +48,10 @@ main (void)
>      __builtin_abort ();
>    if (__builtin_memchr (foo1, null, 12) != foo1 + 11)
>      __builtin_abort ();
> +  if (__builtin_memchr (global, null, 5) == 0)
> +    __builtin_abort ();
> +  if (__builtin_memchr (local, null, 5) == 0)
> +    __builtin_abort ();

How is that a meaningful test? This seems to work even with an unpatched 
gcc. I'd have expected something that shows a benefit for doing this 
conversion, and maybe also a test that shows it isn't done in cases 
where it's not allowed.

>  tree
> -build_string_literal (int len, const char *str)
> +build_string_literal (int len, const char *str, bool build_addr_expr)

New arguments should be documented in the function comment.

> +/* Return TRUE when CTOR can be converted to a string constant.  */

"if", not "when".

> +  unsigned HOST_WIDE_INT elements = CONSTRUCTOR_NELTS (ctor);
> +  FOR_EACH_CONSTRUCTOR_ELT (CONSTRUCTOR_ELTS (ctor), idx, key, value)
> +    {
> +      if (key == NULL_TREE
> +	  || TREE_CODE (key) != INTEGER_CST
> +	  || !tree_fits_uhwi_p (value)
> +	  || !useless_type_conversion_p (TREE_TYPE (value), char_type_node))
> +	return false;

Shouldn't all elements have the same type, or do you really have to call 
useless_type_conversion in a loop?

> +      /* Allow zero character just at the end of a string.  */
> +      if (integer_zerop (value))
> +	return idx == elements - 1;

Don't you also have to explicitly check it's there?


Bernd

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] Convert character arrays to string csts
  2016-11-03 12:49 ` Bernd Schmidt
@ 2016-11-03 13:01   ` Jan Hubicka
  2016-11-04 13:34     ` Martin Liška
  0 siblings, 1 reply; 15+ messages in thread
From: Jan Hubicka @ 2016-11-03 13:01 UTC (permalink / raw)
  To: Bernd Schmidt; +Cc: Martin Liška, GCC Patches, Jan Hubicka

> On 11/03/2016 01:12 PM, Martin Liška wrote:
> >+  tree init = DECL_INITIAL (decl);
> >+  if (init
> >+      && TREE_READONLY (decl)
> >+      && can_convert_ctor_to_string_cst (init))
> >+    DECL_INITIAL (decl) = build_string_cst_from_ctor (init);
> 
> I'd merge these two new functions since they're only ever called
> together. We'd then have something like
> 
> if (init && TREE_READONLY (decl))
>   init = convert_ctor_to_string_cst (init);
> if (init)
>   DECL_INITIAL (decl) = init;
> 
> I'll defer to Jan on whether finalize_decl seems like a good place
> to do this.

I think finalize_decl may be bit too early because frontends may expects the
ctors to be in a way they produced them.  We only want to convert those arrays
seen by middle-end so I would move the logic to varpool_node::analyze

Otherwise the patch seems fine to me.

Honza
> 
> >diff --git a/gcc/testsuite/gcc.dg/tree-ssa/builtins-folding-gimple.c b/gcc/testsuite/gcc.dg/tree-ssa/builtins-folding-gimple.c
> >index 283bd1c..b2d1fd5 100644
> >--- a/gcc/testsuite/gcc.dg/tree-ssa/builtins-folding-gimple.c
> >+++ b/gcc/testsuite/gcc.dg/tree-ssa/builtins-folding-gimple.c
> >@@ -4,12 +4,15 @@
> > char *buffer1;
> > char *buffer2;
> >
> >+const char global[] = {'a', 'b', 'c', 'd', '\0'};
> >+
> > #define SIZE 1000
> >
> > int
> > main (void)
> > {
> >   const char* const foo1 = "hello world";
> >+  const char local[] = "abcd";
> >
> >   buffer1 = __builtin_malloc (SIZE);
> >   __builtin_strcpy (buffer1, foo1);
> >@@ -45,6 +48,10 @@ main (void)
> >     __builtin_abort ();
> >   if (__builtin_memchr (foo1, null, 12) != foo1 + 11)
> >     __builtin_abort ();
> >+  if (__builtin_memchr (global, null, 5) == 0)
> >+    __builtin_abort ();
> >+  if (__builtin_memchr (local, null, 5) == 0)
> >+    __builtin_abort ();
> 
> How is that a meaningful test? This seems to work even with an
> unpatched gcc. I'd have expected something that shows a benefit for
> doing this conversion, and maybe also a test that shows it isn't
> done in cases where it's not allowed.
> 
> > tree
> >-build_string_literal (int len, const char *str)
> >+build_string_literal (int len, const char *str, bool build_addr_expr)
> 
> New arguments should be documented in the function comment.
> 
> >+/* Return TRUE when CTOR can be converted to a string constant.  */
> 
> "if", not "when".
> 
> >+  unsigned HOST_WIDE_INT elements = CONSTRUCTOR_NELTS (ctor);
> >+  FOR_EACH_CONSTRUCTOR_ELT (CONSTRUCTOR_ELTS (ctor), idx, key, value)
> >+    {
> >+      if (key == NULL_TREE
> >+	  || TREE_CODE (key) != INTEGER_CST
> >+	  || !tree_fits_uhwi_p (value)
> >+	  || !useless_type_conversion_p (TREE_TYPE (value), char_type_node))
> >+	return false;
> 
> Shouldn't all elements have the same type, or do you really have to
> call useless_type_conversion in a loop?
> 
> >+      /* Allow zero character just at the end of a string.  */
> >+      if (integer_zerop (value))
> >+	return idx == elements - 1;
> 
> Don't you also have to explicitly check it's there?
> 
> 
> Bernd

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] Convert character arrays to string csts
  2016-11-03 12:47 ` Richard Biener
@ 2016-11-04 13:32   ` Martin Liška
  0 siblings, 0 replies; 15+ messages in thread
From: Martin Liška @ 2016-11-04 13:32 UTC (permalink / raw)
  To: Richard Biener; +Cc: GCC Patches, Jan Hubicka

On 11/03/2016 01:46 PM, Richard Biener wrote:
> On Thu, Nov 3, 2016 at 1:12 PM, Martin Liška <mliska@suse.cz> wrote:
>> Hello.
>>
>> This is small follow-up of the patches I sent to string folding.
>> The patch transforms character array defined in an initializer to string
>> constant:
>>
>> +const char global[] = {'a', 'b', 'c', 'd', '\0'};
>>
>> Apart from that, it also enables string folding of local symbols like:
>> +  const char local[] = "abcd";
>>
>> Patch can bootstrap on ppc64le-redhat-linux and survives regression tests.
> 
> Hmm, does this handle
> 
> const char global[] = {'a', [4] = 'd', 'b', [3] = '\0'};
> 
> correctly?

It hasn't, fixed in the v2. As ctor indices are sorted in increasing order,
I only check that there are no holes.

> 
> I think you need to check that 'key' is increasing and there are no gaps
> (ISTR constructor elements are sorted but gaps can still appear).
> 
> +         || !useless_type_conversion_p (TREE_TYPE (value), char_type_node))
> 
> please instead check the element type of the array constructor.  I'd also
> use a stricter check like TYPE_MAIN_VARIANT (type) == char_type_node
> to avoid changing non-strings like unsigned / signed char.

Ok, v2 does: TYPE_MAIN_VARIANT (type) == char_type_node

> 
> Finally I'm a bit worried about doing this for all 'char'
> constructors.  Maybe we
> should restrict this to those with ISPRINT () entries?

Also done for all characters except the trailing \0 character.
I'll send patch in the next email.

Martin

> 
> Richard.
> 
> 
>> Martin

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] Convert character arrays to string csts
  2016-11-03 13:01   ` Jan Hubicka
@ 2016-11-04 13:34     ` Martin Liška
  2016-11-09 10:23       ` Richard Biener
  0 siblings, 1 reply; 15+ messages in thread
From: Martin Liška @ 2016-11-04 13:34 UTC (permalink / raw)
  To: Jan Hubicka, Bernd Schmidt; +Cc: GCC Patches

[-- Attachment #1: Type: text/plain, Size: 3384 bytes --]

On 11/03/2016 02:00 PM, Jan Hubicka wrote:
>> On 11/03/2016 01:12 PM, Martin Liška wrote:
>>> +  tree init = DECL_INITIAL (decl);
>>> +  if (init
>>> +      && TREE_READONLY (decl)
>>> +      && can_convert_ctor_to_string_cst (init))
>>> +    DECL_INITIAL (decl) = build_string_cst_from_ctor (init);
>>
>> I'd merge these two new functions since they're only ever called
>> together. We'd then have something like
>>
>> if (init && TREE_READONLY (decl))
>>   init = convert_ctor_to_string_cst (init);
>> if (init)
>>   DECL_INITIAL (decl) = init;

Done.

>>
>> I'll defer to Jan on whether finalize_decl seems like a good place
>> to do this.
> 
> I think finalize_decl may be bit too early because frontends may expects the
> ctors to be in a way they produced them.  We only want to convert those arrays
> seen by middle-end so I would move the logic to varpool_node::analyze
> 
> Otherwise the patch seems fine to me.
> 
> Honza
>>
>>> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/builtins-folding-gimple.c b/gcc/testsuite/gcc.dg/tree-ssa/builtins-folding-gimple.c
>>> index 283bd1c..b2d1fd5 100644
>>> --- a/gcc/testsuite/gcc.dg/tree-ssa/builtins-folding-gimple.c
>>> +++ b/gcc/testsuite/gcc.dg/tree-ssa/builtins-folding-gimple.c
>>> @@ -4,12 +4,15 @@
>>> char *buffer1;
>>> char *buffer2;
>>>
>>> +const char global[] = {'a', 'b', 'c', 'd', '\0'};
>>> +
>>> #define SIZE 1000
>>>
>>> int
>>> main (void)
>>> {
>>>   const char* const foo1 = "hello world";
>>> +  const char local[] = "abcd";
>>>
>>>   buffer1 = __builtin_malloc (SIZE);
>>>   __builtin_strcpy (buffer1, foo1);
>>> @@ -45,6 +48,10 @@ main (void)
>>>     __builtin_abort ();
>>>   if (__builtin_memchr (foo1, null, 12) != foo1 + 11)
>>>     __builtin_abort ();
>>> +  if (__builtin_memchr (global, null, 5) == 0)
>>> +    __builtin_abort ();
>>> +  if (__builtin_memchr (local, null, 5) == 0)
>>> +    __builtin_abort ();
>>
>> How is that a meaningful test? This seems to work even with an
>> unpatched gcc. I'd have expected something that shows a benefit for
>> doing this conversion, and maybe also a test that shows it isn't
>> done in cases where it's not allowed.

It's meaningful as it scans that there's no __builtin_memchr in optimized dump.
I'm adding new tests that does the opposite test.

>>
>>> tree
>>> -build_string_literal (int len, const char *str)
>>> +build_string_literal (int len, const char *str, bool build_addr_expr)
>>
>> New arguments should be documented in the function comment.

Yep, improved.

>>
>>> +/* Return TRUE when CTOR can be converted to a string constant.  */
>>
>> "if", not "when".

Done.

>>
>>> +  unsigned HOST_WIDE_INT elements = CONSTRUCTOR_NELTS (ctor);
>>> +  FOR_EACH_CONSTRUCTOR_ELT (CONSTRUCTOR_ELTS (ctor), idx, key, value)
>>> +    {
>>> +      if (key == NULL_TREE
>>> +	  || TREE_CODE (key) != INTEGER_CST
>>> +	  || !tree_fits_uhwi_p (value)
>>> +	  || !useless_type_conversion_p (TREE_TYPE (value), char_type_node))
>>> +	return false;
>>
>> Shouldn't all elements have the same type, or do you really have to
>> call useless_type_conversion in a loop?
>>
>>> +      /* Allow zero character just at the end of a string.  */
>>> +      if (integer_zerop (value))
>>> +	return idx == elements - 1;
>>
>> Don't you also have to explicitly check it's there?
>>
>>
>> Bernd


Patch can bootstrap on ppc64le-redhat-linux and survives regression tests.

Martin


[-- Attachment #2: 0001-Convert-character-arrays-to-string-csts-v2.patch --]
[-- Type: text/x-patch, Size: 10597 bytes --]

From 4a2bd43ad10cfb0467dde15ca0be0deba8194ea7 Mon Sep 17 00:00:00 2001
From: marxin <mliska@suse.cz>
Date: Mon, 17 Oct 2016 15:24:46 +0200
Subject: [PATCH] Convert character arrays to string csts

gcc/testsuite/ChangeLog:

2016-11-04  Martin Liska  <mliska@suse.cz>

	* gcc.dg/tree-ssa/builtins-folding-gimple.c (main): Add new
	tests.
	* gcc.dg/tree-ssa/builtins-folding-gimple-ub.c (main): Likewise.

gcc/ChangeLog:

2016-11-04  Martin Liska  <mliska@suse.cz>

	* gimplify.c (gimplify_decl_expr): Emit INIT_EXPR just if it
	cannot be converted to a string constant.
	(gimplify_init_constructor): Create string constant for local
	variables (if possible).
	* tree.c (convert_ctor_to_string_cst): New function.
	(build_string_literal): Add new argument.
	(can_convert_ctor_to_string_cst): New function.
	* tree.h: Declare new functions.
	* varpool.c (ctor_for_folding): Return ctor for local variables.
	(varpool_node::analyze): Convert character array ctors to a
	string constant (if possible).
---
 gcc/gimplify.c                                     | 16 ++++-
 .../gcc.dg/tree-ssa/builtins-folding-gimple-ub.c   | 20 +++++-
 .../gcc.dg/tree-ssa/builtins-folding-gimple.c      |  7 ++
 gcc/tree.c                                         | 83 ++++++++++++++++++++--
 gcc/tree.h                                         |  5 +-
 gcc/varpool.c                                      | 14 +++-
 6 files changed, 134 insertions(+), 11 deletions(-)

diff --git a/gcc/gimplify.c b/gcc/gimplify.c
index 1531582..32f0909 100644
--- a/gcc/gimplify.c
+++ b/gcc/gimplify.c
@@ -1495,7 +1495,8 @@ gimplify_decl_expr (tree *stmt_p, gimple_seq *seq_p)
 	{
 	  if (!TREE_STATIC (decl))
 	    {
-	      DECL_INITIAL (decl) = NULL_TREE;
+	      if (!TREE_READONLY (decl) || TREE_CODE (init) != STRING_CST)
+		DECL_INITIAL (decl) = NULL_TREE;
 	      init = build2 (INIT_EXPR, void_type_node, decl, init);
 	      gimplify_and_add (init, seq_p);
 	      ggc_free (init);
@@ -4438,6 +4439,19 @@ gimplify_init_constructor (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p,
 	    break;
 	  }
 
+	/* Replace a ctor with a string constant with possible.  */
+	if (TREE_READONLY (object)
+	    && VAR_P (object))
+	  {
+	    tree string_ctor = convert_ctor_to_string_cst (ctor);
+	    if (string_ctor)
+	      {
+		TREE_OPERAND (*expr_p, 1) = string_ctor;
+		DECL_INITIAL (object) = string_ctor;
+		break;
+	      }
+	  }
+
 	/* Fetch information about the constructor to direct later processing.
 	   We might want to make static versions of it in various cases, and
 	   can only do so if it known to be a valid constant initializer.  */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/builtins-folding-gimple-ub.c b/gcc/testsuite/gcc.dg/tree-ssa/builtins-folding-gimple-ub.c
index e1658d1..ea73258 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/builtins-folding-gimple-ub.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/builtins-folding-gimple-ub.c
@@ -4,11 +4,18 @@
 char *buffer1;
 char *buffer2;
 
+const char global1[] = {'a', 'b', 'c', 'd'};
+const char global2[] = {'a', 'b', '\0', 'd', '\0'};
+const char global3[] = {'a', 'b', [3] = 'x', 'c', '\0'};
+const char global4[] = {'a', 'b', 'c', 'd', [5] = '\0'};
+char global5[] = {'a', 'b', 'c', 'd', '\0'};
+
 #define SIZE 1000
 
 int
 main (void)
 {
+  char null = '\0';
   const char* const foo1 = "hello world";
 
   /* MEMCHR.  */
@@ -17,6 +24,17 @@ main (void)
   if (__builtin_memchr (foo1, 'x', 1000)) /* Not folded away.  */
     __builtin_abort ();
 
+  if (__builtin_memchr (global1, null, 1) == foo1) /* Not folded away.  */
+    __builtin_abort ();
+  if (__builtin_memchr (global2, null, 1) == foo1) /* Not folded away.  */
+    __builtin_abort ();
+  if (__builtin_memchr (global3, null, 1) == foo1) /* Not folded away.  */
+    __builtin_abort ();
+  if (__builtin_memchr (global4, null, 1) == foo1) /* Not folded away.  */
+    __builtin_abort ();
+  if (__builtin_memchr (global5, null, 1) == foo1) /* Not folded away.  */
+    __builtin_abort ();
+
   /* STRNCMP.  */
   if (strncmp ("a", "b", -1)) /* { dg-warning "implicit declaration of function" } */
     __builtin_abort ();
@@ -24,4 +42,4 @@ main (void)
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "__builtin_memchr" 2 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "__builtin_memchr" 7 "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/builtins-folding-gimple.c b/gcc/testsuite/gcc.dg/tree-ssa/builtins-folding-gimple.c
index 283bd1c..b2d1fd5 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/builtins-folding-gimple.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/builtins-folding-gimple.c
@@ -4,12 +4,15 @@
 char *buffer1;
 char *buffer2;
 
+const char global[] = {'a', 'b', 'c', 'd', '\0'};
+
 #define SIZE 1000
 
 int
 main (void)
 {
   const char* const foo1 = "hello world";
+  const char local[] = "abcd";
 
   buffer1 = __builtin_malloc (SIZE);
   __builtin_strcpy (buffer1, foo1);
@@ -45,6 +48,10 @@ main (void)
     __builtin_abort ();
   if (__builtin_memchr (foo1, null, 12) != foo1 + 11)
     __builtin_abort ();
+  if (__builtin_memchr (global, null, 5) == 0)
+    __builtin_abort ();
+  if (__builtin_memchr (local, null, 5) == 0)
+    __builtin_abort ();
 
   __builtin_memchr (foo1, x, 11);
   __builtin_memchr (buffer1, x, zero);
diff --git a/gcc/tree.c b/gcc/tree.c
index 434aff1..84e5774 100644
--- a/gcc/tree.c
+++ b/gcc/tree.c
@@ -1784,6 +1784,70 @@ build_vector_from_val (tree vectype, tree sc)
     }
 }
 
+/* Return TRUE if CTOR can be converted to a string constant.  */
+
+static bool
+can_convert_ctor_to_string_cst (tree ctor)
+{
+  unsigned HOST_WIDE_INT idx;
+  tree value, key;
+
+  tree type = TREE_TYPE (ctor);
+  if (TREE_CODE (ctor) != CONSTRUCTOR
+      || TREE_CODE (type) != ARRAY_TYPE
+      || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type)))
+    return false;
+
+  tree subtype = TREE_TYPE (type);
+  if (TYPE_MAIN_VARIANT (subtype) != char_type_node)
+    return false;
+
+  unsigned HOST_WIDE_INT ctor_length = tree_to_uhwi (TYPE_SIZE_UNIT (type));
+
+  /* Skip constructors with a hole.  */
+  if (CONSTRUCTOR_NELTS (ctor) != ctor_length)
+    return false;
+
+  FOR_EACH_CONSTRUCTOR_ELT (CONSTRUCTOR_ELTS (ctor), idx, key, value)
+    {
+      if (key == NULL_TREE
+	  || !tree_fits_uhwi_p (key)
+	  || !tree_fits_uhwi_p (value))
+	return false;
+
+      /* Allow zero character only at the end of a string.  */
+      if (integer_zerop (value))
+	return idx == ctor_length - 1;
+      else if (!ISPRINT ((char)tree_to_uhwi (value)))
+	return false;
+    }
+
+  return false;
+}
+
+/* Build string constant from a constructor CTOR.  */
+
+tree
+convert_ctor_to_string_cst (tree ctor)
+{
+  if (!can_convert_ctor_to_string_cst (ctor))
+    return NULL_TREE;
+
+  unsigned HOST_WIDE_INT idx;
+  tree value;
+  vec<constructor_elt, va_gc> *elts = CONSTRUCTOR_ELTS (ctor);
+  char *str = XNEWVEC (char, elts->length ());
+
+  FOR_EACH_CONSTRUCTOR_VALUE (elts, idx, value)
+    str[idx] = (char)tree_to_uhwi (value);
+
+  tree init = build_string_literal (elts->length (),
+				    ggc_alloc_string (str, elts->length ()),
+				    false);
+  free (str);
+  return init;
+}
+
 /* Something has messed with the elements of CONSTRUCTOR C after it was built;
    calculate TREE_CONSTANT and TREE_SIDE_EFFECTS.  */
 
@@ -11359,9 +11423,12 @@ maybe_build_call_expr_loc (location_t loc, combined_fn fn, tree type,
 }
 
 /* Create a new constant string literal and return a char* pointer to it.
-   The STRING_CST value is the LEN characters at STR.  */
+   The STRING_CST value is the LEN characters at STR.  If BUILD_ADDR_EXPR
+   is set to true, ADDR_EXPR of the newly created string constant is
+   returned.  */
+
 tree
-build_string_literal (int len, const char *str)
+build_string_literal (int len, const char *str, bool build_addr_expr)
 {
   tree t, elem, index, type;
 
@@ -11374,10 +11441,14 @@ build_string_literal (int len, const char *str)
   TREE_READONLY (t) = 1;
   TREE_STATIC (t) = 1;
 
-  type = build_pointer_type (elem);
-  t = build1 (ADDR_EXPR, type,
-	      build4 (ARRAY_REF, elem,
-		      t, integer_zero_node, NULL_TREE, NULL_TREE));
+  if (build_addr_expr)
+    {
+      type = build_pointer_type (elem);
+      t = build1 (ADDR_EXPR, type,
+		  build4 (ARRAY_REF, elem,
+			  t, integer_zero_node, NULL_TREE, NULL_TREE));
+    }
+
   return t;
 }
 
diff --git a/gcc/tree.h b/gcc/tree.h
index 6a98b6e..10ee0d0 100644
--- a/gcc/tree.h
+++ b/gcc/tree.h
@@ -3975,6 +3975,8 @@ extern tree build_vector_stat (tree, tree * MEM_STAT_DECL);
 #define build_vector(t,v) build_vector_stat (t, v MEM_STAT_INFO)
 extern tree build_vector_from_ctor (tree, vec<constructor_elt, va_gc> *);
 extern tree build_vector_from_val (tree, tree);
+extern tree convert_ctor_to_string_cst (tree);
+extern tree build_vector_from_val (tree, tree);
 extern void recompute_constructor_flags (tree);
 extern void verify_constructor_flags (tree);
 extern tree build_constructor (tree, vec<constructor_elt, va_gc> *);
@@ -4022,7 +4024,8 @@ extern tree build_call_expr_internal_loc_array (location_t, enum internal_fn,
 						tree, int, const tree *);
 extern tree maybe_build_call_expr_loc (location_t, combined_fn, tree,
 				       int, ...);
-extern tree build_string_literal (int, const char *);
+extern tree build_string_literal (int len, const char *str,
+				  bool build_addr_expr = true);
 
 /* Construct various nodes representing data types.  */
 
diff --git a/gcc/varpool.c b/gcc/varpool.c
index 78969d2..bb16c7b 100644
--- a/gcc/varpool.c
+++ b/gcc/varpool.c
@@ -412,11 +412,15 @@ ctor_for_folding (tree decl)
     return error_mark_node;
 
   /* Do not care about automatic variables.  Those are never initialized
-     anyway, because gimplifier exapnds the code.  */
+     anyway, because gimplifier expands the code.  */
   if (!TREE_STATIC (decl) && !DECL_EXTERNAL (decl))
     {
       gcc_assert (!TREE_PUBLIC (decl));
-      return error_mark_node;
+      if (!TREE_READONLY (decl) || TREE_THIS_VOLATILE (decl))
+	return error_mark_node;
+
+      tree ctor = DECL_INITIAL (decl);
+      return ctor == NULL_TREE ? error_mark_node : ctor;
     }
 
   gcc_assert (VAR_P (decl));
@@ -525,6 +529,12 @@ varpool_node::analyze (void)
       /* Compute the alignment early so function body expanders are
 	 already informed about increased alignment.  */
       align_variable (decl, 0);
+
+      tree init = DECL_INITIAL (decl);
+      if (init && TREE_READONLY (decl))
+	init = convert_ctor_to_string_cst (init);
+      if (init)
+	DECL_INITIAL (decl) = init;
     }
   if (alias)
     resolve_alias (varpool_node::get (alias_target));
-- 
2.10.1


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] Convert character arrays to string csts
  2016-11-04 13:34     ` Martin Liška
@ 2016-11-09 10:23       ` Richard Biener
  2017-08-08 11:47         ` Martin Liška
  0 siblings, 1 reply; 15+ messages in thread
From: Richard Biener @ 2016-11-09 10:23 UTC (permalink / raw)
  To: Martin Liška; +Cc: Jan Hubicka, Bernd Schmidt, GCC Patches

On Fri, Nov 4, 2016 at 2:33 PM, Martin Liška <mliska@suse.cz> wrote:
> On 11/03/2016 02:00 PM, Jan Hubicka wrote:
>>> On 11/03/2016 01:12 PM, Martin Liška wrote:
>>>> +  tree init = DECL_INITIAL (decl);
>>>> +  if (init
>>>> +      && TREE_READONLY (decl)
>>>> +      && can_convert_ctor_to_string_cst (init))
>>>> +    DECL_INITIAL (decl) = build_string_cst_from_ctor (init);
>>>
>>> I'd merge these two new functions since they're only ever called
>>> together. We'd then have something like
>>>
>>> if (init && TREE_READONLY (decl))
>>>   init = convert_ctor_to_string_cst (init);
>>> if (init)
>>>   DECL_INITIAL (decl) = init;
>
> Done.
>
>>>
>>> I'll defer to Jan on whether finalize_decl seems like a good place
>>> to do this.
>>
>> I think finalize_decl may be bit too early because frontends may expects the
>> ctors to be in a way they produced them.  We only want to convert those arrays
>> seen by middle-end so I would move the logic to varpool_node::analyze
>>
>> Otherwise the patch seems fine to me.
>>
>> Honza
>>>
>>>> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/builtins-folding-gimple.c b/gcc/testsuite/gcc.dg/tree-ssa/builtins-folding-gimple.c
>>>> index 283bd1c..b2d1fd5 100644
>>>> --- a/gcc/testsuite/gcc.dg/tree-ssa/builtins-folding-gimple.c
>>>> +++ b/gcc/testsuite/gcc.dg/tree-ssa/builtins-folding-gimple.c
>>>> @@ -4,12 +4,15 @@
>>>> char *buffer1;
>>>> char *buffer2;
>>>>
>>>> +const char global[] = {'a', 'b', 'c', 'd', '\0'};
>>>> +
>>>> #define SIZE 1000
>>>>
>>>> int
>>>> main (void)
>>>> {
>>>>   const char* const foo1 = "hello world";
>>>> +  const char local[] = "abcd";
>>>>
>>>>   buffer1 = __builtin_malloc (SIZE);
>>>>   __builtin_strcpy (buffer1, foo1);
>>>> @@ -45,6 +48,10 @@ main (void)
>>>>     __builtin_abort ();
>>>>   if (__builtin_memchr (foo1, null, 12) != foo1 + 11)
>>>>     __builtin_abort ();
>>>> +  if (__builtin_memchr (global, null, 5) == 0)
>>>> +    __builtin_abort ();
>>>> +  if (__builtin_memchr (local, null, 5) == 0)
>>>> +    __builtin_abort ();
>>>
>>> How is that a meaningful test? This seems to work even with an
>>> unpatched gcc. I'd have expected something that shows a benefit for
>>> doing this conversion, and maybe also a test that shows it isn't
>>> done in cases where it's not allowed.
>
> It's meaningful as it scans that there's no __builtin_memchr in optimized dump.
> I'm adding new tests that does the opposite test.
>
>>>
>>>> tree
>>>> -build_string_literal (int len, const char *str)
>>>> +build_string_literal (int len, const char *str, bool build_addr_expr)
>>>
>>> New arguments should be documented in the function comment.
>
> Yep, improved.
>
>>>
>>>> +/* Return TRUE when CTOR can be converted to a string constant.  */
>>>
>>> "if", not "when".
>
> Done.
>
>>>
>>>> +  unsigned HOST_WIDE_INT elements = CONSTRUCTOR_NELTS (ctor);
>>>> +  FOR_EACH_CONSTRUCTOR_ELT (CONSTRUCTOR_ELTS (ctor), idx, key, value)
>>>> +    {
>>>> +      if (key == NULL_TREE
>>>> +     || TREE_CODE (key) != INTEGER_CST
>>>> +     || !tree_fits_uhwi_p (value)
>>>> +     || !useless_type_conversion_p (TREE_TYPE (value), char_type_node))
>>>> +   return false;
>>>
>>> Shouldn't all elements have the same type, or do you really have to
>>> call useless_type_conversion in a loop?
>>>
>>>> +      /* Allow zero character just at the end of a string.  */
>>>> +      if (integer_zerop (value))
>>>> +   return idx == elements - 1;
>>>
>>> Don't you also have to explicitly check it's there?
>>>
>>>
>>> Bernd
>
>
> Patch can bootstrap on ppc64le-redhat-linux and survives regression tests.

I'm curious about the

@@ -1495,7 +1495,8 @@ gimplify_decl_expr (tree *stmt_p, gimple_seq *seq_p)
        {
          if (!TREE_STATIC (decl))
            {
-             DECL_INITIAL (decl) = NULL_TREE;
+             if (!TREE_READONLY (decl) || TREE_CODE (init) != STRING_CST)
+               DECL_INITIAL (decl) = NULL_TREE;
              init = build2 (INIT_EXPR, void_type_node, decl, init);
              gimplify_and_add (init, seq_p);
              ggc_free (init);

change.  Why keep DECL_INITIAL if you build a INIT_EXPR anyway?

@@ -4438,6 +4439,19 @@ gimplify_init_constructor (tree *expr_p,
gimple_seq *pre_p, gimple_seq *post_p,
            break;
          }

+       /* Replace a ctor with a string constant with possible.  */
+       if (TREE_READONLY (object)
+           && VAR_P (object))
+         {
+           tree string_ctor = convert_ctor_to_string_cst (ctor);
+           if (string_ctor)
+             {
+               TREE_OPERAND (*expr_p, 1) = string_ctor;
+               DECL_INITIAL (object) = string_ctor;
+               break;
+             }
+         }
+
        /* Fetch information about the constructor to direct later processing.
           We might want to make static versions of it in various cases, and
           can only do so if it known to be a valid constant initializer.  */

hmm, so both these hunks will end up keeping a DECL_INITIAL
for non-static local consts?  Usually we end up with

main ()
{
  const char local[5];

  <bb 2>:
  local = "abcd";

here.  So you keep DECL_INITIAL for folding?

I believe we should create CONST_DECLs for the above (and make
CONST_DECLs first-class
symtab citizens), thus avoid runtime stack initialization for the
above and instead emit
"abcd" to the constant pool?

+  /* Skip constructors with a hole.  */
+  if (CONSTRUCTOR_NELTS (ctor) != ctor_length)
+    return false;

not sure if that's maybe too clever ;)

+  FOR_EACH_CONSTRUCTOR_ELT (CONSTRUCTOR_ELTS (ctor), idx, key, value)
+    {
+      if (key == NULL_TREE
+         || !tree_fits_uhwi_p (key)
+         || !tree_fits_uhwi_p (value))
+       return false;

I think key == NULL is very well valid (you are not using it...).  I'd
instead do

     if (key && compare_tree_int (key, idx) != 0)
       return false;

for the hole check.  value should always fit uhwi given the earlier
element type check.

+tree
+convert_ctor_to_string_cst (tree ctor)
+{
+  if (!can_convert_ctor_to_string_cst (ctor))
+    return NULL_TREE;
+
+  unsigned HOST_WIDE_INT idx;
+  tree value;
+  vec<constructor_elt, va_gc> *elts = CONSTRUCTOR_ELTS (ctor);
+  char *str = XNEWVEC (char, elts->length ());
+
+  FOR_EACH_CONSTRUCTOR_VALUE (elts, idx, value)
+    str[idx] = (char)tree_to_uhwi (value);
+
+  tree init = build_string_literal (elts->length (),
+                                   ggc_alloc_string (str, elts->length ()),
+                                   false);
+  free (str);

why alloc str on the heap, copy it to gc and the copy it again?
That is, you can pass 'str' to build_string_literal directly I think.

In fact as you are refactoring build_string_literal please
refactor build_string into a build_string_raw taking just 'len'
(thus leaving out the actual string initialization apart from
'appending' '\0') and then avoid the heap allocation.

Refactor build_string_literal to take a tree STRING_CST
and build_string_literal_addr to build it's address.

Thanks,
Richard.




> Martin
>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] Convert character arrays to string csts
  2016-11-09 10:23       ` Richard Biener
@ 2017-08-08 11:47         ` Martin Liška
  2017-08-08 13:19           ` Richard Biener
  0 siblings, 1 reply; 15+ messages in thread
From: Martin Liška @ 2017-08-08 11:47 UTC (permalink / raw)
  To: Richard Biener; +Cc: Jan Hubicka, Bernd Schmidt, GCC Patches

On 11/09/2016 11:22 AM, Richard Biener wrote:
> On Fri, Nov 4, 2016 at 2:33 PM, Martin Liška <mliska@suse.cz> wrote:
>> On 11/03/2016 02:00 PM, Jan Hubicka wrote:
>>>> On 11/03/2016 01:12 PM, Martin Liška wrote:
>>>>> +  tree init = DECL_INITIAL (decl);
>>>>> +  if (init
>>>>> +      && TREE_READONLY (decl)
>>>>> +      && can_convert_ctor_to_string_cst (init))
>>>>> +    DECL_INITIAL (decl) = build_string_cst_from_ctor (init);
>>>>
>>>> I'd merge these two new functions since they're only ever called
>>>> together. We'd then have something like
>>>>
>>>> if (init && TREE_READONLY (decl))
>>>>   init = convert_ctor_to_string_cst (init);
>>>> if (init)
>>>>   DECL_INITIAL (decl) = init;
>>
>> Done.
>>
>>>>
>>>> I'll defer to Jan on whether finalize_decl seems like a good place
>>>> to do this.
>>>
>>> I think finalize_decl may be bit too early because frontends may expects the
>>> ctors to be in a way they produced them.  We only want to convert those arrays
>>> seen by middle-end so I would move the logic to varpool_node::analyze
>>>
>>> Otherwise the patch seems fine to me.
>>>
>>> Honza
>>>>
>>>>> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/builtins-folding-gimple.c b/gcc/testsuite/gcc.dg/tree-ssa/builtins-folding-gimple.c
>>>>> index 283bd1c..b2d1fd5 100644
>>>>> --- a/gcc/testsuite/gcc.dg/tree-ssa/builtins-folding-gimple.c
>>>>> +++ b/gcc/testsuite/gcc.dg/tree-ssa/builtins-folding-gimple.c
>>>>> @@ -4,12 +4,15 @@
>>>>> char *buffer1;
>>>>> char *buffer2;
>>>>>
>>>>> +const char global[] = {'a', 'b', 'c', 'd', '\0'};
>>>>> +
>>>>> #define SIZE 1000
>>>>>
>>>>> int
>>>>> main (void)
>>>>> {
>>>>>   const char* const foo1 = "hello world";
>>>>> +  const char local[] = "abcd";
>>>>>
>>>>>   buffer1 = __builtin_malloc (SIZE);
>>>>>   __builtin_strcpy (buffer1, foo1);
>>>>> @@ -45,6 +48,10 @@ main (void)
>>>>>     __builtin_abort ();
>>>>>   if (__builtin_memchr (foo1, null, 12) != foo1 + 11)
>>>>>     __builtin_abort ();
>>>>> +  if (__builtin_memchr (global, null, 5) == 0)
>>>>> +    __builtin_abort ();
>>>>> +  if (__builtin_memchr (local, null, 5) == 0)
>>>>> +    __builtin_abort ();
>>>>
>>>> How is that a meaningful test? This seems to work even with an
>>>> unpatched gcc. I'd have expected something that shows a benefit for
>>>> doing this conversion, and maybe also a test that shows it isn't
>>>> done in cases where it's not allowed.
>>
>> It's meaningful as it scans that there's no __builtin_memchr in optimized dump.
>> I'm adding new tests that does the opposite test.
>>
>>>>
>>>>> tree
>>>>> -build_string_literal (int len, const char *str)
>>>>> +build_string_literal (int len, const char *str, bool build_addr_expr)
>>>>
>>>> New arguments should be documented in the function comment.
>>
>> Yep, improved.
>>
>>>>
>>>>> +/* Return TRUE when CTOR can be converted to a string constant.  */
>>>>
>>>> "if", not "when".
>>
>> Done.
>>
>>>>
>>>>> +  unsigned HOST_WIDE_INT elements = CONSTRUCTOR_NELTS (ctor);
>>>>> +  FOR_EACH_CONSTRUCTOR_ELT (CONSTRUCTOR_ELTS (ctor), idx, key, value)
>>>>> +    {
>>>>> +      if (key == NULL_TREE
>>>>> +     || TREE_CODE (key) != INTEGER_CST
>>>>> +     || !tree_fits_uhwi_p (value)
>>>>> +     || !useless_type_conversion_p (TREE_TYPE (value), char_type_node))
>>>>> +   return false;
>>>>
>>>> Shouldn't all elements have the same type, or do you really have to
>>>> call useless_type_conversion in a loop?
>>>>
>>>>> +      /* Allow zero character just at the end of a string.  */
>>>>> +      if (integer_zerop (value))
>>>>> +   return idx == elements - 1;
>>>>
>>>> Don't you also have to explicitly check it's there?
>>>>
>>>>
>>>> Bernd
>>
>>
>> Patch can bootstrap on ppc64le-redhat-linux and survives regression tests.
> 
> I'm curious about the
> 
> @@ -1495,7 +1495,8 @@ gimplify_decl_expr (tree *stmt_p, gimple_seq *seq_p)
>         {
>           if (!TREE_STATIC (decl))
>             {
> -             DECL_INITIAL (decl) = NULL_TREE;
> +             if (!TREE_READONLY (decl) || TREE_CODE (init) != STRING_CST)
> +               DECL_INITIAL (decl) = NULL_TREE;
>               init = build2 (INIT_EXPR, void_type_node, decl, init);
>               gimplify_and_add (init, seq_p);
>               ggc_free (init);
> 
> change.  Why keep DECL_INITIAL if you build a INIT_EXPR anyway?
> 
> @@ -4438,6 +4439,19 @@ gimplify_init_constructor (tree *expr_p,
> gimple_seq *pre_p, gimple_seq *post_p,
>             break;
>           }
> 
> +       /* Replace a ctor with a string constant with possible.  */
> +       if (TREE_READONLY (object)
> +           && VAR_P (object))
> +         {
> +           tree string_ctor = convert_ctor_to_string_cst (ctor);
> +           if (string_ctor)
> +             {
> +               TREE_OPERAND (*expr_p, 1) = string_ctor;
> +               DECL_INITIAL (object) = string_ctor;
> +               break;
> +             }
> +         }
> +
>         /* Fetch information about the constructor to direct later processing.
>            We might want to make static versions of it in various cases, and
>            can only do so if it known to be a valid constant initializer.  */
> 
> hmm, so both these hunks will end up keeping a DECL_INITIAL
> for non-static local consts?  Usually we end up with
> 
> main ()
> {
>   const char local[5];
> 
>   <bb 2>:
>   local = "abcd";
> 
> here.  So you keep DECL_INITIAL for folding?
> 
> I believe we should create CONST_DECLs for the above (and make
> CONST_DECLs first-class
> symtab citizens), thus avoid runtime stack initialization for the
> above and instead emit
> "abcd" to the constant pool?

Hi Richi.

I've just returned to this in order to resolve long pending unfinished stuff.
I have couple of questions:

a) what should create a CONST_DECL, a FE or gimplifier when it identifies that
it can be converted to CONST_DECL?

b) Why do we need to put such local variables to symtab?

c) Do we have a target that uses CONST_DECL for such (or similar) purpose?

Thanks,
Martin

> 
> +  /* Skip constructors with a hole.  */
> +  if (CONSTRUCTOR_NELTS (ctor) != ctor_length)
> +    return false;
> 
> not sure if that's maybe too clever ;)
> 
> +  FOR_EACH_CONSTRUCTOR_ELT (CONSTRUCTOR_ELTS (ctor), idx, key, value)
> +    {
> +      if (key == NULL_TREE
> +         || !tree_fits_uhwi_p (key)
> +         || !tree_fits_uhwi_p (value))
> +       return false;
> 
> I think key == NULL is very well valid (you are not using it...).  I'd
> instead do
> 
>      if (key && compare_tree_int (key, idx) != 0)
>        return false;
> 
> for the hole check.  value should always fit uhwi given the earlier
> element type check.
> 
> +tree
> +convert_ctor_to_string_cst (tree ctor)
> +{
> +  if (!can_convert_ctor_to_string_cst (ctor))
> +    return NULL_TREE;
> +
> +  unsigned HOST_WIDE_INT idx;
> +  tree value;
> +  vec<constructor_elt, va_gc> *elts = CONSTRUCTOR_ELTS (ctor);
> +  char *str = XNEWVEC (char, elts->length ());
> +
> +  FOR_EACH_CONSTRUCTOR_VALUE (elts, idx, value)
> +    str[idx] = (char)tree_to_uhwi (value);
> +
> +  tree init = build_string_literal (elts->length (),
> +                                   ggc_alloc_string (str, elts->length ()),
> +                                   false);
> +  free (str);
> 
> why alloc str on the heap, copy it to gc and the copy it again?
> That is, you can pass 'str' to build_string_literal directly I think.
> 
> In fact as you are refactoring build_string_literal please
> refactor build_string into a build_string_raw taking just 'len'
> (thus leaving out the actual string initialization apart from
> 'appending' '\0') and then avoid the heap allocation.
> 
> Refactor build_string_literal to take a tree STRING_CST
> and build_string_literal_addr to build it's address.
> 
> Thanks,
> Richard.
> 
> 
> 
> 
>> Martin
>>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] Convert character arrays to string csts
  2017-08-08 11:47         ` Martin Liška
@ 2017-08-08 13:19           ` Richard Biener
  2017-08-09  9:15             ` Martin Liška
  0 siblings, 1 reply; 15+ messages in thread
From: Richard Biener @ 2017-08-08 13:19 UTC (permalink / raw)
  To: Martin Liška; +Cc: Jan Hubicka, Bernd Schmidt, GCC Patches

[-- Attachment #1: Type: text/plain, Size: 8687 bytes --]

On Tue, Aug 8, 2017 at 1:47 PM, Martin Liška <mliska@suse.cz> wrote:
> On 11/09/2016 11:22 AM, Richard Biener wrote:
>> On Fri, Nov 4, 2016 at 2:33 PM, Martin Liška <mliska@suse.cz> wrote:
>>> On 11/03/2016 02:00 PM, Jan Hubicka wrote:
>>>>> On 11/03/2016 01:12 PM, Martin Liška wrote:
>>>>>> +  tree init = DECL_INITIAL (decl);
>>>>>> +  if (init
>>>>>> +      && TREE_READONLY (decl)
>>>>>> +      && can_convert_ctor_to_string_cst (init))
>>>>>> +    DECL_INITIAL (decl) = build_string_cst_from_ctor (init);
>>>>>
>>>>> I'd merge these two new functions since they're only ever called
>>>>> together. We'd then have something like
>>>>>
>>>>> if (init && TREE_READONLY (decl))
>>>>>   init = convert_ctor_to_string_cst (init);
>>>>> if (init)
>>>>>   DECL_INITIAL (decl) = init;
>>>
>>> Done.
>>>
>>>>>
>>>>> I'll defer to Jan on whether finalize_decl seems like a good place
>>>>> to do this.
>>>>
>>>> I think finalize_decl may be bit too early because frontends may expects the
>>>> ctors to be in a way they produced them.  We only want to convert those arrays
>>>> seen by middle-end so I would move the logic to varpool_node::analyze
>>>>
>>>> Otherwise the patch seems fine to me.
>>>>
>>>> Honza
>>>>>
>>>>>> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/builtins-folding-gimple.c b/gcc/testsuite/gcc.dg/tree-ssa/builtins-folding-gimple.c
>>>>>> index 283bd1c..b2d1fd5 100644
>>>>>> --- a/gcc/testsuite/gcc.dg/tree-ssa/builtins-folding-gimple.c
>>>>>> +++ b/gcc/testsuite/gcc.dg/tree-ssa/builtins-folding-gimple.c
>>>>>> @@ -4,12 +4,15 @@
>>>>>> char *buffer1;
>>>>>> char *buffer2;
>>>>>>
>>>>>> +const char global[] = {'a', 'b', 'c', 'd', '\0'};
>>>>>> +
>>>>>> #define SIZE 1000
>>>>>>
>>>>>> int
>>>>>> main (void)
>>>>>> {
>>>>>>   const char* const foo1 = "hello world";
>>>>>> +  const char local[] = "abcd";
>>>>>>
>>>>>>   buffer1 = __builtin_malloc (SIZE);
>>>>>>   __builtin_strcpy (buffer1, foo1);
>>>>>> @@ -45,6 +48,10 @@ main (void)
>>>>>>     __builtin_abort ();
>>>>>>   if (__builtin_memchr (foo1, null, 12) != foo1 + 11)
>>>>>>     __builtin_abort ();
>>>>>> +  if (__builtin_memchr (global, null, 5) == 0)
>>>>>> +    __builtin_abort ();
>>>>>> +  if (__builtin_memchr (local, null, 5) == 0)
>>>>>> +    __builtin_abort ();
>>>>>
>>>>> How is that a meaningful test? This seems to work even with an
>>>>> unpatched gcc. I'd have expected something that shows a benefit for
>>>>> doing this conversion, and maybe also a test that shows it isn't
>>>>> done in cases where it's not allowed.
>>>
>>> It's meaningful as it scans that there's no __builtin_memchr in optimized dump.
>>> I'm adding new tests that does the opposite test.
>>>
>>>>>
>>>>>> tree
>>>>>> -build_string_literal (int len, const char *str)
>>>>>> +build_string_literal (int len, const char *str, bool build_addr_expr)
>>>>>
>>>>> New arguments should be documented in the function comment.
>>>
>>> Yep, improved.
>>>
>>>>>
>>>>>> +/* Return TRUE when CTOR can be converted to a string constant.  */
>>>>>
>>>>> "if", not "when".
>>>
>>> Done.
>>>
>>>>>
>>>>>> +  unsigned HOST_WIDE_INT elements = CONSTRUCTOR_NELTS (ctor);
>>>>>> +  FOR_EACH_CONSTRUCTOR_ELT (CONSTRUCTOR_ELTS (ctor), idx, key, value)
>>>>>> +    {
>>>>>> +      if (key == NULL_TREE
>>>>>> +     || TREE_CODE (key) != INTEGER_CST
>>>>>> +     || !tree_fits_uhwi_p (value)
>>>>>> +     || !useless_type_conversion_p (TREE_TYPE (value), char_type_node))
>>>>>> +   return false;
>>>>>
>>>>> Shouldn't all elements have the same type, or do you really have to
>>>>> call useless_type_conversion in a loop?
>>>>>
>>>>>> +      /* Allow zero character just at the end of a string.  */
>>>>>> +      if (integer_zerop (value))
>>>>>> +   return idx == elements - 1;
>>>>>
>>>>> Don't you also have to explicitly check it's there?
>>>>>
>>>>>
>>>>> Bernd
>>>
>>>
>>> Patch can bootstrap on ppc64le-redhat-linux and survives regression tests.
>>
>> I'm curious about the
>>
>> @@ -1495,7 +1495,8 @@ gimplify_decl_expr (tree *stmt_p, gimple_seq *seq_p)
>>         {
>>           if (!TREE_STATIC (decl))
>>             {
>> -             DECL_INITIAL (decl) = NULL_TREE;
>> +             if (!TREE_READONLY (decl) || TREE_CODE (init) != STRING_CST)
>> +               DECL_INITIAL (decl) = NULL_TREE;
>>               init = build2 (INIT_EXPR, void_type_node, decl, init);
>>               gimplify_and_add (init, seq_p);
>>               ggc_free (init);
>>
>> change.  Why keep DECL_INITIAL if you build a INIT_EXPR anyway?
>>
>> @@ -4438,6 +4439,19 @@ gimplify_init_constructor (tree *expr_p,
>> gimple_seq *pre_p, gimple_seq *post_p,
>>             break;
>>           }
>>
>> +       /* Replace a ctor with a string constant with possible.  */
>> +       if (TREE_READONLY (object)
>> +           && VAR_P (object))
>> +         {
>> +           tree string_ctor = convert_ctor_to_string_cst (ctor);
>> +           if (string_ctor)
>> +             {
>> +               TREE_OPERAND (*expr_p, 1) = string_ctor;
>> +               DECL_INITIAL (object) = string_ctor;
>> +               break;
>> +             }
>> +         }
>> +
>>         /* Fetch information about the constructor to direct later processing.
>>            We might want to make static versions of it in various cases, and
>>            can only do so if it known to be a valid constant initializer.  */
>>
>> hmm, so both these hunks will end up keeping a DECL_INITIAL
>> for non-static local consts?  Usually we end up with
>>
>> main ()
>> {
>>   const char local[5];
>>
>>   <bb 2>:
>>   local = "abcd";
>>
>> here.  So you keep DECL_INITIAL for folding?
>>
>> I believe we should create CONST_DECLs for the above (and make
>> CONST_DECLs first-class
>> symtab citizens), thus avoid runtime stack initialization for the
>> above and instead emit
>> "abcd" to the constant pool?
>
> Hi Richi.
>
> I've just returned to this in order to resolve long pending unfinished stuff.
> I have couple of questions:
>
> a) what should create a CONST_DECL, a FE or gimplifier when it identifies that
> it can be converted to CONST_DECL?

The gimplifier (prototype patches of mine did it that way when trying
to get rid of
&STRING_CST).

> b) Why do we need to put such local variables to symtab?

We don't need to, but we eventually should?

> c) Do we have a target that uses CONST_DECL for such (or similar) purpose?

Not sure.  gimplify_init_constructor ends up with .LC0* on for example aarch64
for initializers of arrays for example.

Attached old &STRING_CST -> &CONST_DECL gimplification patch (no
longer applies).

Richard.

> Thanks,
> Martin
>
>>
>> +  /* Skip constructors with a hole.  */
>> +  if (CONSTRUCTOR_NELTS (ctor) != ctor_length)
>> +    return false;
>>
>> not sure if that's maybe too clever ;)
>>
>> +  FOR_EACH_CONSTRUCTOR_ELT (CONSTRUCTOR_ELTS (ctor), idx, key, value)
>> +    {
>> +      if (key == NULL_TREE
>> +         || !tree_fits_uhwi_p (key)
>> +         || !tree_fits_uhwi_p (value))
>> +       return false;
>>
>> I think key == NULL is very well valid (you are not using it...).  I'd
>> instead do
>>
>>      if (key && compare_tree_int (key, idx) != 0)
>>        return false;
>>
>> for the hole check.  value should always fit uhwi given the earlier
>> element type check.
>>
>> +tree
>> +convert_ctor_to_string_cst (tree ctor)
>> +{
>> +  if (!can_convert_ctor_to_string_cst (ctor))
>> +    return NULL_TREE;
>> +
>> +  unsigned HOST_WIDE_INT idx;
>> +  tree value;
>> +  vec<constructor_elt, va_gc> *elts = CONSTRUCTOR_ELTS (ctor);
>> +  char *str = XNEWVEC (char, elts->length ());
>> +
>> +  FOR_EACH_CONSTRUCTOR_VALUE (elts, idx, value)
>> +    str[idx] = (char)tree_to_uhwi (value);
>> +
>> +  tree init = build_string_literal (elts->length (),
>> +                                   ggc_alloc_string (str, elts->length ()),
>> +                                   false);
>> +  free (str);
>>
>> why alloc str on the heap, copy it to gc and the copy it again?
>> That is, you can pass 'str' to build_string_literal directly I think.
>>
>> In fact as you are refactoring build_string_literal please
>> refactor build_string into a build_string_raw taking just 'len'
>> (thus leaving out the actual string initialization apart from
>> 'appending' '\0') and then avoid the heap allocation.
>>
>> Refactor build_string_literal to take a tree STRING_CST
>> and build_string_literal_addr to build it's address.
>>
>> Thanks,
>> Richard.
>>
>>
>>
>>
>>> Martin
>>>
>

[-- Attachment #2: fix-pr50199 --]
[-- Type: application/octet-stream, Size: 4801 bytes --]

Index: gcc/gimple.c
===================================================================
*** gcc/gimple.c.orig	2012-04-05 14:04:32.630714246 +0200
--- gcc/gimple.c	2012-04-05 14:05:06.477712225 +0200
*************** is_gimple_address (const_tree t)
*** 2742,2748 ****
        op = TREE_OPERAND (op, 0);
      }
  
!   if (CONSTANT_CLASS_P (op) || TREE_CODE (op) == MEM_REF)
      return true;
  
    switch (TREE_CODE (op))
--- 2742,2748 ----
        op = TREE_OPERAND (op, 0);
      }
  
!   if (TREE_CODE (op) == MEM_REF)
      return true;
  
    switch (TREE_CODE (op))
*************** is_gimple_invariant_address (const_tree
*** 2778,2788 ****
      {
        const_tree op0 = TREE_OPERAND (op, 0);
        return (TREE_CODE (op0) == ADDR_EXPR
! 	      && (CONSTANT_CLASS_P (TREE_OPERAND (op0, 0))
! 		  || decl_address_invariant_p (TREE_OPERAND (op0, 0))));
      }
  
!   return CONSTANT_CLASS_P (op) || decl_address_invariant_p (op);
  }
  
  /* Return true if T is a gimple invariant address at IPA level
--- 2778,2787 ----
      {
        const_tree op0 = TREE_OPERAND (op, 0);
        return (TREE_CODE (op0) == ADDR_EXPR
! 	      && decl_address_invariant_p (TREE_OPERAND (op0, 0)));
      }
  
!   return decl_address_invariant_p (op);
  }
  
  /* Return true if T is a gimple invariant address at IPA level
*************** is_gimple_ip_invariant_address (const_tr
*** 2804,2814 ****
      {
        const_tree op0 = TREE_OPERAND (op, 0);
        return (TREE_CODE (op0) == ADDR_EXPR
! 	      && (CONSTANT_CLASS_P (TREE_OPERAND (op0, 0))
! 		  || decl_address_ip_invariant_p (TREE_OPERAND (op0, 0))));
      }
  
!   return CONSTANT_CLASS_P (op) || decl_address_ip_invariant_p (op);
  }
  
  /* Return true if T is a GIMPLE minimal invariant.  It's a restricted
--- 2803,2812 ----
      {
        const_tree op0 = TREE_OPERAND (op, 0);
        return (TREE_CODE (op0) == ADDR_EXPR
! 	      && decl_address_ip_invariant_p (TREE_OPERAND (op0, 0)));
      }
  
!   return decl_address_ip_invariant_p (op);
  }
  
  /* Return true if T is a GIMPLE minimal invariant.  It's a restricted
*************** is_gimple_id (tree t)
*** 2906,2914 ****
    return (is_gimple_variable (t)
  	  || TREE_CODE (t) == FUNCTION_DECL
  	  || TREE_CODE (t) == LABEL_DECL
! 	  || TREE_CODE (t) == CONST_DECL
! 	  /* Allow string constants, since they are addressable.  */
! 	  || TREE_CODE (t) == STRING_CST);
  }
  
  /* Return true if T is a non-aggregate register variable.  */
--- 2904,2910 ----
    return (is_gimple_variable (t)
  	  || TREE_CODE (t) == FUNCTION_DECL
  	  || TREE_CODE (t) == LABEL_DECL
! 	  || TREE_CODE (t) == CONST_DECL);
  }
  
  /* Return true if T is a non-aggregate register variable.  */
*************** is_gimple_mem_ref_addr (tree t)
*** 3009,3016 ****
    return (is_gimple_reg (t)
  	  || TREE_CODE (t) == INTEGER_CST
  	  || (TREE_CODE (t) == ADDR_EXPR
! 	      && (CONSTANT_CLASS_P (TREE_OPERAND (t, 0))
! 		  || decl_address_invariant_p (TREE_OPERAND (t, 0)))));
  }
  
  
--- 3005,3011 ----
    return (is_gimple_reg (t)
  	  || TREE_CODE (t) == INTEGER_CST
  	  || (TREE_CODE (t) == ADDR_EXPR
! 	      && decl_address_invariant_p (TREE_OPERAND (t, 0))));
  }
  
  
Index: gcc/gimplify.c
===================================================================
*** gcc/gimplify.c.orig	2012-04-05 14:05:04.000000000 +0200
--- gcc/gimplify.c	2012-04-05 14:05:13.849711785 +0200
*************** gimplify_compound_lval (tree *expr_p, gi
*** 2130,2136 ****
       so as to match the min_lval predicate.  Failure to do so may result
       in the creation of large aggregate temporaries.  */
    tret = gimplify_expr (p, pre_p, post_p, is_gimple_min_lval,
! 			fallback | fb_lvalue);
    ret = MIN (ret, tret);
  
    /* And finally, the indices and operands to BIT_FIELD_REF.  During this
--- 2130,2137 ----
       so as to match the min_lval predicate.  Failure to do so may result
       in the creation of large aggregate temporaries.  */
    tret = gimplify_expr (p, pre_p, post_p, is_gimple_min_lval,
! 			VEC_length (tree, stack) != 0
! 			? fb_lvalue : fallback | fb_lvalue);
    ret = MIN (ret, tret);
  
    /* And finally, the indices and operands to BIT_FIELD_REF.  During this
*************** gimplify_expr (tree *expr_p, gimple_seq
*** 7086,7092 ****
  	case STRING_CST:
  	case COMPLEX_CST:
  	case VECTOR_CST:
! 	  ret = GS_ALL_DONE;
  	  break;
  
  	case CONST_DECL:
--- 7087,7102 ----
  	case STRING_CST:
  	case COMPLEX_CST:
  	case VECTOR_CST:
! 	  if (fallback & fb_rvalue)
! 	    ret = GS_ALL_DONE;
! 	  else
! 	    {
! 	      tree cdecl = build_decl (UNKNOWN_LOCATION, CONST_DECL,
! 				       NULL_TREE, TREE_TYPE (*expr_p));
! 	      DECL_INITIAL (cdecl) = *expr_p;
! 	      *expr_p = cdecl;
! 	      ret = GS_OK;
! 	    }
  	  break;
  
  	case CONST_DECL:

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] Convert character arrays to string csts
  2017-08-08 13:19           ` Richard Biener
@ 2017-08-09  9:15             ` Martin Liška
  2017-08-09  9:43               ` Richard Biener
  0 siblings, 1 reply; 15+ messages in thread
From: Martin Liška @ 2017-08-09  9:15 UTC (permalink / raw)
  To: Richard Biener; +Cc: Jan Hubicka, Bernd Schmidt, GCC Patches

On 08/08/2017 03:18 PM, Richard Biener wrote:
> On Tue, Aug 8, 2017 at 1:47 PM, Martin Liška <mliska@suse.cz> wrote:
>> On 11/09/2016 11:22 AM, Richard Biener wrote:
>>> On Fri, Nov 4, 2016 at 2:33 PM, Martin Liška <mliska@suse.cz> wrote:
>>>> On 11/03/2016 02:00 PM, Jan Hubicka wrote:
>>>>>> On 11/03/2016 01:12 PM, Martin Liška wrote:
>>>>>>> +  tree init = DECL_INITIAL (decl);
>>>>>>> +  if (init
>>>>>>> +      && TREE_READONLY (decl)
>>>>>>> +      && can_convert_ctor_to_string_cst (init))
>>>>>>> +    DECL_INITIAL (decl) = build_string_cst_from_ctor (init);
>>>>>>
>>>>>> I'd merge these two new functions since they're only ever called
>>>>>> together. We'd then have something like
>>>>>>
>>>>>> if (init && TREE_READONLY (decl))
>>>>>>   init = convert_ctor_to_string_cst (init);
>>>>>> if (init)
>>>>>>   DECL_INITIAL (decl) = init;
>>>>
>>>> Done.
>>>>
>>>>>>
>>>>>> I'll defer to Jan on whether finalize_decl seems like a good place
>>>>>> to do this.
>>>>>
>>>>> I think finalize_decl may be bit too early because frontends may expects the
>>>>> ctors to be in a way they produced them.  We only want to convert those arrays
>>>>> seen by middle-end so I would move the logic to varpool_node::analyze
>>>>>
>>>>> Otherwise the patch seems fine to me.
>>>>>
>>>>> Honza
>>>>>>
>>>>>>> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/builtins-folding-gimple.c b/gcc/testsuite/gcc.dg/tree-ssa/builtins-folding-gimple.c
>>>>>>> index 283bd1c..b2d1fd5 100644
>>>>>>> --- a/gcc/testsuite/gcc.dg/tree-ssa/builtins-folding-gimple.c
>>>>>>> +++ b/gcc/testsuite/gcc.dg/tree-ssa/builtins-folding-gimple.c
>>>>>>> @@ -4,12 +4,15 @@
>>>>>>> char *buffer1;
>>>>>>> char *buffer2;
>>>>>>>
>>>>>>> +const char global[] = {'a', 'b', 'c', 'd', '\0'};
>>>>>>> +
>>>>>>> #define SIZE 1000
>>>>>>>
>>>>>>> int
>>>>>>> main (void)
>>>>>>> {
>>>>>>>   const char* const foo1 = "hello world";
>>>>>>> +  const char local[] = "abcd";
>>>>>>>
>>>>>>>   buffer1 = __builtin_malloc (SIZE);
>>>>>>>   __builtin_strcpy (buffer1, foo1);
>>>>>>> @@ -45,6 +48,10 @@ main (void)
>>>>>>>     __builtin_abort ();
>>>>>>>   if (__builtin_memchr (foo1, null, 12) != foo1 + 11)
>>>>>>>     __builtin_abort ();
>>>>>>> +  if (__builtin_memchr (global, null, 5) == 0)
>>>>>>> +    __builtin_abort ();
>>>>>>> +  if (__builtin_memchr (local, null, 5) == 0)
>>>>>>> +    __builtin_abort ();
>>>>>>
>>>>>> How is that a meaningful test? This seems to work even with an
>>>>>> unpatched gcc. I'd have expected something that shows a benefit for
>>>>>> doing this conversion, and maybe also a test that shows it isn't
>>>>>> done in cases where it's not allowed.
>>>>
>>>> It's meaningful as it scans that there's no __builtin_memchr in optimized dump.
>>>> I'm adding new tests that does the opposite test.
>>>>
>>>>>>
>>>>>>> tree
>>>>>>> -build_string_literal (int len, const char *str)
>>>>>>> +build_string_literal (int len, const char *str, bool build_addr_expr)
>>>>>>
>>>>>> New arguments should be documented in the function comment.
>>>>
>>>> Yep, improved.
>>>>
>>>>>>
>>>>>>> +/* Return TRUE when CTOR can be converted to a string constant.  */
>>>>>>
>>>>>> "if", not "when".
>>>>
>>>> Done.
>>>>
>>>>>>
>>>>>>> +  unsigned HOST_WIDE_INT elements = CONSTRUCTOR_NELTS (ctor);
>>>>>>> +  FOR_EACH_CONSTRUCTOR_ELT (CONSTRUCTOR_ELTS (ctor), idx, key, value)
>>>>>>> +    {
>>>>>>> +      if (key == NULL_TREE
>>>>>>> +     || TREE_CODE (key) != INTEGER_CST
>>>>>>> +     || !tree_fits_uhwi_p (value)
>>>>>>> +     || !useless_type_conversion_p (TREE_TYPE (value), char_type_node))
>>>>>>> +   return false;
>>>>>>
>>>>>> Shouldn't all elements have the same type, or do you really have to
>>>>>> call useless_type_conversion in a loop?
>>>>>>
>>>>>>> +      /* Allow zero character just at the end of a string.  */
>>>>>>> +      if (integer_zerop (value))
>>>>>>> +   return idx == elements - 1;
>>>>>>
>>>>>> Don't you also have to explicitly check it's there?
>>>>>>
>>>>>>
>>>>>> Bernd
>>>>
>>>>
>>>> Patch can bootstrap on ppc64le-redhat-linux and survives regression tests.
>>>
>>> I'm curious about the
>>>
>>> @@ -1495,7 +1495,8 @@ gimplify_decl_expr (tree *stmt_p, gimple_seq *seq_p)
>>>         {
>>>           if (!TREE_STATIC (decl))
>>>             {
>>> -             DECL_INITIAL (decl) = NULL_TREE;
>>> +             if (!TREE_READONLY (decl) || TREE_CODE (init) != STRING_CST)
>>> +               DECL_INITIAL (decl) = NULL_TREE;
>>>               init = build2 (INIT_EXPR, void_type_node, decl, init);
>>>               gimplify_and_add (init, seq_p);
>>>               ggc_free (init);
>>>
>>> change.  Why keep DECL_INITIAL if you build a INIT_EXPR anyway?
>>>
>>> @@ -4438,6 +4439,19 @@ gimplify_init_constructor (tree *expr_p,
>>> gimple_seq *pre_p, gimple_seq *post_p,
>>>             break;
>>>           }
>>>
>>> +       /* Replace a ctor with a string constant with possible.  */
>>> +       if (TREE_READONLY (object)
>>> +           && VAR_P (object))
>>> +         {
>>> +           tree string_ctor = convert_ctor_to_string_cst (ctor);
>>> +           if (string_ctor)
>>> +             {
>>> +               TREE_OPERAND (*expr_p, 1) = string_ctor;
>>> +               DECL_INITIAL (object) = string_ctor;
>>> +               break;
>>> +             }
>>> +         }
>>> +
>>>         /* Fetch information about the constructor to direct later processing.
>>>            We might want to make static versions of it in various cases, and
>>>            can only do so if it known to be a valid constant initializer.  */
>>>
>>> hmm, so both these hunks will end up keeping a DECL_INITIAL
>>> for non-static local consts?  Usually we end up with
>>>
>>> main ()
>>> {
>>>   const char local[5];
>>>
>>>   <bb 2>:
>>>   local = "abcd";
>>>
>>> here.  So you keep DECL_INITIAL for folding?
>>>
>>> I believe we should create CONST_DECLs for the above (and make
>>> CONST_DECLs first-class
>>> symtab citizens), thus avoid runtime stack initialization for the
>>> above and instead emit
>>> "abcd" to the constant pool?
>>
>> Hi Richi.
>>
>> I've just returned to this in order to resolve long pending unfinished stuff.
>> I have couple of questions:
>>
>> a) what should create a CONST_DECL, a FE or gimplifier when it identifies that
>> it can be converted to CONST_DECL?
> 
> The gimplifier (prototype patches of mine did it that way when trying
> to get rid of
> &STRING_CST).
> 
>> b) Why do we need to put such local variables to symtab?
> 
> We don't need to, but we eventually should?
> 
>> c) Do we have a target that uses CONST_DECL for such (or similar) purpose?
> 
> Not sure.  gimplify_init_constructor ends up with .LC0* on for example aarch64
> for initializers of arrays for example.
> 
> Attached old &STRING_CST -> &CONST_DECL gimplification patch (no
> longer applies).

Can you please Richi send me the patch in unified format so that I can apply the
rejected hunks. Looks one just needs to put it into gimple-expr.c.

Thanks,
Martin

> 
> Richard.
> 
>> Thanks,
>> Martin
>>
>>>
>>> +  /* Skip constructors with a hole.  */
>>> +  if (CONSTRUCTOR_NELTS (ctor) != ctor_length)
>>> +    return false;
>>>
>>> not sure if that's maybe too clever ;)
>>>
>>> +  FOR_EACH_CONSTRUCTOR_ELT (CONSTRUCTOR_ELTS (ctor), idx, key, value)
>>> +    {
>>> +      if (key == NULL_TREE
>>> +         || !tree_fits_uhwi_p (key)
>>> +         || !tree_fits_uhwi_p (value))
>>> +       return false;
>>>
>>> I think key == NULL is very well valid (you are not using it...).  I'd
>>> instead do
>>>
>>>      if (key && compare_tree_int (key, idx) != 0)
>>>        return false;
>>>
>>> for the hole check.  value should always fit uhwi given the earlier
>>> element type check.
>>>
>>> +tree
>>> +convert_ctor_to_string_cst (tree ctor)
>>> +{
>>> +  if (!can_convert_ctor_to_string_cst (ctor))
>>> +    return NULL_TREE;
>>> +
>>> +  unsigned HOST_WIDE_INT idx;
>>> +  tree value;
>>> +  vec<constructor_elt, va_gc> *elts = CONSTRUCTOR_ELTS (ctor);
>>> +  char *str = XNEWVEC (char, elts->length ());
>>> +
>>> +  FOR_EACH_CONSTRUCTOR_VALUE (elts, idx, value)
>>> +    str[idx] = (char)tree_to_uhwi (value);
>>> +
>>> +  tree init = build_string_literal (elts->length (),
>>> +                                   ggc_alloc_string (str, elts->length ()),
>>> +                                   false);
>>> +  free (str);
>>>
>>> why alloc str on the heap, copy it to gc and the copy it again?
>>> That is, you can pass 'str' to build_string_literal directly I think.
>>>
>>> In fact as you are refactoring build_string_literal please
>>> refactor build_string into a build_string_raw taking just 'len'
>>> (thus leaving out the actual string initialization apart from
>>> 'appending' '\0') and then avoid the heap allocation.
>>>
>>> Refactor build_string_literal to take a tree STRING_CST
>>> and build_string_literal_addr to build it's address.
>>>
>>> Thanks,
>>> Richard.
>>>
>>>
>>>
>>>
>>>> Martin
>>>>
>>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] Convert character arrays to string csts
  2017-08-09  9:15             ` Martin Liška
@ 2017-08-09  9:43               ` Richard Biener
  2017-08-09 11:39                 ` Martin Liška
  0 siblings, 1 reply; 15+ messages in thread
From: Richard Biener @ 2017-08-09  9:43 UTC (permalink / raw)
  To: Martin Liška; +Cc: Jan Hubicka, Bernd Schmidt, GCC Patches

On August 9, 2017 11:15:44 AM GMT+02:00, "Martin Liška" <mliska@suse.cz> wrote:
>On 08/08/2017 03:18 PM, Richard Biener wrote:
>> On Tue, Aug 8, 2017 at 1:47 PM, Martin Liška <mliska@suse.cz> wrote:
>>> On 11/09/2016 11:22 AM, Richard Biener wrote:
>>>> On Fri, Nov 4, 2016 at 2:33 PM, Martin Liška <mliska@suse.cz>
>wrote:
>>>>> On 11/03/2016 02:00 PM, Jan Hubicka wrote:
>>>>>>> On 11/03/2016 01:12 PM, Martin Liška wrote:
>>>>>>>> +  tree init = DECL_INITIAL (decl);
>>>>>>>> +  if (init
>>>>>>>> +      && TREE_READONLY (decl)
>>>>>>>> +      && can_convert_ctor_to_string_cst (init))
>>>>>>>> +    DECL_INITIAL (decl) = build_string_cst_from_ctor (init);
>>>>>>>
>>>>>>> I'd merge these two new functions since they're only ever called
>>>>>>> together. We'd then have something like
>>>>>>>
>>>>>>> if (init && TREE_READONLY (decl))
>>>>>>>   init = convert_ctor_to_string_cst (init);
>>>>>>> if (init)
>>>>>>>   DECL_INITIAL (decl) = init;
>>>>>
>>>>> Done.
>>>>>
>>>>>>>
>>>>>>> I'll defer to Jan on whether finalize_decl seems like a good
>place
>>>>>>> to do this.
>>>>>>
>>>>>> I think finalize_decl may be bit too early because frontends may
>expects the
>>>>>> ctors to be in a way they produced them.  We only want to convert
>those arrays
>>>>>> seen by middle-end so I would move the logic to
>varpool_node::analyze
>>>>>>
>>>>>> Otherwise the patch seems fine to me.
>>>>>>
>>>>>> Honza
>>>>>>>
>>>>>>>> diff --git
>a/gcc/testsuite/gcc.dg/tree-ssa/builtins-folding-gimple.c
>b/gcc/testsuite/gcc.dg/tree-ssa/builtins-folding-gimple.c
>>>>>>>> index 283bd1c..b2d1fd5 100644
>>>>>>>> --- a/gcc/testsuite/gcc.dg/tree-ssa/builtins-folding-gimple.c
>>>>>>>> +++ b/gcc/testsuite/gcc.dg/tree-ssa/builtins-folding-gimple.c
>>>>>>>> @@ -4,12 +4,15 @@
>>>>>>>> char *buffer1;
>>>>>>>> char *buffer2;
>>>>>>>>
>>>>>>>> +const char global[] = {'a', 'b', 'c', 'd', '\0'};
>>>>>>>> +
>>>>>>>> #define SIZE 1000
>>>>>>>>
>>>>>>>> int
>>>>>>>> main (void)
>>>>>>>> {
>>>>>>>>   const char* const foo1 = "hello world";
>>>>>>>> +  const char local[] = "abcd";
>>>>>>>>
>>>>>>>>   buffer1 = __builtin_malloc (SIZE);
>>>>>>>>   __builtin_strcpy (buffer1, foo1);
>>>>>>>> @@ -45,6 +48,10 @@ main (void)
>>>>>>>>     __builtin_abort ();
>>>>>>>>   if (__builtin_memchr (foo1, null, 12) != foo1 + 11)
>>>>>>>>     __builtin_abort ();
>>>>>>>> +  if (__builtin_memchr (global, null, 5) == 0)
>>>>>>>> +    __builtin_abort ();
>>>>>>>> +  if (__builtin_memchr (local, null, 5) == 0)
>>>>>>>> +    __builtin_abort ();
>>>>>>>
>>>>>>> How is that a meaningful test? This seems to work even with an
>>>>>>> unpatched gcc. I'd have expected something that shows a benefit
>for
>>>>>>> doing this conversion, and maybe also a test that shows it isn't
>>>>>>> done in cases where it's not allowed.
>>>>>
>>>>> It's meaningful as it scans that there's no __builtin_memchr in
>optimized dump.
>>>>> I'm adding new tests that does the opposite test.
>>>>>
>>>>>>>
>>>>>>>> tree
>>>>>>>> -build_string_literal (int len, const char *str)
>>>>>>>> +build_string_literal (int len, const char *str, bool
>build_addr_expr)
>>>>>>>
>>>>>>> New arguments should be documented in the function comment.
>>>>>
>>>>> Yep, improved.
>>>>>
>>>>>>>
>>>>>>>> +/* Return TRUE when CTOR can be converted to a string
>constant.  */
>>>>>>>
>>>>>>> "if", not "when".
>>>>>
>>>>> Done.
>>>>>
>>>>>>>
>>>>>>>> +  unsigned HOST_WIDE_INT elements = CONSTRUCTOR_NELTS (ctor);
>>>>>>>> +  FOR_EACH_CONSTRUCTOR_ELT (CONSTRUCTOR_ELTS (ctor), idx, key,
>value)
>>>>>>>> +    {
>>>>>>>> +      if (key == NULL_TREE
>>>>>>>> +     || TREE_CODE (key) != INTEGER_CST
>>>>>>>> +     || !tree_fits_uhwi_p (value)
>>>>>>>> +     || !useless_type_conversion_p (TREE_TYPE (value),
>char_type_node))
>>>>>>>> +   return false;
>>>>>>>
>>>>>>> Shouldn't all elements have the same type, or do you really have
>to
>>>>>>> call useless_type_conversion in a loop?
>>>>>>>
>>>>>>>> +      /* Allow zero character just at the end of a string.  */
>>>>>>>> +      if (integer_zerop (value))
>>>>>>>> +   return idx == elements - 1;
>>>>>>>
>>>>>>> Don't you also have to explicitly check it's there?
>>>>>>>
>>>>>>>
>>>>>>> Bernd
>>>>>
>>>>>
>>>>> Patch can bootstrap on ppc64le-redhat-linux and survives
>regression tests.
>>>>
>>>> I'm curious about the
>>>>
>>>> @@ -1495,7 +1495,8 @@ gimplify_decl_expr (tree *stmt_p, gimple_seq
>*seq_p)
>>>>         {
>>>>           if (!TREE_STATIC (decl))
>>>>             {
>>>> -             DECL_INITIAL (decl) = NULL_TREE;
>>>> +             if (!TREE_READONLY (decl) || TREE_CODE (init) !=
>STRING_CST)
>>>> +               DECL_INITIAL (decl) = NULL_TREE;
>>>>               init = build2 (INIT_EXPR, void_type_node, decl,
>init);
>>>>               gimplify_and_add (init, seq_p);
>>>>               ggc_free (init);
>>>>
>>>> change.  Why keep DECL_INITIAL if you build a INIT_EXPR anyway?
>>>>
>>>> @@ -4438,6 +4439,19 @@ gimplify_init_constructor (tree *expr_p,
>>>> gimple_seq *pre_p, gimple_seq *post_p,
>>>>             break;
>>>>           }
>>>>
>>>> +       /* Replace a ctor with a string constant with possible.  */
>>>> +       if (TREE_READONLY (object)
>>>> +           && VAR_P (object))
>>>> +         {
>>>> +           tree string_ctor = convert_ctor_to_string_cst (ctor);
>>>> +           if (string_ctor)
>>>> +             {
>>>> +               TREE_OPERAND (*expr_p, 1) = string_ctor;
>>>> +               DECL_INITIAL (object) = string_ctor;
>>>> +               break;
>>>> +             }
>>>> +         }
>>>> +
>>>>         /* Fetch information about the constructor to direct later
>processing.
>>>>            We might want to make static versions of it in various
>cases, and
>>>>            can only do so if it known to be a valid constant
>initializer.  */
>>>>
>>>> hmm, so both these hunks will end up keeping a DECL_INITIAL
>>>> for non-static local consts?  Usually we end up with
>>>>
>>>> main ()
>>>> {
>>>>   const char local[5];
>>>>
>>>>   <bb 2>:
>>>>   local = "abcd";
>>>>
>>>> here.  So you keep DECL_INITIAL for folding?
>>>>
>>>> I believe we should create CONST_DECLs for the above (and make
>>>> CONST_DECLs first-class
>>>> symtab citizens), thus avoid runtime stack initialization for the
>>>> above and instead emit
>>>> "abcd" to the constant pool?
>>>
>>> Hi Richi.
>>>
>>> I've just returned to this in order to resolve long pending
>unfinished stuff.
>>> I have couple of questions:
>>>
>>> a) what should create a CONST_DECL, a FE or gimplifier when it
>identifies that
>>> it can be converted to CONST_DECL?
>> 
>> The gimplifier (prototype patches of mine did it that way when trying
>> to get rid of
>> &STRING_CST).
>> 
>>> b) Why do we need to put such local variables to symtab?
>> 
>> We don't need to, but we eventually should?
>> 
>>> c) Do we have a target that uses CONST_DECL for such (or similar)
>purpose?
>> 
>> Not sure.  gimplify_init_constructor ends up with .LC0* on for
>example aarch64
>> for initializers of arrays for example.
>> 
>> Attached old &STRING_CST -> &CONST_DECL gimplification patch (no
>> longer applies).
>
>Can you please Richi send me the patch in unified format so that I can
>apply the
>rejected hunks. Looks one just needs to put it into gimple-expr.c.

I only have the patch I sent you so I can't re-diff.

Richard.

>Thanks,
>Martin
>
>> 
>> Richard.
>> 
>>> Thanks,
>>> Martin
>>>
>>>>
>>>> +  /* Skip constructors with a hole.  */
>>>> +  if (CONSTRUCTOR_NELTS (ctor) != ctor_length)
>>>> +    return false;
>>>>
>>>> not sure if that's maybe too clever ;)
>>>>
>>>> +  FOR_EACH_CONSTRUCTOR_ELT (CONSTRUCTOR_ELTS (ctor), idx, key,
>value)
>>>> +    {
>>>> +      if (key == NULL_TREE
>>>> +         || !tree_fits_uhwi_p (key)
>>>> +         || !tree_fits_uhwi_p (value))
>>>> +       return false;
>>>>
>>>> I think key == NULL is very well valid (you are not using it...). 
>I'd
>>>> instead do
>>>>
>>>>      if (key && compare_tree_int (key, idx) != 0)
>>>>        return false;
>>>>
>>>> for the hole check.  value should always fit uhwi given the earlier
>>>> element type check.
>>>>
>>>> +tree
>>>> +convert_ctor_to_string_cst (tree ctor)
>>>> +{
>>>> +  if (!can_convert_ctor_to_string_cst (ctor))
>>>> +    return NULL_TREE;
>>>> +
>>>> +  unsigned HOST_WIDE_INT idx;
>>>> +  tree value;
>>>> +  vec<constructor_elt, va_gc> *elts = CONSTRUCTOR_ELTS (ctor);
>>>> +  char *str = XNEWVEC (char, elts->length ());
>>>> +
>>>> +  FOR_EACH_CONSTRUCTOR_VALUE (elts, idx, value)
>>>> +    str[idx] = (char)tree_to_uhwi (value);
>>>> +
>>>> +  tree init = build_string_literal (elts->length (),
>>>> +                                   ggc_alloc_string (str,
>elts->length ()),
>>>> +                                   false);
>>>> +  free (str);
>>>>
>>>> why alloc str on the heap, copy it to gc and the copy it again?
>>>> That is, you can pass 'str' to build_string_literal directly I
>think.
>>>>
>>>> In fact as you are refactoring build_string_literal please
>>>> refactor build_string into a build_string_raw taking just 'len'
>>>> (thus leaving out the actual string initialization apart from
>>>> 'appending' '\0') and then avoid the heap allocation.
>>>>
>>>> Refactor build_string_literal to take a tree STRING_CST
>>>> and build_string_literal_addr to build it's address.
>>>>
>>>> Thanks,
>>>> Richard.
>>>>
>>>>
>>>>
>>>>
>>>>> Martin
>>>>>
>>>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] Convert character arrays to string csts
  2017-08-09  9:43               ` Richard Biener
@ 2017-08-09 11:39                 ` Martin Liška
  2017-08-14 11:58                   ` Richard Biener
  0 siblings, 1 reply; 15+ messages in thread
From: Martin Liška @ 2017-08-09 11:39 UTC (permalink / raw)
  To: Richard Biener; +Cc: Jan Hubicka, Bernd Schmidt, GCC Patches

[-- Attachment #1: Type: text/plain, Size: 354 bytes --]

On 08/09/2017 11:43 AM, Richard Biener wrote:
> I only have the patch I sent you so I can't re-diff.
> 
> Richard.

Hi.

I'm sending rebased version of the patch. However the patch
eats all my memory when e.g. building ../../../libgcc/libgcov-merge.c.
If you have time, please try to make it working for &STRING_CST. I can continue
then.

Thanks,
Martin

[-- Attachment #2: fix-pr50199-rebased.patch --]
[-- Type: text/x-patch, Size: 3751 bytes --]

diff --git a/gcc/gimple-expr.c b/gcc/gimple-expr.c
index c1771fcf1d0..eb22456c241 100644
--- a/gcc/gimple-expr.c
+++ b/gcc/gimple-expr.c
@@ -631,7 +631,7 @@ is_gimple_address (const_tree t)
       op = TREE_OPERAND (op, 0);
     }
 
-  if (CONSTANT_CLASS_P (op) || TREE_CODE (op) == MEM_REF)
+  if (TREE_CODE (op) == MEM_REF)
     return true;
 
   switch (TREE_CODE (op))
@@ -667,11 +667,10 @@ is_gimple_invariant_address (const_tree t)
     {
       const_tree op0 = TREE_OPERAND (op, 0);
       return (TREE_CODE (op0) == ADDR_EXPR
-	      && (CONSTANT_CLASS_P (TREE_OPERAND (op0, 0))
-		  || decl_address_invariant_p (TREE_OPERAND (op0, 0))));
+	      && decl_address_invariant_p (TREE_OPERAND (op0, 0)));
     }
 
-  return CONSTANT_CLASS_P (op) || decl_address_invariant_p (op);
+  return decl_address_invariant_p (op);
 }
 
 /* Return true if T is a gimple invariant address at IPA level
@@ -693,11 +692,10 @@ is_gimple_ip_invariant_address (const_tree t)
     {
       const_tree op0 = TREE_OPERAND (op, 0);
       return (TREE_CODE (op0) == ADDR_EXPR
-	      && (CONSTANT_CLASS_P (TREE_OPERAND (op0, 0))
-		  || decl_address_ip_invariant_p (TREE_OPERAND (op0, 0))));
+	      && decl_address_ip_invariant_p (TREE_OPERAND (op0, 0)));
     }
 
-  return CONSTANT_CLASS_P (op) || decl_address_ip_invariant_p (op);
+  return decl_address_ip_invariant_p (op);
 }
 
 /* Return true if T is a GIMPLE minimal invariant.  It's a restricted
@@ -732,7 +730,7 @@ is_gimple_reg (tree t)
   if (virtual_operand_p (t))
     return false;
 
-  if (TREE_CODE (t) == SSA_NAME)
+  if (TREE_CODE (t) == SSA_NAME || TREE_CODE (t) == CONST_DECL)
     return true;
 
   if (!is_gimple_variable (t))
@@ -825,8 +823,7 @@ is_gimple_mem_ref_addr (tree t)
   return (is_gimple_reg (t)
 	  || TREE_CODE (t) == INTEGER_CST
 	  || (TREE_CODE (t) == ADDR_EXPR
-	      && (CONSTANT_CLASS_P (TREE_OPERAND (t, 0))
-		  || decl_address_invariant_p (TREE_OPERAND (t, 0)))));
+	      && decl_address_invariant_p (TREE_OPERAND (t, 0))));
 }
 
 /* Hold trees marked addressable during expand.  */
diff --git a/gcc/gimple-expr.h b/gcc/gimple-expr.h
index 6e969164a37..04de209c092 100644
--- a/gcc/gimple-expr.h
+++ b/gcc/gimple-expr.h
@@ -94,9 +94,7 @@ is_gimple_id (tree t)
   return (is_gimple_variable (t)
 	  || TREE_CODE (t) == FUNCTION_DECL
 	  || TREE_CODE (t) == LABEL_DECL
-	  || TREE_CODE (t) == CONST_DECL
-	  /* Allow string constants, since they are addressable.  */
-	  || TREE_CODE (t) == STRING_CST);
+	  || TREE_CODE (t) == CONST_DECL);
 }
 
 /* Return true if OP, an SSA name or a DECL is a virtual operand.  */
diff --git a/gcc/gimplify.c b/gcc/gimplify.c
index 86623e09f5d..d56d1dfe8ab 100644
--- a/gcc/gimplify.c
+++ b/gcc/gimplify.c
@@ -2871,7 +2871,8 @@ gimplify_compound_lval (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p,
      so as to match the min_lval predicate.  Failure to do so may result
      in the creation of large aggregate temporaries.  */
   tret = gimplify_expr (p, pre_p, post_p, is_gimple_min_lval,
-			fallback | fb_lvalue);
+			expr_stack.length () > 0
+			? fb_lvalue : fallback | fb_lvalue);
   ret = MIN (ret, tret);
 
   /* And finally, the indices and operands of ARRAY_REF.  During this
@@ -11503,7 +11504,17 @@ gimplify_expr (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p,
 	     that in the GIMPLE IL.  */
 	  if (TREE_OVERFLOW_P (*expr_p))
 	    *expr_p = drop_tree_overflow (*expr_p);
-	  ret = GS_ALL_DONE;
+
+	  if (fallback & fb_rvalue)
+	    ret = GS_ALL_DONE;
+	  else
+	    {
+	      tree cdecl = build_decl (UNKNOWN_LOCATION, CONST_DECL,
+				       NULL_TREE, TREE_TYPE (*expr_p));
+	      DECL_INITIAL (cdecl) = *expr_p;
+	      *expr_p = cdecl;
+	      ret = GS_OK;
+	    }
 	  break;
 
 	case CONST_DECL:

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] Convert character arrays to string csts
  2017-08-09 11:39                 ` Martin Liška
@ 2017-08-14 11:58                   ` Richard Biener
  2017-08-14 12:31                     ` Richard Biener
  0 siblings, 1 reply; 15+ messages in thread
From: Richard Biener @ 2017-08-14 11:58 UTC (permalink / raw)
  To: Martin Liška; +Cc: Jan Hubicka, Bernd Schmidt, GCC Patches

On Wed, Aug 9, 2017 at 1:39 PM, Martin Liška <mliska@suse.cz> wrote:
> On 08/09/2017 11:43 AM, Richard Biener wrote:
>> I only have the patch I sent you so I can't re-diff.
>>
>> Richard.
>
> Hi.
>
> I'm sending rebased version of the patch. However the patch
> eats all my memory when e.g. building ../../../libgcc/libgcov-merge.c.
> If you have time, please try to make it working for &STRING_CST. I can continue
> then.

Note I didn't send the patch to make you use it -- it changes too much and I
never fixed all the fall out.  It was meant as an exercise on how to
use a CONST_DECL.

Richard.

> Thanks,
> Martin

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] Convert character arrays to string csts
  2017-08-14 11:58                   ` Richard Biener
@ 2017-08-14 12:31                     ` Richard Biener
  2017-08-14 13:07                       ` Richard Biener
  0 siblings, 1 reply; 15+ messages in thread
From: Richard Biener @ 2017-08-14 12:31 UTC (permalink / raw)
  To: Martin Liška; +Cc: Jan Hubicka, Bernd Schmidt, GCC Patches

On Mon, Aug 14, 2017 at 1:23 PM, Richard Biener
<richard.guenther@gmail.com> wrote:
> On Wed, Aug 9, 2017 at 1:39 PM, Martin Liška <mliska@suse.cz> wrote:
>> On 08/09/2017 11:43 AM, Richard Biener wrote:
>>> I only have the patch I sent you so I can't re-diff.
>>>
>>> Richard.
>>
>> Hi.
>>
>> I'm sending rebased version of the patch. However the patch
>> eats all my memory when e.g. building ../../../libgcc/libgcov-merge.c.
>> If you have time, please try to make it working for &STRING_CST. I can continue
>> then.
>
> Note I didn't send the patch to make you use it -- it changes too much and I
> never fixed all the fall out.  It was meant as an exercise on how to
> use a CONST_DECL.

Btw. the

@@ -732,7 +730,7 @@ is_gimple_reg (tree t)
   if (virtual_operand_p (t))
     return false;

-  if (TREE_CODE (t) == SSA_NAME)
+  if (TREE_CODE (t) == SSA_NAME || TREE_CODE (t) == CONST_DECL)
     return true;

   if (!is_gimple_variable (t))

hunk looks wrong.

Richard.

> Richard.
>
>> Thanks,
>> Martin

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] Convert character arrays to string csts
  2017-08-14 12:31                     ` Richard Biener
@ 2017-08-14 13:07                       ` Richard Biener
  0 siblings, 0 replies; 15+ messages in thread
From: Richard Biener @ 2017-08-14 13:07 UTC (permalink / raw)
  To: Martin Liška; +Cc: Jan Hubicka, GCC Patches

[-- Attachment #1: Type: text/plain, Size: 1381 bytes --]

On Mon, Aug 14, 2017 at 1:25 PM, Richard Biener
<richard.guenther@gmail.com> wrote:
> On Mon, Aug 14, 2017 at 1:23 PM, Richard Biener
> <richard.guenther@gmail.com> wrote:
>> On Wed, Aug 9, 2017 at 1:39 PM, Martin Liška <mliska@suse.cz> wrote:
>>> On 08/09/2017 11:43 AM, Richard Biener wrote:
>>>> I only have the patch I sent you so I can't re-diff.
>>>>
>>>> Richard.
>>>
>>> Hi.
>>>
>>> I'm sending rebased version of the patch. However the patch
>>> eats all my memory when e.g. building ../../../libgcc/libgcov-merge.c.
>>> If you have time, please try to make it working for &STRING_CST. I can continue
>>> then.
>>
>> Note I didn't send the patch to make you use it -- it changes too much and I
>> never fixed all the fall out.  It was meant as an exercise on how to
>> use a CONST_DECL.
>
> Btw. the
>
> @@ -732,7 +730,7 @@ is_gimple_reg (tree t)
>    if (virtual_operand_p (t))
>      return false;
>
> -  if (TREE_CODE (t) == SSA_NAME)
> +  if (TREE_CODE (t) == SSA_NAME || TREE_CODE (t) == CONST_DECL)
>      return true;
>
>    if (!is_gimple_variable (t))
>
> hunk looks wrong.

Oh, and the gimplifier.c hunk was misapplied - the gimplification to CONST_DECL
is supposed to only happen for STRING_CSTs, not arbitrary constants.

Updated patch attached.

Richard.

>
> Richard.
>
>> Richard.
>>
>>> Thanks,
>>> Martin

[-- Attachment #2: fix-pr50199 --]
[-- Type: application/octet-stream, Size: 3605 bytes --]

diff --git a/gcc/gimple-expr.c b/gcc/gimple-expr.c
index c1771fcf1d0..eb22456c241 100644
--- a/gcc/gimple-expr.c
+++ b/gcc/gimple-expr.c
@@ -631,7 +631,7 @@ is_gimple_address (const_tree t)
       op = TREE_OPERAND (op, 0);
     }
 
-  if (CONSTANT_CLASS_P (op) || TREE_CODE (op) == MEM_REF)
+  if (TREE_CODE (op) == MEM_REF)
     return true;
 
   switch (TREE_CODE (op))
@@ -667,11 +667,10 @@ is_gimple_invariant_address (const_tree t)
     {
       const_tree op0 = TREE_OPERAND (op, 0);
       return (TREE_CODE (op0) == ADDR_EXPR
-	      && (CONSTANT_CLASS_P (TREE_OPERAND (op0, 0))
-		  || decl_address_invariant_p (TREE_OPERAND (op0, 0))));
+	      && decl_address_invariant_p (TREE_OPERAND (op0, 0)));
     }
 
-  return CONSTANT_CLASS_P (op) || decl_address_invariant_p (op);
+  return decl_address_invariant_p (op);
 }
 
 /* Return true if T is a gimple invariant address at IPA level
@@ -693,11 +692,10 @@ is_gimple_ip_invariant_address (const_tree t)
     {
       const_tree op0 = TREE_OPERAND (op, 0);
       return (TREE_CODE (op0) == ADDR_EXPR
-	      && (CONSTANT_CLASS_P (TREE_OPERAND (op0, 0))
-		  || decl_address_ip_invariant_p (TREE_OPERAND (op0, 0))));
+	      && decl_address_ip_invariant_p (TREE_OPERAND (op0, 0)));
     }
 
-  return CONSTANT_CLASS_P (op) || decl_address_ip_invariant_p (op);
+  return decl_address_ip_invariant_p (op);
 }
 
 /* Return true if T is a GIMPLE minimal invariant.  It's a restricted
@@ -825,8 +823,7 @@ is_gimple_mem_ref_addr (tree t)
   return (is_gimple_reg (t)
 	  || TREE_CODE (t) == INTEGER_CST
 	  || (TREE_CODE (t) == ADDR_EXPR
-	      && (CONSTANT_CLASS_P (TREE_OPERAND (t, 0))
-		  || decl_address_invariant_p (TREE_OPERAND (t, 0)))));
+	      && decl_address_invariant_p (TREE_OPERAND (t, 0))));
 }
 
 /* Hold trees marked addressable during expand.  */
diff --git a/gcc/gimple-expr.h b/gcc/gimple-expr.h
index 6e969164a37..04de209c092 100644
--- a/gcc/gimple-expr.h
+++ b/gcc/gimple-expr.h
@@ -94,9 +94,7 @@ is_gimple_id (tree t)
   return (is_gimple_variable (t)
 	  || TREE_CODE (t) == FUNCTION_DECL
 	  || TREE_CODE (t) == LABEL_DECL
-	  || TREE_CODE (t) == CONST_DECL
-	  /* Allow string constants, since they are addressable.  */
-	  || TREE_CODE (t) == STRING_CST);
+	  || TREE_CODE (t) == CONST_DECL);
 }
 
 /* Return true if OP, an SSA name or a DECL is a virtual operand.  */
diff --git a/gcc/gimplify.c b/gcc/gimplify.c
index 86623e09f5d..d56d1dfe8ab 100644
--- a/gcc/gimplify.c
+++ b/gcc/gimplify.c
@@ -2871,7 +2871,8 @@ gimplify_compound_lval (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p,
      so as to match the min_lval predicate.  Failure to do so may result
      in the creation of large aggregate temporaries.  */
   tret = gimplify_expr (p, pre_p, post_p, is_gimple_min_lval,
-			fallback | fb_lvalue);
+			expr_stack.length () > 0
+			? fb_lvalue : fallback | fb_lvalue);
   ret = MIN (ret, tret);
 
   /* And finally, the indices and operands of ARRAY_REF.  During this
@@ -11503,7 +11504,17 @@ gimplify_expr (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p,
 	     that in the GIMPLE IL.  */
 	  if (TREE_OVERFLOW_P (*expr_p))
 	    *expr_p = drop_tree_overflow (*expr_p);
-	  ret = GS_ALL_DONE;
+
+	  if (fallback & fb_rvalue)
+	    ret = GS_ALL_DONE;
+	  else
+	    {
+	      tree cdecl = build_decl (UNKNOWN_LOCATION, CONST_DECL,
+				       NULL_TREE, TREE_TYPE (*expr_p));
+	      DECL_INITIAL (cdecl) = *expr_p;
+	      *expr_p = cdecl;
+	      ret = GS_OK;
+	    }
 	  break;
 
 	case CONST_DECL:

^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2017-08-14 11:58 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-11-03 12:12 [PATCH] Convert character arrays to string csts Martin Liška
2016-11-03 12:47 ` Richard Biener
2016-11-04 13:32   ` Martin Liška
2016-11-03 12:49 ` Bernd Schmidt
2016-11-03 13:01   ` Jan Hubicka
2016-11-04 13:34     ` Martin Liška
2016-11-09 10:23       ` Richard Biener
2017-08-08 11:47         ` Martin Liška
2017-08-08 13:19           ` Richard Biener
2017-08-09  9:15             ` Martin Liška
2017-08-09  9:43               ` Richard Biener
2017-08-09 11:39                 ` Martin Liška
2017-08-14 11:58                   ` Richard Biener
2017-08-14 12:31                     ` Richard Biener
2017-08-14 13:07                       ` Richard Biener

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).