public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] Optimize var = STRING_CST
@ 2007-08-23 23:48 Jakub Jelinek
  2007-08-24  9:59 ` Richard Guenther
  0 siblings, 1 reply; 3+ messages in thread
From: Jakub Jelinek @ 2007-08-23 23:48 UTC (permalink / raw)
  To: gcc-patches

Hi!

The following patch optimizes initialization of an array from a STRING_CST.
Without this patch store_expr will always force the STRING_CST into memory
and then do a block move from there, followed by optional clear_storage
if the STRING_CST is shorter than the array.  This patch uses
store_by_pieces if possible.
With the patch:
struct A { char c[10]; };

void
foo (void)
{
  struct A a = { "abcdefghi" };
  baz (&a);
}

void
bar (void)
{
  struct A a;
  __builtin_strcpy (&a.c[0], "abcdefghi");
  baz (&a);
}
both routines are the same on x86_64 except for
slightly different register allocation.  Without the patch,
a .LC0 constant holding the string literal is emitted for foo, and foo
copies from that constant into the a.c array.

Tested on x86_64-linux, ok for trunk?

BTW, I wonder if we shouldn't use alias set 0 for all the initialization
stores.  If yes, this is something that has already been wrong before:
both the emit_block_move and the clear_storage calls that store_expr makes
use the alias set computed for target by the caller.

2007-08-23  Jakub Jelinek  <jakub@redhat.com>

	* expr.c (store_expr): Optimize initialization of an array
	with STRING_CST.
	* expr.h (builtin_strncpy_read_str): New prototype.
	* builtins.c (builtin_strncpy_read_str): Remove prototype.
	No longer static.

--- gcc/expr.c.jj	2007-08-15 15:36:32.000000000 +0200
+++ gcc/expr.c	2007-08-23 21:51:57.000000000 +0200
@@ -4472,10 +4472,52 @@ store_expr (tree exp, rtx target, int ca
 
       return NULL_RTX;
     }
+  else if (TREE_CODE (exp) == STRING_CST
+	   && !nontemporal && !call_param_p
+	   && TREE_STRING_LENGTH (exp) > 0
+	   && TYPE_MODE (TREE_TYPE (exp)) == BLKmode)
+    {
+      /* Optimize initialization of an array with a STRING_CST.  */
+      HOST_WIDE_INT exp_len, str_copy_len;
+      rtx dest_mem;
+
+      exp_len = int_expr_size (exp);
+      if (exp_len <= 0)
+	goto normal_expr;
+
+      str_copy_len = strlen (TREE_STRING_POINTER (exp));
+      if (str_copy_len < TREE_STRING_LENGTH (exp) - 1)
+	goto normal_expr;
+
+      str_copy_len = TREE_STRING_LENGTH (exp);
+      if ((STORE_MAX_PIECES & (STORE_MAX_PIECES - 1)) == 0)
+	{
+	  str_copy_len += STORE_MAX_PIECES - 1;
+	  str_copy_len &= ~(STORE_MAX_PIECES - 1);
+	}
+      str_copy_len = MIN (str_copy_len, exp_len);
+      if (!can_store_by_pieces (str_copy_len, builtin_strncpy_read_str,
+				(void *) TREE_STRING_POINTER (exp),
+				MEM_ALIGN (target)))
+	goto normal_expr;
+
+      dest_mem = target;
+
+      dest_mem = store_by_pieces (dest_mem,
+				  str_copy_len, builtin_strncpy_read_str,
+				  (void *) TREE_STRING_POINTER (exp),
+				  MEM_ALIGN (target),
+				  exp_len > str_copy_len ? 1 : 0);
+      if (exp_len > str_copy_len)
+	clear_storage (dest_mem, GEN_INT (exp_len - str_copy_len),
+		       BLOCK_OP_NORMAL);
+      return NULL_RTX;
+    }
   else
     {
       rtx tmp_target;
 
+  normal_expr:
       /* If we want to use a nontemporal store, force the value to
 	 register first.  */
       tmp_target = nontemporal ? NULL_RTX : target;
--- gcc/builtins.c.jj	2007-08-13 15:11:18.000000000 +0200
+++ gcc/builtins.c	2007-08-23 21:33:18.000000000 +0200
@@ -125,7 +125,6 @@ static rtx expand_builtin_bcopy (tree, i
 static rtx expand_builtin_strcpy (tree, tree, rtx, enum machine_mode);
 static rtx expand_builtin_strcpy_args (tree, tree, tree, rtx, enum machine_mode);
 static rtx expand_builtin_stpcpy (tree, rtx, enum machine_mode);
-static rtx builtin_strncpy_read_str (void *, HOST_WIDE_INT, enum machine_mode);
 static rtx expand_builtin_strncpy (tree, rtx, enum machine_mode);
 static rtx builtin_memset_gen_str (void *, HOST_WIDE_INT, enum machine_mode);
 static rtx expand_builtin_memset (tree, rtx, enum machine_mode);
@@ -3773,7 +3772,7 @@ expand_builtin_stpcpy (tree exp, rtx tar
    bytes from constant string DATA + OFFSET and return it as target
    constant.  */
 
-static rtx
+rtx
 builtin_strncpy_read_str (void *data, HOST_WIDE_INT offset,
 			  enum machine_mode mode)
 {
--- gcc/expr.h.jj	2007-08-15 15:36:32.000000000 +0200
+++ gcc/expr.h	2007-08-23 21:33:46.000000000 +0200
@@ -341,6 +341,7 @@ extern void expand_builtin_setjmp_setup 
 extern void expand_builtin_setjmp_receiver (rtx);
 extern rtx expand_builtin_saveregs (void);
 extern void expand_builtin_trap (void);
+extern rtx builtin_strncpy_read_str (void *, HOST_WIDE_INT, enum machine_mode);
 \f
 /* Functions from expr.c:  */
 

	Jakub

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH] Optimize var = STRING_CST
  2007-08-23 23:48 [PATCH] Optimize var = STRING_CST Jakub Jelinek
@ 2007-08-24  9:59 ` Richard Guenther
  2007-08-24 13:42   ` Jakub Jelinek
  0 siblings, 1 reply; 3+ messages in thread
From: Richard Guenther @ 2007-08-24  9:59 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: gcc-patches

On 8/23/07, Jakub Jelinek <jakub@redhat.com> wrote:
> Hi!
>
> The following patch optimizes initialization of an array from a STRING_CST.
> Without this patch store_expr will always force the STRING_CST into memory
> and then do a block move from there, followed by optional clear_storage
> if the STRING_CST is shorter than the array.  This patch uses
> store_by_pieces if possible.
> With the patch:
> struct A { char c[10]; };
>
> void
> foo (void)
> {
>   struct A a = { "abcdefghi" };
>   baz (&a);
> }
>
> void
> bar (void)
> {
>   struct A a;
>   __builtin_strcpy (&a.c[0], "abcdefghi");
>   baz (&a);
> }
> both routines are the same on x86_64 except for
> slightly different register allocation.  Without the patch,
> a .LC0 constant holding the string literal is emitted for foo, and foo
> copies from that constant into the a.c array.
>
> Tested on x86_64-linux, ok for trunk?

This is ok if you add a testcase.  I also bet we have a PR somewhere
for this one...

Thanks,
Richard.

> 2007-08-23  Jakub Jelinek  <jakub@redhat.com>
>
>         * expr.c (store_expr): Optimize initialization of an array
>         with STRING_CST.
>         * expr.h (builtin_strncpy_read_str): New prototype.
>         * builtins.c (builtin_strncpy_read_str): Remove prototype.
>         No longer static.

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH] Optimize var = STRING_CST
  2007-08-24  9:59 ` Richard Guenther
@ 2007-08-24 13:42   ` Jakub Jelinek
  0 siblings, 0 replies; 3+ messages in thread
From: Jakub Jelinek @ 2007-08-24 13:42 UTC (permalink / raw)
  To: Richard Guenther; +Cc: gcc-patches

[-- Attachment #1: Type: text/plain, Size: 435 bytes --]

On Fri, Aug 24, 2007 at 11:37:25AM +0200, Richard Guenther wrote:
> This is ok if you add a testcase.

Thanks, attached patch is what I have committed.

>  I also bet we have a PR  somewhere for this one...

PR31150 is related, but this patch is just a very partial improvement
for that case and only on some arches (e.g. neither ppc32 nor ppc64
will use store_by_pieces on the testcase below for either the
foo or the bar function).


	Jakub

[-- Attachment #2: M2 --]
[-- Type: text/plain, Size: 4166 bytes --]

2007-08-24  Jakub Jelinek  <jakub@redhat.com>

	* expr.c (store_expr): Optimize initialization of an array
	with STRING_CST.
	* expr.h (builtin_strncpy_read_str): New prototype.
	* builtins.c (builtin_strncpy_read_str): Remove prototype.
	No longer static.

	* gcc.dg/array-init-1.c: New test.

--- gcc/builtins.c.jj	2007-08-23 23:31:24.000000000 +0200
+++ gcc/builtins.c	2007-08-23 23:38:22.000000000 +0200
@@ -125,7 +125,6 @@ static rtx expand_builtin_bcopy (tree, i
 static rtx expand_builtin_strcpy (tree, tree, rtx, enum machine_mode);
 static rtx expand_builtin_strcpy_args (tree, tree, tree, rtx, enum machine_mode);
 static rtx expand_builtin_stpcpy (tree, rtx, enum machine_mode);
-static rtx builtin_strncpy_read_str (void *, HOST_WIDE_INT, enum machine_mode);
 static rtx expand_builtin_strncpy (tree, rtx, enum machine_mode);
 static rtx builtin_memset_gen_str (void *, HOST_WIDE_INT, enum machine_mode);
 static rtx expand_builtin_memset (tree, rtx, enum machine_mode);
@@ -3734,7 +3733,7 @@ expand_builtin_stpcpy (tree exp, rtx tar
    bytes from constant string DATA + OFFSET and return it as target
    constant.  */
 
-static rtx
+rtx
 builtin_strncpy_read_str (void *data, HOST_WIDE_INT offset,
 			  enum machine_mode mode)
 {
--- gcc/expr.h.jj	2007-08-23 23:31:24.000000000 +0200
+++ gcc/expr.h	2007-08-23 23:38:22.000000000 +0200
@@ -341,6 +341,7 @@ extern void expand_builtin_setjmp_setup 
 extern void expand_builtin_setjmp_receiver (rtx);
 extern rtx expand_builtin_saveregs (void);
 extern void expand_builtin_trap (void);
+extern rtx builtin_strncpy_read_str (void *, HOST_WIDE_INT, enum machine_mode);
 \f
 /* Functions from expr.c:  */
 
--- gcc/expr.c.jj	2007-08-23 23:31:02.000000000 +0200
+++ gcc/expr.c	2007-08-23 23:38:22.000000000 +0200
@@ -4472,10 +4472,52 @@ store_expr (tree exp, rtx target, int ca
 
       return NULL_RTX;
     }
+  else if (TREE_CODE (exp) == STRING_CST
+	   && !nontemporal && !call_param_p
+	   && TREE_STRING_LENGTH (exp) > 0
+	   && TYPE_MODE (TREE_TYPE (exp)) == BLKmode)
+    {
+      /* Optimize initialization of an array with a STRING_CST.  */
+      HOST_WIDE_INT exp_len, str_copy_len;
+      rtx dest_mem;
+
+      exp_len = int_expr_size (exp);
+      if (exp_len <= 0)
+	goto normal_expr;
+
+      str_copy_len = strlen (TREE_STRING_POINTER (exp));
+      if (str_copy_len < TREE_STRING_LENGTH (exp) - 1)
+	goto normal_expr;
+
+      str_copy_len = TREE_STRING_LENGTH (exp);
+      if ((STORE_MAX_PIECES & (STORE_MAX_PIECES - 1)) == 0)
+	{
+	  str_copy_len += STORE_MAX_PIECES - 1;
+	  str_copy_len &= ~(STORE_MAX_PIECES - 1);
+	}
+      str_copy_len = MIN (str_copy_len, exp_len);
+      if (!can_store_by_pieces (str_copy_len, builtin_strncpy_read_str,
+				(void *) TREE_STRING_POINTER (exp),
+				MEM_ALIGN (target)))
+	goto normal_expr;
+
+      dest_mem = target;
+
+      dest_mem = store_by_pieces (dest_mem,
+				  str_copy_len, builtin_strncpy_read_str,
+				  (void *) TREE_STRING_POINTER (exp),
+				  MEM_ALIGN (target),
+				  exp_len > str_copy_len ? 1 : 0);
+      if (exp_len > str_copy_len)
+	clear_storage (dest_mem, GEN_INT (exp_len - str_copy_len),
+		       BLOCK_OP_NORMAL);
+      return NULL_RTX;
+    }
   else
     {
       rtx tmp_target;
 
+  normal_expr:
       /* If we want to use a nontemporal store, force the value to
 	 register first.  */
       tmp_target = nontemporal ? NULL_RTX : target;
--- gcc/testsuite/gcc.dg/array-init-1.c.jj	2007-08-24 14:12:19.000000000 +0200
+++ gcc/testsuite/gcc.dg/array-init-1.c	2007-08-24 14:22:50.000000000 +0200
@@ -0,0 +1,24 @@
+/* Test that both arrays are initialized by store_by_pieces.  */
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+struct A { char c[10]; };
+extern void baz (struct A *);
+
+void
+foo (void)
+{
+  struct A a = { "abcdefghi" };
+  baz (&a);
+}
+
+void
+bar (void)
+{
+  struct A a;
+  __builtin_strcpy (&a.c[0], "abcdefghi");
+  baz (&a);
+}
+
+/* { dg-final { scan-assembler-not "abcdefghi" { target i?86-*-* x86_64-*-* ia64-*-* } } } */
+/* { dg-final { scan-assembler-times "7523094288207667809\|6867666564636261\|1684234849\|64636261" 2 { target i?86-*-* x86_64-*-* ia64-*-* } } } */

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2007-08-24 12:34 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2007-08-23 23:48 [PATCH] Optimize var = STRING_CST Jakub Jelinek
2007-08-24  9:59 ` Richard Guenther
2007-08-24 13:42   ` Jakub Jelinek

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).