* [PATCH] Optimize var = STRING_CST
@ 2007-08-23 23:48 Jakub Jelinek
2007-08-24 9:59 ` Richard Guenther
0 siblings, 1 reply; 3+ messages in thread
From: Jakub Jelinek @ 2007-08-23 23:48 UTC (permalink / raw)
To: gcc-patches
Hi!
The following patch optimizes initialization of an array from a STRING_CST.
Without this patch store_expr will always force the STRING_CST into memory
and then do a block move from there, followed by optional clear_storage
if the STRING_CST is shorter than the array. This patch uses
store_by_pieces if possible.
With the patch:
struct A { char c[10]; };
void
foo (void)
{
struct A a = { "abcdefghi" };
baz (&a);
}
void
bar (void)
{
struct A a;
__builtin_strcpy (&a.c[0], "abcdefghi");
baz (&a);
}
both routines are the same on x86_64 except for
slightly different register allocation, without that patch
for foo a .LC0 constant with the string literal is emitted and foo
copies over from that string into a.c array.
Tested on x86_64-linux, ok for trunk?
BTW, I wonder if we shouldn't use alias set 0 for all the initialization
stores. If so, this is something that was already wrong before this patch:
both the emit_block_move and the clear_storage calls in store_expr are done
with the alias set computed for target by the caller.
2007-08-23 Jakub Jelinek <jakub@redhat.com>
* expr.c (store_expr): Optimize initialization of an array
with STRING_CST.
* expr.h (builtin_strncpy_read_str): New prototype.
* builtins.c (builtin_strncpy_read_str): Remove prototype.
No longer static.
--- gcc/expr.c.jj 2007-08-15 15:36:32.000000000 +0200
+++ gcc/expr.c 2007-08-23 21:51:57.000000000 +0200
@@ -4472,10 +4472,52 @@ store_expr (tree exp, rtx target, int ca
return NULL_RTX;
}
+ else if (TREE_CODE (exp) == STRING_CST
+ && !nontemporal && !call_param_p
+ && TREE_STRING_LENGTH (exp) > 0
+ && TYPE_MODE (TREE_TYPE (exp)) == BLKmode)
+ {
+ /* Optimize initialization of an array with a STRING_CST. */
+ HOST_WIDE_INT exp_len, str_copy_len;
+ rtx dest_mem;
+
+ exp_len = int_expr_size (exp);
+ if (exp_len <= 0)
+ goto normal_expr;
+
+ str_copy_len = strlen (TREE_STRING_POINTER (exp));
+ if (str_copy_len < TREE_STRING_LENGTH (exp) - 1)
+ goto normal_expr;
+
+ str_copy_len = TREE_STRING_LENGTH (exp);
+ if ((STORE_MAX_PIECES & (STORE_MAX_PIECES - 1)) == 0)
+ {
+ str_copy_len += STORE_MAX_PIECES - 1;
+ str_copy_len &= ~(STORE_MAX_PIECES - 1);
+ }
+ str_copy_len = MIN (str_copy_len, exp_len);
+ if (!can_store_by_pieces (str_copy_len, builtin_strncpy_read_str,
+ (void *) TREE_STRING_POINTER (exp),
+ MEM_ALIGN (target)))
+ goto normal_expr;
+
+ dest_mem = target;
+
+ dest_mem = store_by_pieces (dest_mem,
+ str_copy_len, builtin_strncpy_read_str,
+ (void *) TREE_STRING_POINTER (exp),
+ MEM_ALIGN (target),
+ exp_len > str_copy_len ? 1 : 0);
+ if (exp_len > str_copy_len)
+ clear_storage (dest_mem, GEN_INT (exp_len - str_copy_len),
+ BLOCK_OP_NORMAL);
+ return NULL_RTX;
+ }
else
{
rtx tmp_target;
+ normal_expr:
/* If we want to use a nontemporal store, force the value to
register first. */
tmp_target = nontemporal ? NULL_RTX : target;
--- gcc/builtins.c.jj 2007-08-13 15:11:18.000000000 +0200
+++ gcc/builtins.c 2007-08-23 21:33:18.000000000 +0200
@@ -125,7 +125,6 @@ static rtx expand_builtin_bcopy (tree, i
static rtx expand_builtin_strcpy (tree, tree, rtx, enum machine_mode);
static rtx expand_builtin_strcpy_args (tree, tree, tree, rtx, enum machine_mode);
static rtx expand_builtin_stpcpy (tree, rtx, enum machine_mode);
-static rtx builtin_strncpy_read_str (void *, HOST_WIDE_INT, enum machine_mode);
static rtx expand_builtin_strncpy (tree, rtx, enum machine_mode);
static rtx builtin_memset_gen_str (void *, HOST_WIDE_INT, enum machine_mode);
static rtx expand_builtin_memset (tree, rtx, enum machine_mode);
@@ -3773,7 +3772,7 @@ expand_builtin_stpcpy (tree exp, rtx tar
bytes from constant string DATA + OFFSET and return it as target
constant. */
-static rtx
+rtx
builtin_strncpy_read_str (void *data, HOST_WIDE_INT offset,
enum machine_mode mode)
{
--- gcc/expr.h.jj 2007-08-15 15:36:32.000000000 +0200
+++ gcc/expr.h 2007-08-23 21:33:46.000000000 +0200
@@ -341,6 +341,7 @@ extern void expand_builtin_setjmp_setup
extern void expand_builtin_setjmp_receiver (rtx);
extern rtx expand_builtin_saveregs (void);
extern void expand_builtin_trap (void);
+extern rtx builtin_strncpy_read_str (void *, HOST_WIDE_INT, enum machine_mode);
\f
/* Functions from expr.c: */
Jakub
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [PATCH] Optimize var = STRING_CST
2007-08-23 23:48 [PATCH] Optimize var = STRING_CST Jakub Jelinek
@ 2007-08-24 9:59 ` Richard Guenther
2007-08-24 13:42 ` Jakub Jelinek
0 siblings, 1 reply; 3+ messages in thread
From: Richard Guenther @ 2007-08-24 9:59 UTC (permalink / raw)
To: Jakub Jelinek; +Cc: gcc-patches
On 8/23/07, Jakub Jelinek <jakub@redhat.com> wrote:
> Hi!
>
> The following patch optimizes initialization of an array from a STRING_CST.
> Without this patch store_expr will always force the STRING_CST into memory
> and then do a block move from there, followed by optional clear_storage
> if the STRING_CST is shorter than the array. This patch uses
> store_by_pieces if possible.
> With the patch:
> struct A { char c[10]; };
>
> void
> foo (void)
> {
> struct A a = { "abcdefghi" };
> baz (&a);
> }
>
> void
> bar (void)
> {
> struct A a;
> __builtin_strcpy (&a.c[0], "abcdefghi");
> baz (&a);
> }
> both routines are the same on x86_64 except for
> slightly different register allocation, without that patch
> for foo a .LC0 constant with the string literal is emitted and foo
> copies over from that string into a.c array.
>
> Tested on x86_64-linux, ok for trunk?
This is ok if you add a testcase. I also bet we have a PR somewhere
for this one...
Thanks,
Richard.
> 2007-08-23 Jakub Jelinek <jakub@redhat.com>
>
> * expr.c (store_expr): Optimize initialization of an array
> with STRING_CST.
> * expr.h (builtin_strncpy_read_str): New prototype.
> * builtins.c (builtin_strncpy_read_str): Remove prototype.
> No longer static.
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [PATCH] Optimize var = STRING_CST
2007-08-24 9:59 ` Richard Guenther
@ 2007-08-24 13:42 ` Jakub Jelinek
0 siblings, 0 replies; 3+ messages in thread
From: Jakub Jelinek @ 2007-08-24 13:42 UTC (permalink / raw)
To: Richard Guenther; +Cc: gcc-patches
[-- Attachment #1: Type: text/plain, Size: 435 bytes --]
On Fri, Aug 24, 2007 at 11:37:25AM +0200, Richard Guenther wrote:
> This is ok if you add a testcase.
Thanks, attached patch is what I have committed.
> I also bet we have a PR somewhere for this one...
PR31150 is related, but this patch is just a very partial improvement
for that case and only on some arches (e.g. neither ppc32 nor ppc64
will use store_by_pieces on the testcase below for either the
foo or the bar function).
Jakub
[-- Attachment #2: M2 --]
[-- Type: text/plain, Size: 4166 bytes --]
2007-08-24 Jakub Jelinek <jakub@redhat.com>
* expr.c (store_expr): Optimize initialization of an array
with STRING_CST.
* expr.h (builtin_strncpy_read_str): New prototype.
* builtins.c (builtin_strncpy_read_str): Remove prototype.
No longer static.
* gcc.dg/array-init-1.c: New test.
--- gcc/builtins.c.jj 2007-08-23 23:31:24.000000000 +0200
+++ gcc/builtins.c 2007-08-23 23:38:22.000000000 +0200
@@ -125,7 +125,6 @@ static rtx expand_builtin_bcopy (tree, i
static rtx expand_builtin_strcpy (tree, tree, rtx, enum machine_mode);
static rtx expand_builtin_strcpy_args (tree, tree, tree, rtx, enum machine_mode);
static rtx expand_builtin_stpcpy (tree, rtx, enum machine_mode);
-static rtx builtin_strncpy_read_str (void *, HOST_WIDE_INT, enum machine_mode);
static rtx expand_builtin_strncpy (tree, rtx, enum machine_mode);
static rtx builtin_memset_gen_str (void *, HOST_WIDE_INT, enum machine_mode);
static rtx expand_builtin_memset (tree, rtx, enum machine_mode);
@@ -3734,7 +3733,7 @@ expand_builtin_stpcpy (tree exp, rtx tar
bytes from constant string DATA + OFFSET and return it as target
constant. */
-static rtx
+rtx
builtin_strncpy_read_str (void *data, HOST_WIDE_INT offset,
enum machine_mode mode)
{
--- gcc/expr.h.jj 2007-08-23 23:31:24.000000000 +0200
+++ gcc/expr.h 2007-08-23 23:38:22.000000000 +0200
@@ -341,6 +341,7 @@ extern void expand_builtin_setjmp_setup
extern void expand_builtin_setjmp_receiver (rtx);
extern rtx expand_builtin_saveregs (void);
extern void expand_builtin_trap (void);
+extern rtx builtin_strncpy_read_str (void *, HOST_WIDE_INT, enum machine_mode);
\f
/* Functions from expr.c: */
--- gcc/expr.c.jj 2007-08-23 23:31:02.000000000 +0200
+++ gcc/expr.c 2007-08-23 23:38:22.000000000 +0200
@@ -4472,10 +4472,52 @@ store_expr (tree exp, rtx target, int ca
return NULL_RTX;
}
+ else if (TREE_CODE (exp) == STRING_CST
+ && !nontemporal && !call_param_p
+ && TREE_STRING_LENGTH (exp) > 0
+ && TYPE_MODE (TREE_TYPE (exp)) == BLKmode)
+ {
+ /* Optimize initialization of an array with a STRING_CST. */
+ HOST_WIDE_INT exp_len, str_copy_len;
+ rtx dest_mem;
+
+ exp_len = int_expr_size (exp);
+ if (exp_len <= 0)
+ goto normal_expr;
+
+ str_copy_len = strlen (TREE_STRING_POINTER (exp));
+ if (str_copy_len < TREE_STRING_LENGTH (exp) - 1)
+ goto normal_expr;
+
+ str_copy_len = TREE_STRING_LENGTH (exp);
+ if ((STORE_MAX_PIECES & (STORE_MAX_PIECES - 1)) == 0)
+ {
+ str_copy_len += STORE_MAX_PIECES - 1;
+ str_copy_len &= ~(STORE_MAX_PIECES - 1);
+ }
+ str_copy_len = MIN (str_copy_len, exp_len);
+ if (!can_store_by_pieces (str_copy_len, builtin_strncpy_read_str,
+ (void *) TREE_STRING_POINTER (exp),
+ MEM_ALIGN (target)))
+ goto normal_expr;
+
+ dest_mem = target;
+
+ dest_mem = store_by_pieces (dest_mem,
+ str_copy_len, builtin_strncpy_read_str,
+ (void *) TREE_STRING_POINTER (exp),
+ MEM_ALIGN (target),
+ exp_len > str_copy_len ? 1 : 0);
+ if (exp_len > str_copy_len)
+ clear_storage (dest_mem, GEN_INT (exp_len - str_copy_len),
+ BLOCK_OP_NORMAL);
+ return NULL_RTX;
+ }
else
{
rtx tmp_target;
+ normal_expr:
/* If we want to use a nontemporal store, force the value to
register first. */
tmp_target = nontemporal ? NULL_RTX : target;
--- gcc/testsuite/gcc.dg/array-init-1.c.jj 2007-08-24 14:12:19.000000000 +0200
+++ gcc/testsuite/gcc.dg/array-init-1.c 2007-08-24 14:22:50.000000000 +0200
@@ -0,0 +1,24 @@
+/* Test that both arrays are initialized by store_by_pieces. */
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+struct A { char c[10]; };
+extern void baz (struct A *);
+
+void
+foo (void)
+{
+ struct A a = { "abcdefghi" };
+ baz (&a);
+}
+
+void
+bar (void)
+{
+ struct A a;
+ __builtin_strcpy (&a.c[0], "abcdefghi");
+ baz (&a);
+}
+
+/* { dg-final { scan-assembler-not "abcdefghi" { target i?86-*-* x86_64-*-* ia64-*-* } } } */
+/* { dg-final { scan-assembler-times "7523094288207667809\|6867666564636261\|1684234849\|64636261" 2 { target i?86-*-* x86_64-*-* ia64-*-* } } } */
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2007-08-24 12:34 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2007-08-23 23:48 [PATCH] Optimize var = STRING_CST Jakub Jelinek
2007-08-24 9:59 ` Richard Guenther
2007-08-24 13:42 ` Jakub Jelinek
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).