* [PATCH] Fix dr_explicit_realign vectorization (PR tree-optimization/65369)
@ 2015-03-14 9:05 Jakub Jelinek
2015-03-14 9:49 ` Richard Biener
0 siblings, 1 reply; 2+ messages in thread
From: Jakub Jelinek @ 2015-03-14 9:05 UTC (permalink / raw)
To: Richard Biener; +Cc: gcc-patches
Hi!
This issue is practically the same as PR63341, except in this case it is for
dr_explicit_realign rather than dr_explicit_realign_optimized, and the bump
isn't passed through multiple functions and thus is easier to fix.
Without the patch we use (dataptr & -16) for the first load and
((dataptr + 12) & -16) for the second load, which works just fine if the
elements are properly aligned (to at least 4 bytes), but in this case we have
underaligned accesses (coming from folding of memcpy in this testcase, and
from 4 byte loads combined together recognized by bswap pass in the original
source), and so we really want to use ((dataptr + 15) & -16); otherwise,
if we are unlucky (e.g. dataptr % 16 is 1, 2 or 3), both expressions yield
the same address and we read the same memory twice even though dataptr
is not 16-byte aligned.
Bootstrapped/regtested on
{x86_64,i686,aarch64,powerpc64{,le},s390{,x}}-linux, ok for trunk?
2015-03-14 Jakub Jelinek <jakub@redhat.com>
PR tree-optimization/65369
* tree-vect-stmts.c (vectorizable_load) <case dr_explicit_realign>:
Set bump to vs * TYPE_SIZE_UNIT (elem_type) - 1 instead of
(vs - 1) * TYPE_SIZE_UNIT (elem_type).
* gcc.c-torture/execute/pr65369.c: New test.
--- gcc/tree-vect-stmts.c.jj 2015-03-09 08:05:13.000000000 +0100
+++ gcc/tree-vect-stmts.c 2015-03-13 17:27:30.613529768 +0100
@@ -6468,9 +6468,8 @@ vectorizable_load (gimple stmt, gimple_s
case dr_explicit_realign:
{
tree ptr, bump;
- tree vs_minus_1;
- vs_minus_1 = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
+ tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
if (compute_in_loop)
msq = vect_setup_realignment (first_stmt, gsi,
@@ -6499,8 +6498,9 @@ vectorizable_load (gimple stmt, gimple_s
vect_finish_stmt_generation (stmt, new_stmt, gsi);
msq = new_temp;
- bump = size_binop (MULT_EXPR, vs_minus_1,
+ bump = size_binop (MULT_EXPR, vs,
TYPE_SIZE_UNIT (elem_type));
+ bump = size_binop (MINUS_EXPR, bump, size_one_node);
ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
new_stmt = gimple_build_assign
(NULL_TREE, BIT_AND_EXPR, ptr,
--- gcc/testsuite/gcc.c-torture/execute/pr65369.c.jj 2015-03-13 17:37:10.926175685 +0100
+++ gcc/testsuite/gcc.c-torture/execute/pr65369.c 2015-03-13 17:35:40.000000000 +0100
@@ -0,0 +1,45 @@
+/* PR tree-optimization/65369 */
+
+static const char data[] =
+ "12345678901234567890123456789012345678901234567890"
+ "123456789012345678901234567890";
+
+__attribute__ ((noinline))
+static void foo (const unsigned int *buf)
+{
+ if (__builtin_memcmp (buf, data, 64))
+ __builtin_abort ();
+}
+
+__attribute__ ((noinline))
+static void bar (const unsigned char *block)
+{
+ unsigned int buf[16];
+ __builtin_memcpy (buf + 0, block + 0, 4);
+ __builtin_memcpy (buf + 1, block + 4, 4);
+ __builtin_memcpy (buf + 2, block + 8, 4);
+ __builtin_memcpy (buf + 3, block + 12, 4);
+ __builtin_memcpy (buf + 4, block + 16, 4);
+ __builtin_memcpy (buf + 5, block + 20, 4);
+ __builtin_memcpy (buf + 6, block + 24, 4);
+ __builtin_memcpy (buf + 7, block + 28, 4);
+ __builtin_memcpy (buf + 8, block + 32, 4);
+ __builtin_memcpy (buf + 9, block + 36, 4);
+ __builtin_memcpy (buf + 10, block + 40, 4);
+ __builtin_memcpy (buf + 11, block + 44, 4);
+ __builtin_memcpy (buf + 12, block + 48, 4);
+ __builtin_memcpy (buf + 13, block + 52, 4);
+ __builtin_memcpy (buf + 14, block + 56, 4);
+ __builtin_memcpy (buf + 15, block + 60, 4);
+ foo (buf);
+}
+
+int
+main ()
+{
+ unsigned char input[sizeof data + 16] __attribute__((aligned (16)));
+ __builtin_memset (input, 0, sizeof input);
+ __builtin_memcpy (input + 1, data, sizeof data);
+ bar (input + 1);
+ return 0;
+}
Jakub
^ permalink raw reply [flat|nested] 2+ messages in thread
* Re: [PATCH] Fix dr_explicit_realign vectorization (PR tree-optimization/65369)
2015-03-14 9:05 [PATCH] Fix dr_explicit_realign vectorization (PR tree-optimization/65369) Jakub Jelinek
@ 2015-03-14 9:49 ` Richard Biener
0 siblings, 0 replies; 2+ messages in thread
From: Richard Biener @ 2015-03-14 9:49 UTC (permalink / raw)
To: Jakub Jelinek; +Cc: gcc-patches
On March 14, 2015 10:04:53 AM GMT+01:00, Jakub Jelinek <jakub@redhat.com> wrote:
>Hi!
>
>This issue is practically the same as PR63341, except in this case it
>is for
>dr_explicit_realign rather than dr_explicit_realign_optimized, and the
>bump
>isn't passed through multiple functions and thus is easier to fix.
>
>Without the patch we use (dataptr & -16) for the first load and
>((dataptr + 12) & -16) for the second load, which works just fine if
>the
>elements are properly aligned (4 byte at least), but in this case we
>have
>underaligned accesses (coming from folding of memcpy in this testcase,
>and
>from 4 byte loads combined together recognized by bswap pass in the
>original
>source), and so we really want to use ((dataptr + 15) & -16), otherwise
>if we are unlucky we might read the same memory twice even when dataptr
>is not 16 byte aligned.
>
>Bootstrapped/regtested on
>{x86_64,i686,aarch64,powerpc64{,le},s390{,x}}-linux, ok for trunk?
OK.
Thanks,
Richard.
>2015-03-14 Jakub Jelinek <jakub@redhat.com>
>
> PR tree-optimization/65369
> * tree-vect-stmts.c (vectorizable_load) <case dr_explicit_realign>:
> Set bump to vs * TYPE_SIZE_UNIT (elem_type) - 1 instead of
> (vs - 1) * TYPE_SIZE_UNIT (elem_type).
>
> * gcc.c-torture/execute/pr65369.c: New test.
>
>--- gcc/tree-vect-stmts.c.jj 2015-03-09 08:05:13.000000000 +0100
>+++ gcc/tree-vect-stmts.c 2015-03-13 17:27:30.613529768 +0100
>@@ -6468,9 +6468,8 @@ vectorizable_load (gimple stmt, gimple_s
> case dr_explicit_realign:
> {
> tree ptr, bump;
>- tree vs_minus_1;
>
>- vs_minus_1 = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
>+ tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
>
> if (compute_in_loop)
> msq = vect_setup_realignment (first_stmt, gsi,
>@@ -6499,8 +6498,9 @@ vectorizable_load (gimple stmt, gimple_s
> vect_finish_stmt_generation (stmt, new_stmt, gsi);
> msq = new_temp;
>
>- bump = size_binop (MULT_EXPR, vs_minus_1,
>+ bump = size_binop (MULT_EXPR, vs,
> TYPE_SIZE_UNIT (elem_type));
>+ bump = size_binop (MINUS_EXPR, bump, size_one_node);
> ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
> new_stmt = gimple_build_assign
> (NULL_TREE, BIT_AND_EXPR, ptr,
>--- gcc/testsuite/gcc.c-torture/execute/pr65369.c.jj 2015-03-13
>17:37:10.926175685 +0100
>+++ gcc/testsuite/gcc.c-torture/execute/pr65369.c 2015-03-13
>17:35:40.000000000 +0100
>@@ -0,0 +1,45 @@
>+/* PR tree-optimization/65369 */
>+
>+static const char data[] =
>+ "12345678901234567890123456789012345678901234567890"
>+ "123456789012345678901234567890";
>+
>+__attribute__ ((noinline))
>+static void foo (const unsigned int *buf)
>+{
>+ if (__builtin_memcmp (buf, data, 64))
>+ __builtin_abort ();
>+}
>+
>+__attribute__ ((noinline))
>+static void bar (const unsigned char *block)
>+{
>+ unsigned int buf[16];
>+ __builtin_memcpy (buf + 0, block + 0, 4);
>+ __builtin_memcpy (buf + 1, block + 4, 4);
>+ __builtin_memcpy (buf + 2, block + 8, 4);
>+ __builtin_memcpy (buf + 3, block + 12, 4);
>+ __builtin_memcpy (buf + 4, block + 16, 4);
>+ __builtin_memcpy (buf + 5, block + 20, 4);
>+ __builtin_memcpy (buf + 6, block + 24, 4);
>+ __builtin_memcpy (buf + 7, block + 28, 4);
>+ __builtin_memcpy (buf + 8, block + 32, 4);
>+ __builtin_memcpy (buf + 9, block + 36, 4);
>+ __builtin_memcpy (buf + 10, block + 40, 4);
>+ __builtin_memcpy (buf + 11, block + 44, 4);
>+ __builtin_memcpy (buf + 12, block + 48, 4);
>+ __builtin_memcpy (buf + 13, block + 52, 4);
>+ __builtin_memcpy (buf + 14, block + 56, 4);
>+ __builtin_memcpy (buf + 15, block + 60, 4);
>+ foo (buf);
>+}
>+
>+int
>+main ()
>+{
>+ unsigned char input[sizeof data + 16] __attribute__((aligned (16)));
>+ __builtin_memset (input, 0, sizeof input);
>+ __builtin_memcpy (input + 1, data, sizeof data);
>+ bar (input + 1);
>+ return 0;
>+}
>
> Jakub
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2015-03-14 9:49 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-03-14 9:05 [PATCH] Fix dr_explicit_realign vectorization (PR tree-optimization/65369) Jakub Jelinek
2015-03-14 9:49 ` Richard Biener
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).