public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] Fix dr_explicit_realign vectorization (PR tree-optimization/65369)
@ 2015-03-14  9:05 Jakub Jelinek
  2015-03-14  9:49 ` Richard Biener
  0 siblings, 1 reply; 2+ messages in thread
From: Jakub Jelinek @ 2015-03-14  9:05 UTC (permalink / raw)
  To: Richard Biener; +Cc: gcc-patches

Hi!

This issue is practically the same as PR63341, except in this case it is for
dr_explicit_realign rather than dr_explicit_realign_optimized, and the bump
isn't passed through multiple functions and thus is easier to fix.

Without the patch we use (dataptr & -16) for the first load and
((dataptr + 12) & -16) for the second load, which works just fine if the
elements are properly aligned (4 byte at least), but in this case we have
underaligned accesses (coming from folding of memcpy in this testcase, and
from 4 byte loads combined together recognized by bswap pass in the original
source), and so we really want to use ((dataptr + 15) & -16), otherwise
if we are unlucky we might read the same memory twice even when dataptr
is not 16 byte aligned.

Bootstrapped/regtested on
{x86_64,i686,aarch64,powerpc64{,le},s390{,x}}-linux, ok for trunk?

2015-03-14  Jakub Jelinek  <jakub@redhat.com>

	PR tree-optimization/65369
	* tree-vect-stmts.c (vectorizable_load) <case dr_explicit_realign>:
	Set bump to vs * TYPE_SIZE_UNIT (elem_type) - 1 instead of
	(vs - 1) * TYPE_SIZE_UNIT (elem_type).

	* gcc.c-torture/execute/pr65369.c: New test.

--- gcc/tree-vect-stmts.c.jj	2015-03-09 08:05:13.000000000 +0100
+++ gcc/tree-vect-stmts.c	2015-03-13 17:27:30.613529768 +0100
@@ -6468,9 +6468,8 @@ vectorizable_load (gimple stmt, gimple_s
 		case dr_explicit_realign:
 		  {
 		    tree ptr, bump;
-		    tree vs_minus_1;
 
-		    vs_minus_1 = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
+		    tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
 
 		    if (compute_in_loop)
 		      msq = vect_setup_realignment (first_stmt, gsi,
@@ -6499,8 +6498,9 @@ vectorizable_load (gimple stmt, gimple_s
 		    vect_finish_stmt_generation (stmt, new_stmt, gsi);
 		    msq = new_temp;
 
-		    bump = size_binop (MULT_EXPR, vs_minus_1,
+		    bump = size_binop (MULT_EXPR, vs,
 				       TYPE_SIZE_UNIT (elem_type));
+		    bump = size_binop (MINUS_EXPR, bump, size_one_node);
 		    ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
 		    new_stmt = gimple_build_assign
 				 (NULL_TREE, BIT_AND_EXPR, ptr,
--- gcc/testsuite/gcc.c-torture/execute/pr65369.c.jj	2015-03-13 17:37:10.926175685 +0100
+++ gcc/testsuite/gcc.c-torture/execute/pr65369.c	2015-03-13 17:35:40.000000000 +0100
@@ -0,0 +1,45 @@
+/* PR tree-optimization/65369 */
+
+static const char data[] =
+  "12345678901234567890123456789012345678901234567890"
+  "123456789012345678901234567890";
+
+__attribute__ ((noinline))
+static void foo (const unsigned int *buf)
+{
+  if (__builtin_memcmp (buf, data, 64))
+    __builtin_abort ();
+}
+
+__attribute__ ((noinline))
+static void bar (const unsigned char *block)
+{
+  unsigned int buf[16];
+  __builtin_memcpy (buf +  0, block +  0, 4);
+  __builtin_memcpy (buf +  1, block +  4, 4);
+  __builtin_memcpy (buf +  2, block +  8, 4);
+  __builtin_memcpy (buf +  3, block + 12, 4);
+  __builtin_memcpy (buf +  4, block + 16, 4);
+  __builtin_memcpy (buf +  5, block + 20, 4);
+  __builtin_memcpy (buf +  6, block + 24, 4);
+  __builtin_memcpy (buf +  7, block + 28, 4);
+  __builtin_memcpy (buf +  8, block + 32, 4);
+  __builtin_memcpy (buf +  9, block + 36, 4);
+  __builtin_memcpy (buf + 10, block + 40, 4);
+  __builtin_memcpy (buf + 11, block + 44, 4);
+  __builtin_memcpy (buf + 12, block + 48, 4);
+  __builtin_memcpy (buf + 13, block + 52, 4);
+  __builtin_memcpy (buf + 14, block + 56, 4);
+  __builtin_memcpy (buf + 15, block + 60, 4);
+  foo (buf);
+}
+
+int
+main ()
+{
+  unsigned char input[sizeof data + 16] __attribute__((aligned (16)));
+  __builtin_memset (input, 0, sizeof input);
+  __builtin_memcpy (input + 1, data, sizeof data);
+  bar (input + 1);
+  return 0;
+}

	Jakub

^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: [PATCH] Fix dr_explicit_realign vectorization (PR tree-optimization/65369)
  2015-03-14  9:05 [PATCH] Fix dr_explicit_realign vectorization (PR tree-optimization/65369) Jakub Jelinek
@ 2015-03-14  9:49 ` Richard Biener
  0 siblings, 0 replies; 2+ messages in thread
From: Richard Biener @ 2015-03-14  9:49 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: gcc-patches

On March 14, 2015 10:04:53 AM GMT+01:00, Jakub Jelinek <jakub@redhat.com> wrote:
>Hi!
>
>This issue is practically the same as PR63341, except in this case it
>is for
>dr_explicit_realign rather than dr_explicit_realign_optimized, and the
>bump
>isn't passed through multiple functions and thus is easier to fix.
>
>Without the patch we use (dataptr & -16) for the first load and
>((dataptr + 12) & -16) for the second load, which works just fine if
>the
>elements are properly aligned (4 byte at least), but in this case we
>have
>underaligned accesses (coming from folding of memcpy in this testcase,
>and
>from 4 byte loads combined together recognized by bswap pass in the
>original
>source), and so we really want to use ((dataptr + 15) & -16), otherwise
>if we are unlucky we might read the same memory twice even when dataptr
>is not 16 byte aligned.
>
>Bootstrapped/regtested on
>{x86_64,i686,aarch64,powerpc64{,le},s390{,x}}-linux, ok for trunk?

OK.

Thanks,
Richard.

>2015-03-14  Jakub Jelinek  <jakub@redhat.com>
>
>	PR tree-optimization/65369
>	* tree-vect-stmts.c (vectorizable_load) <case dr_explicit_realign>:
>	Set bump to vs * TYPE_SIZE_UNIT (elem_type) - 1 instead of
>	(vs - 1) * TYPE_SIZE_UNIT (elem_type).
>
>	* gcc.c-torture/execute/pr65369.c: New test.
>
>--- gcc/tree-vect-stmts.c.jj	2015-03-09 08:05:13.000000000 +0100
>+++ gcc/tree-vect-stmts.c	2015-03-13 17:27:30.613529768 +0100
>@@ -6468,9 +6468,8 @@ vectorizable_load (gimple stmt, gimple_s
> 		case dr_explicit_realign:
> 		  {
> 		    tree ptr, bump;
>-		    tree vs_minus_1;
> 
>-		    vs_minus_1 = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
>+		    tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
> 
> 		    if (compute_in_loop)
> 		      msq = vect_setup_realignment (first_stmt, gsi,
>@@ -6499,8 +6498,9 @@ vectorizable_load (gimple stmt, gimple_s
> 		    vect_finish_stmt_generation (stmt, new_stmt, gsi);
> 		    msq = new_temp;
> 
>-		    bump = size_binop (MULT_EXPR, vs_minus_1,
>+		    bump = size_binop (MULT_EXPR, vs,
> 				       TYPE_SIZE_UNIT (elem_type));
>+		    bump = size_binop (MINUS_EXPR, bump, size_one_node);
> 		    ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
> 		    new_stmt = gimple_build_assign
> 				 (NULL_TREE, BIT_AND_EXPR, ptr,
>--- gcc/testsuite/gcc.c-torture/execute/pr65369.c.jj	2015-03-13
>17:37:10.926175685 +0100
>+++ gcc/testsuite/gcc.c-torture/execute/pr65369.c	2015-03-13
>17:35:40.000000000 +0100
>@@ -0,0 +1,45 @@
>+/* PR tree-optimization/65369 */
>+
>+static const char data[] =
>+  "12345678901234567890123456789012345678901234567890"
>+  "123456789012345678901234567890";
>+
>+__attribute__ ((noinline))
>+static void foo (const unsigned int *buf)
>+{
>+  if (__builtin_memcmp (buf, data, 64))
>+    __builtin_abort ();
>+}
>+
>+__attribute__ ((noinline))
>+static void bar (const unsigned char *block)
>+{
>+  unsigned int buf[16];
>+  __builtin_memcpy (buf +  0, block +  0, 4);
>+  __builtin_memcpy (buf +  1, block +  4, 4);
>+  __builtin_memcpy (buf +  2, block +  8, 4);
>+  __builtin_memcpy (buf +  3, block + 12, 4);
>+  __builtin_memcpy (buf +  4, block + 16, 4);
>+  __builtin_memcpy (buf +  5, block + 20, 4);
>+  __builtin_memcpy (buf +  6, block + 24, 4);
>+  __builtin_memcpy (buf +  7, block + 28, 4);
>+  __builtin_memcpy (buf +  8, block + 32, 4);
>+  __builtin_memcpy (buf +  9, block + 36, 4);
>+  __builtin_memcpy (buf + 10, block + 40, 4);
>+  __builtin_memcpy (buf + 11, block + 44, 4);
>+  __builtin_memcpy (buf + 12, block + 48, 4);
>+  __builtin_memcpy (buf + 13, block + 52, 4);
>+  __builtin_memcpy (buf + 14, block + 56, 4);
>+  __builtin_memcpy (buf + 15, block + 60, 4);
>+  foo (buf);
>+}
>+
>+int
>+main ()
>+{
>+  unsigned char input[sizeof data + 16] __attribute__((aligned (16)));
>+  __builtin_memset (input, 0, sizeof input);
>+  __builtin_memcpy (input + 1, data, sizeof data);
>+  bar (input + 1);
>+  return 0;
>+}
>
>	Jakub


^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2015-03-14  9:49 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-03-14  9:05 [PATCH] Fix dr_explicit_realign vectorization (PR tree-optimization/65369) Jakub Jelinek
2015-03-14  9:49 ` Richard Biener

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).