public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] Fix PR70130
@ 2016-04-14 11:22 Richard Biener
  2016-04-14 13:09 ` Bill Schmidt
  0 siblings, 1 reply; 3+ messages in thread
From: Richard Biener @ 2016-04-14 11:22 UTC (permalink / raw)
  To: gcc-patches


The following fixes PR70130 - improved SLP capabilities now run into
the realignment code on ppc which doesn't properly verify that all
vector loads emitted by vectorizable_load share the same alignment.

Bootstrap / regtest pending on x86_64-unknown-linux-gnu.

Bootstrapped / tested on ppc64le by Alan.

Richard.

2016-04-14  Richard Biener  <rguenther@suse.de>
	Alan Modra  <amodra@gmail.com>

	PR tree-optimization/70130
	* tree-vect-data-refs.c (vect_supportable_dr_alignment): Detect
	when alignment does not stay the same and do not use the realign
	scheme then.

	* gcc.dg/vect/O3-pr70130.c: New testcase.

Index: gcc/tree-vect-data-refs.c
===================================================================
*** gcc/tree-vect-data-refs.c	(revision 234970)
--- gcc/tree-vect-data-refs.c	(working copy)
*************** vect_supportable_dr_alignment (struct da
*** 5983,5992 ****
  	      || targetm.vectorize.builtin_mask_for_load ()))
  	{
  	  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
! 	  if ((nested_in_vect_loop
! 	       && (TREE_INT_CST_LOW (DR_STEP (dr))
! 	 	   != GET_MODE_SIZE (TYPE_MODE (vectype))))
!               || !loop_vinfo)
  	    return dr_explicit_realign;
  	  else
  	    return dr_explicit_realign_optimized;
--- 5983,6001 ----
  	      || targetm.vectorize.builtin_mask_for_load ()))
  	{
  	  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
! 
! 	  /* If we are doing SLP then the accesses need not have the
! 	     same alignment, instead it depends on the SLP group size.  */
! 	  if (loop_vinfo
! 	      && STMT_SLP_TYPE (stmt_info)
! 	      && (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
! 		  * GROUP_SIZE (vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info))))
! 		  % TYPE_VECTOR_SUBPARTS (vectype) != 0)
! 	    ;
! 	  else if (!loop_vinfo
! 		   || (nested_in_vect_loop
! 		       && (TREE_INT_CST_LOW (DR_STEP (dr))
! 			   != GET_MODE_SIZE (TYPE_MODE (vectype)))))
  	    return dr_explicit_realign;
  	  else
  	    return dr_explicit_realign_optimized;
Index: gcc/testsuite/gcc.dg/vect/O3-pr70130.c
===================================================================
*** gcc/testsuite/gcc.dg/vect/O3-pr70130.c	(revision 0)
--- gcc/testsuite/gcc.dg/vect/O3-pr70130.c	(working copy)
***************
*** 0 ****
--- 1,94 ----
+ /* { dg-do run } */
+ /* { dg-require-effective-target vsx_hw { target powerpc*-*-* } } */
+ /* { dg-additional-options "-mcpu=power7" { target powerpc*-*-* } } */
+ 
+ struct foo
+ {
+   short a[3][16][16];
+   short pad;
+ } images[8];
+ 
+ void __attribute__ ((noinline, noclone))
+ Loop_err (struct foo *img, const int s[16][2], int s0)
+ {
+   int i, j;
+ 
+   for (j = 0; j < 16; j++)
+     {
+       for (i=0; i < 16; i++)
+ 	{
+ 	  img->a[0][j][i] = s[i][0];
+ 	  img->a[1][j][i] = s[j][1];
+ 	  img->a[2][j][i] = s0;
+ 	}
+     }
+ }
+ 
+ const int s[16][2] = { { 1, 16 }, { 2, 15 }, { 3, 14 }, { 4, 13 },
+ 		       { 5, 12 }, { 6, 11 }, { 7, 10 }, { 8, 9 },
+ 		       { 9, 8 }, { 10, 7 }, { 11, 6 }, { 12, 5 },
+ 		       { 13, 4 }, { 14, 3 }, { 15, 2 }, { 16, 1 } };
+ const struct foo expected
+ = { { { { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 },
+ 	{ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 },
+ 	{ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 },
+ 	{ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 },
+ 	{ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 },
+ 	{ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 },
+ 	{ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 },
+ 	{ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 },
+ 	{ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 },
+ 	{ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 },
+ 	{ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 },
+ 	{ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 },
+ 	{ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 },
+ 	{ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 },
+ 	{ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 },
+ 	{ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 } },
+       { { 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16 },
+ 	{ 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15 },
+ 	{ 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14 },
+ 	{ 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13 },
+ 	{ 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12 },
+ 	{ 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11 },
+ 	{ 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10 },
+ 	{ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9 },
+ 	{ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8 },
+ 	{ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7 },
+ 	{ 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6 },
+ 	{ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5 },
+ 	{ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 },
+ 	{ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3 },
+ 	{ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 },
+ 	{ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 } },
+       { { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 },
+ 	{ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 },
+ 	{ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 },
+ 	{ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 },
+ 	{ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 },
+ 	{ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 },
+ 	{ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 },
+ 	{ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 },
+ 	{ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 },
+ 	{ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 },
+ 	{ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 },
+ 	{ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 },
+ 	{ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 },
+ 	{ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 },
+ 	{ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 },
+ 	{ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 } } },
+     0 };
+ 
+ int
+ main (void)
+ {
+   int i;
+ 
+   for (i = 0; i < 8; i++)
+     Loop_err (images + i, s, -1);
+ 
+   for (i = 0; i < 8; i++)
+     if (__builtin_memcmp (&expected, images + i, sizeof (expected)))
+       __builtin_abort ();
+   return 0;
+ }

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH] Fix PR70130
  2016-04-14 11:22 [PATCH] Fix PR70130 Richard Biener
@ 2016-04-14 13:09 ` Bill Schmidt
  2016-04-14 13:19   ` Richard Biener
  0 siblings, 1 reply; 3+ messages in thread
From: Bill Schmidt @ 2016-04-14 13:09 UTC (permalink / raw)
  To: Richard Biener; +Cc: gcc-patches

On Thu, 2016-04-14 at 13:22 +0200, Richard Biener wrote:
> The following fixes PR70130 - improved SLP capabilities now run into
> the realignment code on ppc which doesn't properly verify that all
> vector loads emitted by vectorizable_load share the same alignment.
> 
> Bootstrap / regtest pending on x86_64-unknown-linux-gnu.
> 
> Bootstrapped / tested on ppc64le by Alan.
> 
> Richard.
> 
> 2016-04-14  Richard Biener  <rguenther@suse.de>
> 	Alan Modra  <amodra@gmail.com>
> 
> 	PR tree-optimization/70130
> 	* tree-vect-data-refs.c (vect_supportable_dr_alignment): Detect
> 	when alignment does not stay the same and do not use the realign
> 	scheme then.
> 
> 	* gcc.dg/vect/O3-pr70130.c: New testcase.
> 
> Index: gcc/tree-vect-data-refs.c
> ===================================================================
> *** gcc/tree-vect-data-refs.c	(revision 234970)
> --- gcc/tree-vect-data-refs.c	(working copy)
> *************** vect_supportable_dr_alignment (struct da
> *** 5983,5992 ****
>   	      || targetm.vectorize.builtin_mask_for_load ()))
>   	{
>   	  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
> ! 	  if ((nested_in_vect_loop
> ! 	       && (TREE_INT_CST_LOW (DR_STEP (dr))
> ! 	 	   != GET_MODE_SIZE (TYPE_MODE (vectype))))
> !               || !loop_vinfo)
>   	    return dr_explicit_realign;
>   	  else
>   	    return dr_explicit_realign_optimized;
> --- 5983,6001 ----
>   	      || targetm.vectorize.builtin_mask_for_load ()))
>   	{
>   	  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
> ! 
> ! 	  /* If we are doing SLP then the accesses need not have the
> ! 	     same alignment, instead it depends on the SLP group size.  */
> ! 	  if (loop_vinfo
> ! 	      && STMT_SLP_TYPE (stmt_info)
> ! 	      && (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
> ! 		  * GROUP_SIZE (vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info))))
> ! 		  % TYPE_VECTOR_SUBPARTS (vectype) != 0)

Parentheses here look wrong.  Should be one fewer ending paren on the "*
GROUP_SIZE" line and one more on the following line, right?

Bill

> ! 	    ;
> ! 	  else if (!loop_vinfo
> ! 		   || (nested_in_vect_loop
> ! 		       && (TREE_INT_CST_LOW (DR_STEP (dr))
> ! 			   != GET_MODE_SIZE (TYPE_MODE (vectype)))))
>   	    return dr_explicit_realign;
>   	  else
>   	    return dr_explicit_realign_optimized;
> Index: gcc/testsuite/gcc.dg/vect/O3-pr70130.c
> ===================================================================
> *** gcc/testsuite/gcc.dg/vect/O3-pr70130.c	(revision 0)
> --- gcc/testsuite/gcc.dg/vect/O3-pr70130.c	(working copy)
> ***************
> *** 0 ****
> --- 1,94 ----
> + /* { dg-do run } */
> + /* { dg-require-effective-target vsx_hw { target powerpc*-*-* } } */
> + /* { dg-additional-options "-mcpu=power7" { target powerpc*-*-* } } */
> + 
> + struct foo
> + {
> +   short a[3][16][16];
> +   short pad;
> + } images[8];
> + 
> + void __attribute__ ((noinline, noclone))
> + Loop_err (struct foo *img, const int s[16][2], int s0)
> + {
> +   int i, j;
> + 
> +   for (j = 0; j < 16; j++)
> +     {
> +       for (i=0; i < 16; i++)
> + 	{
> + 	  img->a[0][j][i] = s[i][0];
> + 	  img->a[1][j][i] = s[j][1];
> + 	  img->a[2][j][i] = s0;
> + 	}
> +     }
> + }
> + 
> + const int s[16][2] = { { 1, 16 }, { 2, 15 }, { 3, 14 }, { 4, 13 },
> + 		       { 5, 12 }, { 6, 11 }, { 7, 10 }, { 8, 9 },
> + 		       { 9, 8 }, { 10, 7 }, { 11, 6 }, { 12, 5 },
> + 		       { 13, 4 }, { 14, 3 }, { 15, 2 }, { 16, 1 } };
> + const struct foo expected
> + = { { { { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 },
> + 	{ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 },
> + 	{ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 },
> + 	{ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 },
> + 	{ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 },
> + 	{ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 },
> + 	{ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 },
> + 	{ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 },
> + 	{ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 },
> + 	{ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 },
> + 	{ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 },
> + 	{ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 },
> + 	{ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 },
> + 	{ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 },
> + 	{ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 },
> + 	{ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 } },
> +       { { 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16 },
> + 	{ 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15 },
> + 	{ 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14 },
> + 	{ 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13 },
> + 	{ 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12 },
> + 	{ 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11 },
> + 	{ 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10 },
> + 	{ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9 },
> + 	{ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8 },
> + 	{ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7 },
> + 	{ 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6 },
> + 	{ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5 },
> + 	{ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 },
> + 	{ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3 },
> + 	{ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 },
> + 	{ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 } },
> +       { { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 },
> + 	{ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 },
> + 	{ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 },
> + 	{ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 },
> + 	{ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 },
> + 	{ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 },
> + 	{ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 },
> + 	{ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 },
> + 	{ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 },
> + 	{ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 },
> + 	{ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 },
> + 	{ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 },
> + 	{ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 },
> + 	{ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 },
> + 	{ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 },
> + 	{ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 } } },
> +     0 };
> + 
> + int
> + main (void)
> + {
> +   int i;
> + 
> +   for (i = 0; i < 8; i++)
> +     Loop_err (images + i, s, -1);
> + 
> +   for (i = 0; i < 8; i++)
> +     if (__builtin_memcmp (&expected, images + i, sizeof (expected)))
> +       __builtin_abort ();
> +   return 0;
> + }
> 


^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH] Fix PR70130
  2016-04-14 13:09 ` Bill Schmidt
@ 2016-04-14 13:19   ` Richard Biener
  0 siblings, 0 replies; 3+ messages in thread
From: Richard Biener @ 2016-04-14 13:19 UTC (permalink / raw)
  To: Bill Schmidt; +Cc: gcc-patches

On Thu, 14 Apr 2016, Bill Schmidt wrote:

> On Thu, 2016-04-14 at 13:22 +0200, Richard Biener wrote:
> > The following fixes PR70130 - improved SLP capabilities now run into
> > the realignment code on ppc which doesn't properly verify that all
> > vector loads emitted by vectorizable_load share the same alignment.
> > 
> > Bootstrap / regtest pending on x86_64-unknown-linux-gnu.
> > 
> > Bootstrapped / tested on ppc64le by Alan.
> > 
> > Richard.
> > 
> > 2016-04-14  Richard Biener  <rguenther@suse.de>
> > 	Alan Modra  <amodra@gmail.com>
> > 
> > 	PR tree-optimization/70130
> > 	* tree-vect-data-refs.c (vect_supportable_dr_alignment): Detect
> > 	when alignment does not stay the same and do not use the realign
> > 	scheme then.
> > 
> > 	* gcc.dg/vect/O3-pr70130.c: New testcase.
> > 
> > Index: gcc/tree-vect-data-refs.c
> > ===================================================================
> > *** gcc/tree-vect-data-refs.c	(revision 234970)
> > --- gcc/tree-vect-data-refs.c	(working copy)
> > *************** vect_supportable_dr_alignment (struct da
> > *** 5983,5992 ****
> >   	      || targetm.vectorize.builtin_mask_for_load ()))
> >   	{
> >   	  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
> > ! 	  if ((nested_in_vect_loop
> > ! 	       && (TREE_INT_CST_LOW (DR_STEP (dr))
> > ! 	 	   != GET_MODE_SIZE (TYPE_MODE (vectype))))
> > !               || !loop_vinfo)
> >   	    return dr_explicit_realign;
> >   	  else
> >   	    return dr_explicit_realign_optimized;
> > --- 5983,6001 ----
> >   	      || targetm.vectorize.builtin_mask_for_load ()))
> >   	{
> >   	  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
> > ! 
> > ! 	  /* If we are doing SLP then the accesses need not have the
> > ! 	     same alignment, instead it depends on the SLP group size.  */
> > ! 	  if (loop_vinfo
> > ! 	      && STMT_SLP_TYPE (stmt_info)
> > ! 	      && (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
> > ! 		  * GROUP_SIZE (vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info))))
> > ! 		  % TYPE_VECTOR_SUBPARTS (vectype) != 0)
> 
> Parentheses here look wrong.  Should be one fewer ending paren on the "*
> GROUP_SIZE" line and one more on the following line, right?

Yeah, though it shouldn't matter in practice.  I'll fix things up
before committing.

Richard.

> Bill
> 
> > ! 	    ;
> > ! 	  else if (!loop_vinfo
> > ! 		   || (nested_in_vect_loop
> > ! 		       && (TREE_INT_CST_LOW (DR_STEP (dr))
> > ! 			   != GET_MODE_SIZE (TYPE_MODE (vectype)))))
> >   	    return dr_explicit_realign;
> >   	  else
> >   	    return dr_explicit_realign_optimized;
> > Index: gcc/testsuite/gcc.dg/vect/O3-pr70130.c
> > ===================================================================
> > *** gcc/testsuite/gcc.dg/vect/O3-pr70130.c	(revision 0)
> > --- gcc/testsuite/gcc.dg/vect/O3-pr70130.c	(working copy)
> > ***************
> > *** 0 ****
> > --- 1,94 ----
> > + /* { dg-do run } */
> > + /* { dg-require-effective-target vsx_hw { target powerpc*-*-* } } */
> > + /* { dg-additional-options "-mcpu=power7" { target powerpc*-*-* } } */
> > + 
> > + struct foo
> > + {
> > +   short a[3][16][16];
> > +   short pad;
> > + } images[8];
> > + 
> > + void __attribute__ ((noinline, noclone))
> > + Loop_err (struct foo *img, const int s[16][2], int s0)
> > + {
> > +   int i, j;
> > + 
> > +   for (j = 0; j < 16; j++)
> > +     {
> > +       for (i=0; i < 16; i++)
> > + 	{
> > + 	  img->a[0][j][i] = s[i][0];
> > + 	  img->a[1][j][i] = s[j][1];
> > + 	  img->a[2][j][i] = s0;
> > + 	}
> > +     }
> > + }
> > + 
> > + const int s[16][2] = { { 1, 16 }, { 2, 15 }, { 3, 14 }, { 4, 13 },
> > + 		       { 5, 12 }, { 6, 11 }, { 7, 10 }, { 8, 9 },
> > + 		       { 9, 8 }, { 10, 7 }, { 11, 6 }, { 12, 5 },
> > + 		       { 13, 4 }, { 14, 3 }, { 15, 2 }, { 16, 1 } };
> > + const struct foo expected
> > + = { { { { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 },
> > + 	{ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 },
> > + 	{ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 },
> > + 	{ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 },
> > + 	{ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 },
> > + 	{ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 },
> > + 	{ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 },
> > + 	{ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 },
> > + 	{ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 },
> > + 	{ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 },
> > + 	{ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 },
> > + 	{ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 },
> > + 	{ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 },
> > + 	{ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 },
> > + 	{ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 },
> > + 	{ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 } },
> > +       { { 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16 },
> > + 	{ 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15 },
> > + 	{ 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14 },
> > + 	{ 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13 },
> > + 	{ 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12 },
> > + 	{ 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11 },
> > + 	{ 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10 },
> > + 	{ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9 },
> > + 	{ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8 },
> > + 	{ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7 },
> > + 	{ 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6 },
> > + 	{ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5 },
> > + 	{ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 },
> > + 	{ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3 },
> > + 	{ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 },
> > + 	{ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 } },
> > +       { { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 },
> > + 	{ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 },
> > + 	{ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 },
> > + 	{ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 },
> > + 	{ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 },
> > + 	{ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 },
> > + 	{ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 },
> > + 	{ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 },
> > + 	{ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 },
> > + 	{ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 },
> > + 	{ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 },
> > + 	{ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 },
> > + 	{ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 },
> > + 	{ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 },
> > + 	{ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 },
> > + 	{ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 } } },
> > +     0 };
> > + 
> > + int
> > + main (void)
> > + {
> > +   int i;
> > + 
> > +   for (i = 0; i < 8; i++)
> > +     Loop_err (images + i, s, -1);
> > + 
> > +   for (i = 0; i < 8; i++)
> > +     if (__builtin_memcmp (&expected, images + i, sizeof (expected)))
> > +       __builtin_abort ();
> > +   return 0;
> > + }
> > 
> 
> 
> 

-- 
Richard Biener <rguenther@suse.de>
SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nuernberg)

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2016-04-14 13:19 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-04-14 11:22 [PATCH] Fix PR70130 Richard Biener
2016-04-14 13:09 ` Bill Schmidt
2016-04-14 13:19   ` Richard Biener

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).