* [PATCH] Lift restrictions on SLP permutation for loop vect
@ 2015-06-03 12:01 Richard Biener
2015-06-08 13:03 ` Richard Biener
0 siblings, 1 reply; 7+ messages in thread
From: Richard Biener @ 2015-06-03 12:01 UTC (permalink / raw)
To: gcc-patches
This allows all permutations we can generate (according to the target).
Bootstrap and regtest pending on x86_64-unknown-linux-gnu.
Richard.
2015-06-03 Richard Biener <rguenther@suse.de>
* tree-vect-stmts.c (vectorizable_load): Compute the pointer
adjustment for gaps at the end of a SLP load group properly.
* tree-vect-slp.c (vect_supported_load_permutation_p): Allow
all permutations we can generate.
* gcc.dg/vect/slp-perm-10.c: New testcase.
* gcc.dg/vect/slp-23.c: Adjust.
Index: gcc/tree-vect-stmts.c
===================================================================
*** gcc/tree-vect-stmts.c (revision 224061)
--- gcc/tree-vect-stmts.c (working copy)
*************** vectorizable_load (gimple stmt, gimple_s
*** 5807,5813 ****
gimple ptr_incr = NULL;
int nunits = TYPE_VECTOR_SUBPARTS (vectype);
int ncopies;
! int i, j, group_size = -1, group_gap;
tree msq = NULL_TREE, lsq;
tree offset = NULL_TREE;
tree byte_offset = NULL_TREE;
--- 5807,5813 ----
gimple ptr_incr = NULL;
int nunits = TYPE_VECTOR_SUBPARTS (vectype);
int ncopies;
! int i, j, group_size = -1, group_gap_adj;
tree msq = NULL_TREE, lsq;
tree offset = NULL_TREE;
tree byte_offset = NULL_TREE;
*************** vectorizable_load (gimple stmt, gimple_s
*** 6402,6413 ****
{
grouped_load = false;
vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
! group_gap = GROUP_GAP (vinfo_for_stmt (first_stmt));
}
else
{
vec_num = group_size;
! group_gap = 0;
}
}
else
--- 6402,6413 ----
{
grouped_load = false;
vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
! group_gap_adj = vf * group_size - nunits * vec_num;
}
else
{
vec_num = group_size;
! group_gap_adj = 0;
}
}
else
*************** vectorizable_load (gimple stmt, gimple_s
*** 6415,6421 ****
first_stmt = stmt;
first_dr = dr;
group_size = vec_num = 1;
! group_gap = 0;
}
alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
--- 6415,6421 ----
first_stmt = stmt;
first_dr = dr;
group_size = vec_num = 1;
! group_gap_adj = 0;
}
alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
*************** vectorizable_load (gimple stmt, gimple_s
*** 6832,6842 ****
SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
}
/* Bump the vector pointer to account for a gap. */
! if (slp && group_gap != 0)
{
tree bump = size_binop (MULT_EXPR,
TYPE_SIZE_UNIT (elem_type),
! size_int (group_gap));
dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
stmt, bump);
}
--- 6832,6842 ----
SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
}
/* Bump the vector pointer to account for a gap. */
! if (group_gap_adj != 0)
{
tree bump = size_binop (MULT_EXPR,
TYPE_SIZE_UNIT (elem_type),
! size_int (group_gap_adj));
dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
stmt, bump);
}
Index: gcc/tree-vect-slp.c
===================================================================
*** gcc/tree-vect-slp.c (revision 224061)
--- gcc/tree-vect-slp.c (working copy)
*************** vect_supported_load_permutation_p (slp_i
*** 1506,1552 ****
return true;
}
! /* FORNOW: the only supported permutation is 0..01..1.. of length equal to
! GROUP_SIZE and where each sequence of same drs is of GROUP_SIZE length as
! well (unless it's reduction). */
! if (SLP_INSTANCE_LOADS (slp_instn).length () != group_size)
! return false;
! FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
! if (!node->load_permutation.exists ())
! return false;
!
! load_index = sbitmap_alloc (group_size);
! bitmap_clear (load_index);
! FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
! {
! unsigned int lidx = node->load_permutation[0];
! if (bitmap_bit_p (load_index, lidx))
! {
! sbitmap_free (load_index);
! return false;
! }
! bitmap_set_bit (load_index, lidx);
! FOR_EACH_VEC_ELT (node->load_permutation, j, k)
! if (k != lidx)
! {
! sbitmap_free (load_index);
! return false;
! }
! }
! for (i = 0; i < group_size; i++)
! if (!bitmap_bit_p (load_index, i))
! {
! sbitmap_free (load_index);
! return false;
! }
! sbitmap_free (load_index);
!
FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
if (node->load_permutation.exists ()
&& !vect_transform_slp_perm_load
(node, vNULL, NULL,
SLP_INSTANCE_UNROLLING_FACTOR (slp_instn), slp_instn, true))
return false;
return true;
}
--- 1504,1517 ----
return true;
}
! /* For loop vectorization verify we can generate the permutation. */
FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
if (node->load_permutation.exists ()
&& !vect_transform_slp_perm_load
(node, vNULL, NULL,
SLP_INSTANCE_UNROLLING_FACTOR (slp_instn), slp_instn, true))
return false;
+
return true;
}
Index: gcc/testsuite/gcc.dg/vect/slp-23.c
===================================================================
*** gcc/testsuite/gcc.dg/vect/slp-23.c (revision 224061)
--- gcc/testsuite/gcc.dg/vect/slp-23.c (working copy)
*************** int main (void)
*** 108,112 ****
/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" { target { vect_strided8 && { ! { vect_no_align} } } } } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { ! { vect_strided8 || vect_no_align } } } } } */
! /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" } } */
--- 108,113 ----
/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" { target { vect_strided8 && { ! { vect_no_align} } } } } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { ! { vect_strided8 || vect_no_align } } } } } */
! /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { ! vect_perm } } } } */
! /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target vect_perm } } } */
Index: gcc/testsuite/gcc.dg/vect/slp-perm-10.c
===================================================================
*** gcc/testsuite/gcc.dg/vect/slp-perm-10.c (revision 0)
--- gcc/testsuite/gcc.dg/vect/slp-perm-10.c (working copy)
***************
*** 0 ****
--- 1,53 ----
+ /* { dg-require-effective-target vect_int } */
+
+ #include "tree-vect.h"
+
+ int a[256], b[256];
+
+ void __attribute__((noinline))
+ foo (void)
+ {
+ int i;
+ for (i = 0; i < 32; ++i)
+ {
+ b[i*8+0] = a[i*8+0];
+ b[i*8+1] = a[i*8+0];
+ b[i*8+2] = a[i*8+3];
+ b[i*8+3] = a[i*8+3];
+ b[i*8+4] = a[i*8+4];
+ b[i*8+5] = a[i*8+6];
+ b[i*8+6] = a[i*8+4];
+ b[i*8+7] = a[i*8+6];
+ }
+ }
+
+ int main ()
+ {
+ int i;
+
+ check_vect ();
+
+ for (i = 0; i < 256; ++i)
+ {
+ a[i] = i;
+ __asm__ volatile ("");
+ }
+
+ foo ();
+
+ for (i = 0; i < 32; ++i)
+ if (b[i*8+0] != i*8+0
+ || b[i*8+1] != i*8+0
+ || b[i*8+2] != i*8+3
+ || b[i*8+3] != i*8+3
+ || b[i*8+4] != i*8+4
+ || b[i*8+5] != i*8+6
+ || b[i*8+6] != i*8+4
+ || b[i*8+7] != i*8+6)
+ abort ();
+
+ return 0;
+ }
+
+ /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_perm } } } */
+ /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_perm } } } */
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH] Lift restrictions on SLP permutation for loop vect
2015-06-03 12:01 [PATCH] Lift restrictions on SLP permutation for loop vect Richard Biener
@ 2015-06-08 13:03 ` Richard Biener
0 siblings, 0 replies; 7+ messages in thread
From: Richard Biener @ 2015-06-08 13:03 UTC (permalink / raw)
To: gcc-patches
On Wed, 3 Jun 2015, Richard Biener wrote:
>
> This allows all permutations we can generate (according to the target).
>
> Bootstrap and regtest pending on x86_64-unknown-linux-gnu.
So this turned up other issues thus the following is what I have
committed after bootstrapping and testing on x86_64-unknown-linux-gnu.
Richard.
2015-06-08 Richard Biener <rguenther@suse.de>
* tree-vect-stmts.c (vectorizable_load): Compute the pointer
adjustment for gaps at the end of a SLP load group properly.
* tree-vect-slp.c (vect_supported_load_permutation_p): Allow
all permutations we can generate.
(vect_transform_slp_perm_load): Use the correct group-size.
* gcc.dg/vect/slp-perm-10.c: New testcase.
* gcc.dg/vect/slp-23.c: Adjust.
* gcc.dg/torture/pr53366-2.c: Also verify cross-iteration
vector pointer update.
Index: gcc/tree-vect-stmts.c
===================================================================
*** gcc/tree-vect-stmts.c (revision 224077)
--- gcc/tree-vect-stmts.c (working copy)
*************** vectorizable_load (gimple stmt, gimple_s
*** 5807,5813 ****
gimple ptr_incr = NULL;
int nunits = TYPE_VECTOR_SUBPARTS (vectype);
int ncopies;
! int i, j, group_size = -1, group_gap;
tree msq = NULL_TREE, lsq;
tree offset = NULL_TREE;
tree byte_offset = NULL_TREE;
--- 5807,5813 ----
gimple ptr_incr = NULL;
int nunits = TYPE_VECTOR_SUBPARTS (vectype);
int ncopies;
! int i, j, group_size = -1, group_gap_adj;
tree msq = NULL_TREE, lsq;
tree offset = NULL_TREE;
tree byte_offset = NULL_TREE;
*************** vectorizable_load (gimple stmt, gimple_s
*** 6396,6421 ****
}
first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
/* VEC_NUM is the number of vect stmts to be created for this group. */
if (slp)
{
grouped_load = false;
vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
! group_gap = GROUP_GAP (vinfo_for_stmt (first_stmt));
}
else
! {
! vec_num = group_size;
! group_gap = 0;
! }
}
else
{
first_stmt = stmt;
first_dr = dr;
group_size = vec_num = 1;
! group_gap = 0;
}
alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
--- 6396,6419 ----
}
first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
+ group_gap_adj = 0;
/* VEC_NUM is the number of vect stmts to be created for this group. */
if (slp)
{
grouped_load = false;
vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
! group_gap_adj = vf * group_size - nunits * vec_num;
}
else
! vec_num = group_size;
}
else
{
first_stmt = stmt;
first_dr = dr;
group_size = vec_num = 1;
! group_gap_adj = 0;
}
alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
*************** vectorizable_load (gimple stmt, gimple_s
*** 6831,6842 ****
if (slp && !slp_perm)
SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
}
! /* Bump the vector pointer to account for a gap. */
! if (slp && group_gap != 0)
{
! tree bump = size_binop (MULT_EXPR,
! TYPE_SIZE_UNIT (elem_type),
! size_int (group_gap));
dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
stmt, bump);
}
--- 6829,6843 ----
if (slp && !slp_perm)
SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
}
! /* Bump the vector pointer to account for a gap or for excess
! elements loaded for a permuted SLP load. */
! if (group_gap_adj != 0)
{
! bool ovf;
! tree bump
! = wide_int_to_tree (sizetype,
! wi::smul (TYPE_SIZE_UNIT (elem_type),
! group_gap_adj, &ovf));
dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
stmt, bump);
}
Index: gcc/tree-vect-slp.c
===================================================================
*** gcc/tree-vect-slp.c (revision 224077)
--- gcc/tree-vect-slp.c (working copy)
*************** vect_supported_load_permutation_p (slp_i
*** 1502,1548 ****
return true;
}
! /* FORNOW: the only supported permutation is 0..01..1.. of length equal to
! GROUP_SIZE and where each sequence of same drs is of GROUP_SIZE length as
! well (unless it's reduction). */
! if (SLP_INSTANCE_LOADS (slp_instn).length () != group_size)
! return false;
! FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
! if (!node->load_permutation.exists ())
! return false;
!
! load_index = sbitmap_alloc (group_size);
! bitmap_clear (load_index);
! FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
! {
! unsigned int lidx = node->load_permutation[0];
! if (bitmap_bit_p (load_index, lidx))
! {
! sbitmap_free (load_index);
! return false;
! }
! bitmap_set_bit (load_index, lidx);
! FOR_EACH_VEC_ELT (node->load_permutation, j, k)
! if (k != lidx)
! {
! sbitmap_free (load_index);
! return false;
! }
! }
! for (i = 0; i < group_size; i++)
! if (!bitmap_bit_p (load_index, i))
! {
! sbitmap_free (load_index);
! return false;
! }
! sbitmap_free (load_index);
!
FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
if (node->load_permutation.exists ()
&& !vect_transform_slp_perm_load
(node, vNULL, NULL,
SLP_INSTANCE_UNROLLING_FACTOR (slp_instn), slp_instn, true))
return false;
return true;
}
--- 1517,1530 ----
return true;
}
! /* For loop vectorization verify we can generate the permutation. */
FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
if (node->load_permutation.exists ()
&& !vect_transform_slp_perm_load
(node, vNULL, NULL,
SLP_INSTANCE_UNROLLING_FACTOR (slp_instn), slp_instn, true))
return false;
+
return true;
}
*************** vect_transform_slp_perm_load (slp_tree n
*** 3287,3292 ****
--- 3269,3276 ----
if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
return false;
+ stmt_info = vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info));
+
/* Generate permutation masks for every NODE. Number of masks for each NODE
is equal to GROUP_SIZE.
E.g., we have a group of three nodes with three loads from the same
*************** vect_transform_slp_perm_load (slp_tree n
*** 3321,3327 ****
for (k = 0; k < group_size; k++)
{
i = SLP_TREE_LOAD_PERMUTATION (node)[k];
! first_mask_element = i + j * group_size;
if (!vect_get_mask_element (stmt, first_mask_element, 0,
nunits, only_one_vec, index,
mask, &current_mask_element,
--- 3305,3311 ----
for (k = 0; k < group_size; k++)
{
i = SLP_TREE_LOAD_PERMUTATION (node)[k];
! first_mask_element = i + j * STMT_VINFO_GROUP_SIZE (stmt_info);
if (!vect_get_mask_element (stmt, first_mask_element, 0,
nunits, only_one_vec, index,
mask, &current_mask_element,
Index: gcc/testsuite/gcc.dg/vect/slp-perm-10.c
===================================================================
*** gcc/testsuite/gcc.dg/vect/slp-perm-10.c (revision 0)
--- gcc/testsuite/gcc.dg/vect/slp-perm-10.c (working copy)
***************
*** 0 ****
--- 1,53 ----
+ /* { dg-require-effective-target vect_int } */
+
+ #include "tree-vect.h"
+
+ int a[256], b[256];
+
+ void __attribute__((noinline))
+ foo (void)
+ {
+ int i;
+ for (i = 0; i < 32; ++i)
+ {
+ b[i*8+0] = a[i*8+0];
+ b[i*8+1] = a[i*8+0];
+ b[i*8+2] = a[i*8+3];
+ b[i*8+3] = a[i*8+3];
+ b[i*8+4] = a[i*8+4];
+ b[i*8+5] = a[i*8+6];
+ b[i*8+6] = a[i*8+4];
+ b[i*8+7] = a[i*8+6];
+ }
+ }
+
+ int main ()
+ {
+ int i;
+
+ check_vect ();
+
+ for (i = 0; i < 256; ++i)
+ {
+ a[i] = i;
+ __asm__ volatile ("");
+ }
+
+ foo ();
+
+ for (i = 0; i < 32; ++i)
+ if (b[i*8+0] != i*8+0
+ || b[i*8+1] != i*8+0
+ || b[i*8+2] != i*8+3
+ || b[i*8+3] != i*8+3
+ || b[i*8+4] != i*8+4
+ || b[i*8+5] != i*8+6
+ || b[i*8+6] != i*8+4
+ || b[i*8+7] != i*8+6)
+ abort ();
+
+ return 0;
+ }
+
+ /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_perm } } } */
+ /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_perm } } } */
Index: gcc/testsuite/gcc.dg/vect/slp-23.c
===================================================================
*** gcc/testsuite/gcc.dg/vect/slp-23.c (revision 224077)
--- gcc/testsuite/gcc.dg/vect/slp-23.c (working copy)
*************** int main (void)
*** 108,112 ****
/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" { target { vect_strided8 && { ! { vect_no_align} } } } } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { ! { vect_strided8 || vect_no_align } } } } } */
! /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" } } */
--- 108,113 ----
/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" { target { vect_strided8 && { ! { vect_no_align} } } } } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { ! { vect_strided8 || vect_no_align } } } } } */
! /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { ! vect_perm } } } } */
! /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target vect_perm } } } */
Index: gcc/testsuite/gcc.dg/torture/pr53366-2.c
===================================================================
*** gcc/testsuite/gcc.dg/torture/pr53366-2.c (revision 224077)
--- gcc/testsuite/gcc.dg/torture/pr53366-2.c (working copy)
***************
*** 4,21 ****
extern void abort (void);
struct T { float r[3], i[3]; };
! struct U { struct T j[2]; };
void __attribute__ ((noinline))
foo (struct U *__restrict y, const float _Complex *__restrict x)
{
int i, j;
! for (j = 0; j < 2; ++j)
{
float a = __real__ x[j];
float b = __imag__ x[j];
! float c = __real__ x[j + 2];
! float d = __imag__ x[j + 2];
for (i = 0; i < 3; ++i)
{
y->j[j].r[i] = y->j[j].r[i] + a + c;
--- 4,21 ----
extern void abort (void);
struct T { float r[3], i[3]; };
! struct U { struct T j[4]; };
void __attribute__ ((noinline))
foo (struct U *__restrict y, const float _Complex *__restrict x)
{
int i, j;
! for (j = 0; j < 4; ++j)
{
float a = __real__ x[j];
float b = __imag__ x[j];
! float c = __real__ x[j + 4];
! float d = __imag__ x[j + 4];
for (i = 0; i < 3; ++i)
{
y->j[j].r[i] = y->j[j].r[i] + a + c;
*************** foo (struct U *__restrict y, const float
*** 24,43 ****
}
}
! _Complex float x[4];
struct U y;
int
main ()
{
int i, j;
! for (i = 0; i < 4; ++i)
! x[i] = i + 1.0iF * (2 * i);
foo (&y, x);
! for (j = 0; j < 2; ++j)
for (i = 0; i < 3; ++i)
! if (y.j[j].r[i] != __real__ (x[j] + x[j + 2])
! || y.j[j].i[i] != __imag__ (x[j] + x[j + 2]))
__builtin_abort ();
return 0;
}
--- 24,46 ----
}
}
! _Complex float x[8];
struct U y;
int
main ()
{
int i, j;
! for (i = 0; i < 8; ++i)
! {
! x[i] = i + 1.0iF * (2 * i);
! __asm__ volatile ("");
! }
foo (&y, x);
! for (j = 0; j < 4; ++j)
for (i = 0; i < 3; ++i)
! if (y.j[j].r[i] != __real__ (x[j] + x[j + 4])
! || y.j[j].i[i] != __imag__ (x[j] + x[j + 4]))
__builtin_abort ();
return 0;
}
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH] Lift restrictions on SLP permutation for loop vect
2015-06-11 14:30 ` Richard Biener
@ 2015-06-11 14:34 ` James Greenhalgh
0 siblings, 0 replies; 7+ messages in thread
From: James Greenhalgh @ 2015-06-11 14:34 UTC (permalink / raw)
To: Richard Biener; +Cc: Uros Bizjak, gcc-patches
On Thu, Jun 11, 2015 at 03:23:21PM +0100, Richard Biener wrote:
> I will have a look next week - mind opening a bugreport for this so
> I don't forget?
Of course, I've opened https://gcc.gnu.org/bugzilla/show_bug.cgi?id=66510 .
Cheers,
James
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH] Lift restrictions on SLP permutation for loop vect
2015-06-11 14:23 ` James Greenhalgh
@ 2015-06-11 14:30 ` Richard Biener
2015-06-11 14:34 ` James Greenhalgh
0 siblings, 1 reply; 7+ messages in thread
From: Richard Biener @ 2015-06-11 14:30 UTC (permalink / raw)
To: James Greenhalgh; +Cc: Uros Bizjak, gcc-patches
On Thu, 11 Jun 2015, James Greenhalgh wrote:
> On Thu, Jun 11, 2015 at 03:08:59PM +0100, Richard Biener wrote:
> > On Thu, 11 Jun 2015, Uros Bizjak wrote:
> >
> > > > So this turned up other issues thus the following is what I have
> > > > committed after bootstrapping and testing on x86_64-unknown-linux-gnu.
> > > >
> > > > Richard.
> > > >
> > > > 2015-06-08 Richard Biener <rguenther@suse.de>
> > > >
> > > > * tree-vect-stmts.c (vectorizable_load): Compute the pointer
> > > > adjustment for gaps at the end of a SLP load group properly.
> > > > * tree-vect-slp.c (vect_supported_load_permutation_p): Allow
> > > > all permutations we can generate.
> > > > (vect_transform_slp_perm_load): Use the correct group-size.
> > > >
> > > > * gcc.dg/vect/slp-perm-10.c: New testcase.
> > > > * gcc.dg/vect/slp-23.c: Adjust.
> > > > * gcc.dg/torture/pr53366-2.c: Also verify cross-iteration vector pointer update.
> > >
> > > This patch caused:
> > >
> > > FAIL: gcc.target/i386/pr61403.c scan-assembler blend
> >
> > Yeah, I noticed. We now want to vectorize this differently but
> > fail due to the cost model. I'm working on enhancing the vectorizer
> > here.
>
> It also caused an ICE in the ARM port (arm-none-eabi,
> arm-none-linux-gnueabihf):
>
> FAIL: gcc.target/arm/pr53636.c (internal compiler error)
>
> Full ICE text below, and reduced testcase attached, compile with:
>
> arm-none-eabi-gcc -O -ftree-vectorize -mfpu=neon -mcpu=cortex-a9 bug.c
>
> I tried to take a look to see what was happening, but I couldn't see
> the root of the problem. The access to dr_chain in
> vect_create_mask_and_perm:
>
> second_vec = dr_chain[second_vec_indx];
>
> Fails as dr_chain has length 1, and second_vec_indx is 2.
>
> I think that the mask that the code is trying to produce is { 1, 2, 3, 4 }.
>
> bug.c:4:3: note: add new stmt: vect__8.6_108 = VEC_PERM_EXPR <vect__8.4_104, vect__8.5_106, { 1, 2, 3, 4 }>;
>
> But that's about as far as I got.
I will have a look next week - mind opening a bugreport for this so
I don't forget?
Thanks,
Richard.
> Thanks,
> James
>
> ---
> bug.c: In function 'test':
> bug.c:1:6: internal compiler error: in operator[], at vec.h:738
> void test(unsigned char *dst) {
> ^
> 0xd759fe vec<tree_node*, va_heap, vl_embed>::operator[](unsigned int)
> .../src/gcc/gcc/vec.h:738
> 0xd759fe vec<tree_node*, va_heap, vl_ptr>::operator[](unsigned int)
> .../src/gcc/gcc/vec.h:1204
> 0xd759fe vect_create_mask_and_perm
> .../src/gcc/gcc/tree-vect-slp.c:3072
> 0xd759fe vect_transform_slp_perm_load(_slp_tree*, vec<tree_node*, va_heap, vl_ptr>, gimple_stmt_iterator*, int, _slp_instance*, bool)
> .../src/gcc/gcc/tree-vect-slp.c:3350
> 0xd51613 vectorizable_load
> .../src/gcc/gcc/tree-vect-stmts.c:6847
> 0xd57ad2 vect_transform_stmt(gimple_statement_base*, gimple_stmt_iterator*, bool*, _slp_tree*, _slp_instance*)
> .../src/gcc/gcc/tree-vect-stmts.c:7490
> 0xd7aac1 vect_schedule_slp_instance
> .../src/gcc/gcc/tree-vect-slp.c:3500
> 0xd7a117 vect_schedule_slp_instance
> .../src/gcc/gcc/tree-vect-slp.c:3381
> 0xd7a117 vect_schedule_slp_instance
> .../src/gcc/gcc/tree-vect-slp.c:3381
> 0xd7a117 vect_schedule_slp_instance
> .../src/gcc/gcc/tree-vect-slp.c:3381
> 0xd7a117 vect_schedule_slp_instance
> .../src/gcc/gcc/tree-vect-slp.c:3381
> 0xd7a117 vect_schedule_slp_instance
> .../src/gcc/gcc/tree-vect-slp.c:3381
> 0xd7a117 vect_schedule_slp_instance
> .../src/gcc/gcc/tree-vect-slp.c:3381
> 0xd7a117 vect_schedule_slp_instance
> .../src/gcc/gcc/tree-vect-slp.c:3381
> 0xd7abce vect_schedule_slp(_loop_vec_info*, _bb_vec_info*)
> .../src/gcc/gcc/tree-vect-slp.c:3570
> 0xd5e564 vect_transform_loop(_loop_vec_info*)
> .../src/gcc/gcc/tree-vect-loop.c:6223
> 0xd7eca8 vectorize_loops()
> .../src/gcc/gcc/tree-vectorizer.c:499
> 0xc88c54 execute
> .../src/gcc/gcc/tree-ssa-loop.c:292
> Please submit a full bug report,
> with preprocessed source if appropriate.
> Please include the complete backtrace with any bug report.
> See <http://gcc.gnu.org/bugs.html> for instructions.
>
>
--
Richard Biener <rguenther@suse.de>
SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Dilip Upmanyu, Graham Norton, HRB 21284 (AG Nuernberg)
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH] Lift restrictions on SLP permutation for loop vect
2015-06-11 14:20 ` Richard Biener
@ 2015-06-11 14:23 ` James Greenhalgh
2015-06-11 14:30 ` Richard Biener
0 siblings, 1 reply; 7+ messages in thread
From: James Greenhalgh @ 2015-06-11 14:23 UTC (permalink / raw)
To: Richard Biener; +Cc: Uros Bizjak, gcc-patches
[-- Attachment #1: Type: text/plain, Size: 3723 bytes --]
On Thu, Jun 11, 2015 at 03:08:59PM +0100, Richard Biener wrote:
> On Thu, 11 Jun 2015, Uros Bizjak wrote:
>
> > > So this turned up other issues thus the following is what I have
> > > committed after bootstrapping and testing on x86_64-unknown-linux-gnu.
> > >
> > > Richard.
> > >
> > > 2015-06-08 Richard Biener <rguenther@suse.de>
> > >
> > > * tree-vect-stmts.c (vectorizable_load): Compute the pointer
> > > adjustment for gaps at the end of a SLP load group properly.
> > > * tree-vect-slp.c (vect_supported_load_permutation_p): Allow
> > > all permutations we can generate.
> > > (vect_transform_slp_perm_load): Use the correct group-size.
> > >
> > > * gcc.dg/vect/slp-perm-10.c: New testcase.
> > > * gcc.dg/vect/slp-23.c: Adjust.
> > > * gcc.dg/torture/pr53366-2.c: Also verify cross-iteration vector pointer update.
> >
> > This patch caused:
> >
> > FAIL: gcc.target/i386/pr61403.c scan-assembler blend
>
> Yeah, I noticed. We now want to vectorize this differently but
> fail due to the cost model. I'm working on enhancing the vectorizer
> here.
It also caused an ICE in the ARM port (arm-none-eabi,
arm-none-linux-gnueabihf):
FAIL: gcc.target/arm/pr53636.c (internal compiler error)
Full ICE text below, and reduced testcase attached, compile with:
arm-none-eabi-gcc -O -ftree-vectorize -mfpu=neon -mcpu=cortex-a9 bug.c
I tried to take a look to see what was happening, but I couldn't see
the root of the problem. The access to dr_chain in
vect_create_mask_and_perm:
second_vec = dr_chain[second_vec_indx];
Fails as dr_chain has length 1, and second_vec_indx is 2.
I think that the mask that the code is trying to produce is { 1, 2, 3, 4 }.
bug.c:4:3: note: add new stmt: vect__8.6_108 = VEC_PERM_EXPR <vect__8.4_104, vect__8.5_106, { 1, 2, 3, 4 }>;
But that's about as far as I got.
Thanks,
James
---
bug.c: In function 'test':
bug.c:1:6: internal compiler error: in operator[], at vec.h:738
void test(unsigned char *dst) {
^
0xd759fe vec<tree_node*, va_heap, vl_embed>::operator[](unsigned int)
.../src/gcc/gcc/vec.h:738
0xd759fe vec<tree_node*, va_heap, vl_ptr>::operator[](unsigned int)
.../src/gcc/gcc/vec.h:1204
0xd759fe vect_create_mask_and_perm
.../src/gcc/gcc/tree-vect-slp.c:3072
0xd759fe vect_transform_slp_perm_load(_slp_tree*, vec<tree_node*, va_heap, vl_ptr>, gimple_stmt_iterator*, int, _slp_instance*, bool)
.../src/gcc/gcc/tree-vect-slp.c:3350
0xd51613 vectorizable_load
.../src/gcc/gcc/tree-vect-stmts.c:6847
0xd57ad2 vect_transform_stmt(gimple_statement_base*, gimple_stmt_iterator*, bool*, _slp_tree*, _slp_instance*)
.../src/gcc/gcc/tree-vect-stmts.c:7490
0xd7aac1 vect_schedule_slp_instance
.../src/gcc/gcc/tree-vect-slp.c:3500
0xd7a117 vect_schedule_slp_instance
.../src/gcc/gcc/tree-vect-slp.c:3381
0xd7a117 vect_schedule_slp_instance
.../src/gcc/gcc/tree-vect-slp.c:3381
0xd7a117 vect_schedule_slp_instance
.../src/gcc/gcc/tree-vect-slp.c:3381
0xd7a117 vect_schedule_slp_instance
.../src/gcc/gcc/tree-vect-slp.c:3381
0xd7a117 vect_schedule_slp_instance
.../src/gcc/gcc/tree-vect-slp.c:3381
0xd7a117 vect_schedule_slp_instance
.../src/gcc/gcc/tree-vect-slp.c:3381
0xd7a117 vect_schedule_slp_instance
.../src/gcc/gcc/tree-vect-slp.c:3381
0xd7abce vect_schedule_slp(_loop_vec_info*, _bb_vec_info*)
.../src/gcc/gcc/tree-vect-slp.c:3570
0xd5e564 vect_transform_loop(_loop_vec_info*)
.../src/gcc/gcc/tree-vect-loop.c:6223
0xd7eca8 vectorize_loops()
.../src/gcc/gcc/tree-vectorizer.c:499
0xc88c54 execute
.../src/gcc/gcc/tree-ssa-loop.c:292
Please submit a full bug report,
with preprocessed source if appropriate.
Please include the complete backtrace with any bug report.
See <http://gcc.gnu.org/bugs.html> for instructions.
[-- Attachment #2: bug.c --]
[-- Type: text/x-csrc, Size: 775 bytes --]
/* Reduced testcase for the internal compiler error reported against
   gcc.target/arm/pr53636.c.  Reproduce with:
     arm-none-eabi-gcc -O -ftree-vectorize -mfpu=neon -mcpu=cortex-a9 bug.c

   Each of the 8 loop iterations stores 8 bytes through DST.  Byte k is
   computed from the four adjacent shorts tptr[k] .. tptr[k + 3], so one
   iteration reads tptr[0] .. tptr[10].  DST then advances by 8 elements
   and TPTR by 11.

   NOTE(review): TPTR is never assigned and TMP is never used in this
   reduction; presumably the file is meant to be compiled only, not
   executed -- confirm before turning it into a run test.  */
void test(unsigned char *dst) {
short tmp[11 * 8], *tptr;
int i;
for (i = 0; i < 8; i++)
{
dst[0] = (-tptr[0] + 9 * tptr[0 + 1] + 9 * tptr[0 + 2] - tptr[0 + 3]) >> 7;
dst[1] = (-tptr[1] + 9 * tptr[1 + 1] + 9 * tptr[1 + 2] - tptr[1 + 3]) >> 7;
dst[2] = (-tptr[2] + 9 * tptr[2 + 1] + 9 * tptr[2 + 2] - tptr[2 + 3]) >> 7;
dst[3] = (-tptr[3] + 9 * tptr[3 + 1] + 9 * tptr[3 + 2] - tptr[3 + 3]) >> 7;
dst[4] = (-tptr[4] + 9 * tptr[4 + 1] + 9 * tptr[4 + 2] - tptr[4 + 3]) >> 7;
dst[5] = (-tptr[5] + 9 * tptr[5 + 1] + 9 * tptr[5 + 2] - tptr[5 + 3]) >> 7;
dst[6] = (-tptr[6] + 9 * tptr[6 + 1] + 9 * tptr[6 + 2] - tptr[6 + 3]) >> 7;
dst[7] = (-tptr[7] + 9 * tptr[7 + 1] + 9 * tptr[7 + 2] - tptr[7 + 3]) >> 7;
/* Advance to the next output row (8 bytes) and next input row (11 shorts).  */
dst += 8;
tptr += 11;
}
}
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH] Lift restrictions on SLP permutation for loop vect
2015-06-11 14:09 Uros Bizjak
@ 2015-06-11 14:20 ` Richard Biener
2015-06-11 14:23 ` James Greenhalgh
0 siblings, 1 reply; 7+ messages in thread
From: Richard Biener @ 2015-06-11 14:20 UTC (permalink / raw)
To: Uros Bizjak; +Cc: gcc-patches
On Thu, 11 Jun 2015, Uros Bizjak wrote:
> > So this turned up other issues thus the following is what I have
> > committed after bootstrapping and testing on x86_64-unknown-linux-gnu.
> >
> > Richard.
> >
> > 2015-06-08 Richard Biener <rguenther@suse.de>
> >
> > * tree-vect-stmts.c (vectorizable_load): Compute the pointer
> > adjustment for gaps at the end of a SLP load group properly.
> > * tree-vect-slp.c (vect_supported_load_permutation_p): Allow
> > all permutations we can generate.
> > (vect_transform_slp_perm_load): Use the correct group-size.
> >
> > * gcc.dg/vect/slp-perm-10.c: New testcase.
> > * gcc.dg/vect/slp-23.c: Adjust.
> > * gcc.dg/torture/pr53366-2.c: Also verify cross-iteration vector pointer update.
>
> This patch caused:
>
> FAIL: gcc.target/i386/pr61403.c scan-assembler blend
Yeah, I noticed. We now want to vectorize this differently but
fail due to the cost model. I'm working on enhancing the vectorizer
here.
Richard.
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH] Lift restrictions on SLP permutation for loop vect
@ 2015-06-11 14:09 Uros Bizjak
2015-06-11 14:20 ` Richard Biener
0 siblings, 1 reply; 7+ messages in thread
From: Uros Bizjak @ 2015-06-11 14:09 UTC (permalink / raw)
To: gcc-patches; +Cc: Richard Biener
> So this turned up other issues thus the following is what I have
> committed after bootstrapping and testing on x86_64-unknown-linux-gnu.
>
> Richard.
>
> 2015-06-08 Richard Biener <rguenther@suse.de>
>
> * tree-vect-stmts.c (vectorizable_load): Compute the pointer
> adjustment for gaps at the end of a SLP load group properly.
> * tree-vect-slp.c (vect_supported_load_permutation_p): Allow
> all permutations we can generate.
> (vect_transform_slp_perm_load): Use the correct group-size.
>
> * gcc.dg/vect/slp-perm-10.c: New testcase.
> * gcc.dg/vect/slp-23.c: Adjust.
> * gcc.dg/torture/pr53366-2.c: Also verify cross-iteration vector pointer update.
This patch caused:
FAIL: gcc.target/i386/pr61403.c scan-assembler blend
Uros.
^ permalink raw reply [flat|nested] 7+ messages in thread
end of thread, other threads:[~2015-06-11 14:30 UTC | newest]
Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-06-03 12:01 [PATCH] Lift restrictions on SLP permutation for loop vect Richard Biener
2015-06-08 13:03 ` Richard Biener
2015-06-11 14:09 Uros Bizjak
2015-06-11 14:20 ` Richard Biener
2015-06-11 14:23 ` James Greenhalgh
2015-06-11 14:30 ` Richard Biener
2015-06-11 14:34 ` James Greenhalgh
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).