* [PATCH] Lift restrictions on SLP permutation for loop vect @ 2015-06-03 12:01 Richard Biener 2015-06-08 13:03 ` Richard Biener 0 siblings, 1 reply; 7+ messages in thread From: Richard Biener @ 2015-06-03 12:01 UTC (permalink / raw) To: gcc-patches This allows all permutations we can generate (according to the target). Bootstrap and regtest pending on x86_64-unknown-linux-gnu. Richard. 2015-06-03 Richard Biener <rguenther@suse.de> * tree-vect-stmts.c (vectorizable_load): Compute the pointer adjustment for gaps at the end of a SLP load group properly. * tree-vect-slp.c (vect_supported_load_permutation_p): Allow all permutations we can generate. * gcc.dg/vect/slp-perm-10.c: New testcase. * gcc.dg/vect/slp-23.c: Adjust. Index: gcc/tree-vect-stmts.c =================================================================== *** gcc/tree-vect-stmts.c (revision 224061) --- gcc/tree-vect-stmts.c (working copy) *************** vectorizable_load (gimple stmt, gimple_s *** 5807,5813 **** gimple ptr_incr = NULL; int nunits = TYPE_VECTOR_SUBPARTS (vectype); int ncopies; ! int i, j, group_size = -1, group_gap; tree msq = NULL_TREE, lsq; tree offset = NULL_TREE; tree byte_offset = NULL_TREE; --- 5807,5813 ---- gimple ptr_incr = NULL; int nunits = TYPE_VECTOR_SUBPARTS (vectype); int ncopies; ! int i, j, group_size = -1, group_gap_adj; tree msq = NULL_TREE, lsq; tree offset = NULL_TREE; tree byte_offset = NULL_TREE; *************** vectorizable_load (gimple stmt, gimple_s *** 6402,6413 **** { grouped_load = false; vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); ! group_gap = GROUP_GAP (vinfo_for_stmt (first_stmt)); } else { vec_num = group_size; ! group_gap = 0; } } else --- 6402,6413 ---- { grouped_load = false; vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); ! group_gap_adj = vf * group_size - nunits * vec_num; } else { vec_num = group_size; ! 
group_gap_adj = 0; } } else *************** vectorizable_load (gimple stmt, gimple_s *** 6415,6421 **** first_stmt = stmt; first_dr = dr; group_size = vec_num = 1; ! group_gap = 0; } alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false); --- 6415,6421 ---- first_stmt = stmt; first_dr = dr; group_size = vec_num = 1; ! group_gap_adj = 0; } alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false); *************** vectorizable_load (gimple stmt, gimple_s *** 6832,6842 **** SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt); } /* Bump the vector pointer to account for a gap. */ ! if (slp && group_gap != 0) { tree bump = size_binop (MULT_EXPR, TYPE_SIZE_UNIT (elem_type), ! size_int (group_gap)); dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt, bump); } --- 6832,6842 ---- SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt); } /* Bump the vector pointer to account for a gap. */ ! if (group_gap_adj != 0) { tree bump = size_binop (MULT_EXPR, TYPE_SIZE_UNIT (elem_type), ! size_int (group_gap_adj)); dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt, bump); } Index: gcc/tree-vect-slp.c =================================================================== *** gcc/tree-vect-slp.c (revision 224061) --- gcc/tree-vect-slp.c (working copy) *************** vect_supported_load_permutation_p (slp_i *** 1506,1552 **** return true; } ! /* FORNOW: the only supported permutation is 0..01..1.. of length equal to ! GROUP_SIZE and where each sequence of same drs is of GROUP_SIZE length as ! well (unless it's reduction). */ ! if (SLP_INSTANCE_LOADS (slp_instn).length () != group_size) ! return false; ! FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node) ! if (!node->load_permutation.exists ()) ! return false; ! ! load_index = sbitmap_alloc (group_size); ! bitmap_clear (load_index); ! FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node) ! { ! unsigned int lidx = node->load_permutation[0]; ! 
if (bitmap_bit_p (load_index, lidx)) ! { ! sbitmap_free (load_index); ! return false; ! } ! bitmap_set_bit (load_index, lidx); ! FOR_EACH_VEC_ELT (node->load_permutation, j, k) ! if (k != lidx) ! { ! sbitmap_free (load_index); ! return false; ! } ! } ! for (i = 0; i < group_size; i++) ! if (!bitmap_bit_p (load_index, i)) ! { ! sbitmap_free (load_index); ! return false; ! } ! sbitmap_free (load_index); ! FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node) if (node->load_permutation.exists () && !vect_transform_slp_perm_load (node, vNULL, NULL, SLP_INSTANCE_UNROLLING_FACTOR (slp_instn), slp_instn, true)) return false; return true; } --- 1504,1517 ---- return true; } ! /* For loop vectorization verify we can generate the permutation. */ FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node) if (node->load_permutation.exists () && !vect_transform_slp_perm_load (node, vNULL, NULL, SLP_INSTANCE_UNROLLING_FACTOR (slp_instn), slp_instn, true)) return false; + return true; } Index: gcc/testsuite/gcc.dg/vect/slp-23.c =================================================================== *** gcc/testsuite/gcc.dg/vect/slp-23.c (revision 224061) --- gcc/testsuite/gcc.dg/vect/slp-23.c (working copy) *************** int main (void) *** 108,112 **** /* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" { target { vect_strided8 && { ! { vect_no_align} } } } } } */ /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { ! { vect_strided8 || vect_no_align } } } } } */ ! /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" } } */ --- 108,113 ---- /* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" { target { vect_strided8 && { ! { vect_no_align} } } } } } */ /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { ! { vect_strided8 || vect_no_align } } } } } */ ! /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { ! vect_perm } } } } */ ! 
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target vect_perm } } } */ Index: gcc/testsuite/gcc.dg/vect/slp-perm-10.c =================================================================== *** gcc/testsuite/gcc.dg/vect/slp-perm-10.c (revision 0) --- gcc/testsuite/gcc.dg/vect/slp-perm-10.c (working copy) *************** *** 0 **** --- 1,53 ---- + /* { dg-require-effective-target vect_int } */ + + #include "tree-vect.h" + + int a[256], b[256]; + + void __attribute__((noinline)) + foo (void) + { + int i; + for (i = 0; i < 32; ++i) + { + b[i*8+0] = a[i*8+0]; + b[i*8+1] = a[i*8+0]; + b[i*8+2] = a[i*8+3]; + b[i*8+3] = a[i*8+3]; + b[i*8+4] = a[i*8+4]; + b[i*8+5] = a[i*8+6]; + b[i*8+6] = a[i*8+4]; + b[i*8+7] = a[i*8+6]; + } + } + + int main () + { + int i; + + check_vect (); + + for (i = 0; i < 256; ++i) + { + a[i] = i; + __asm__ volatile (""); + } + + foo (); + + for (i = 0; i < 32; ++i) + if (b[i*8+0] != i*8+0 + || b[i*8+1] != i*8+0 + || b[i*8+2] != i*8+3 + || b[i*8+3] != i*8+3 + || b[i*8+4] != i*8+4 + || b[i*8+5] != i*8+6 + || b[i*8+6] != i*8+4 + || b[i*8+7] != i*8+6) + abort (); + + return 0; + } + + /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_perm } } } */ + /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_perm } } } */ ^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH] Lift restrictions on SLP permutation for loop vect 2015-06-03 12:01 [PATCH] Lift restrictions on SLP permutation for loop vect Richard Biener @ 2015-06-08 13:03 ` Richard Biener 0 siblings, 0 replies; 7+ messages in thread From: Richard Biener @ 2015-06-08 13:03 UTC (permalink / raw) To: gcc-patches On Wed, 3 Jun 2015, Richard Biener wrote: > > This allows all permutations we can generate (according to the target). > > Bootstrap and regtest pending on x86_64-unknown-linux-gnu. So this turned up other issues thus the following is what I have committed after bootstrapping and testing on x86_64-unknown-linux-gnu. Richard. 2015-06-08 Richard Biener <rguenther@suse.de> * tree-vect-stmts.c (vectorizable_load): Compute the pointer adjustment for gaps at the end of a SLP load group properly. * tree-vect-slp.c (vect_supported_load_permutation_p): Allow all permutations we can generate. (vect_transform_slp_perm_load): Use the correct group-size. * gcc.dg/vect/slp-perm-10.c: New testcase. * gcc.dg/vect/slp-23.c: Adjust. * gcc.dg/torture/pr53366-2.c: Also verify cross-iteration vector pointer update. Index: gcc/tree-vect-stmts.c =================================================================== *** gcc/tree-vect-stmts.c (revision 224077) --- gcc/tree-vect-stmts.c (working copy) *************** vectorizable_load (gimple stmt, gimple_s *** 5807,5813 **** gimple ptr_incr = NULL; int nunits = TYPE_VECTOR_SUBPARTS (vectype); int ncopies; ! int i, j, group_size = -1, group_gap; tree msq = NULL_TREE, lsq; tree offset = NULL_TREE; tree byte_offset = NULL_TREE; --- 5807,5813 ---- gimple ptr_incr = NULL; int nunits = TYPE_VECTOR_SUBPARTS (vectype); int ncopies; ! 
int i, j, group_size = -1, group_gap_adj; tree msq = NULL_TREE, lsq; tree offset = NULL_TREE; tree byte_offset = NULL_TREE; *************** vectorizable_load (gimple stmt, gimple_s *** 6396,6421 **** } first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt)); group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt)); /* VEC_NUM is the number of vect stmts to be created for this group. */ if (slp) { grouped_load = false; vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); ! group_gap = GROUP_GAP (vinfo_for_stmt (first_stmt)); } else ! { ! vec_num = group_size; ! group_gap = 0; ! } } else { first_stmt = stmt; first_dr = dr; group_size = vec_num = 1; ! group_gap = 0; } alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false); --- 6396,6419 ---- } first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt)); group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt)); + group_gap_adj = 0; /* VEC_NUM is the number of vect stmts to be created for this group. */ if (slp) { grouped_load = false; vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); ! group_gap_adj = vf * group_size - nunits * vec_num; } else ! vec_num = group_size; } else { first_stmt = stmt; first_dr = dr; group_size = vec_num = 1; ! group_gap_adj = 0; } alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false); *************** vectorizable_load (gimple stmt, gimple_s *** 6831,6842 **** if (slp && !slp_perm) SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt); } ! /* Bump the vector pointer to account for a gap. */ ! if (slp && group_gap != 0) { ! tree bump = size_binop (MULT_EXPR, ! TYPE_SIZE_UNIT (elem_type), ! size_int (group_gap)); dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt, bump); } --- 6829,6843 ---- if (slp && !slp_perm) SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt); } ! /* Bump the vector pointer to account for a gap or for excess ! elements loaded for a permuted SLP load. */ ! if (group_gap_adj != 0) { ! bool ovf; ! tree bump ! 
= wide_int_to_tree (sizetype, ! wi::smul (TYPE_SIZE_UNIT (elem_type), ! group_gap_adj, &ovf)); dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt, bump); } Index: gcc/tree-vect-slp.c =================================================================== *** gcc/tree-vect-slp.c (revision 224077) --- gcc/tree-vect-slp.c (working copy) *************** vect_supported_load_permutation_p (slp_i *** 1502,1548 **** return true; } ! /* FORNOW: the only supported permutation is 0..01..1.. of length equal to ! GROUP_SIZE and where each sequence of same drs is of GROUP_SIZE length as ! well (unless it's reduction). */ ! if (SLP_INSTANCE_LOADS (slp_instn).length () != group_size) ! return false; ! FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node) ! if (!node->load_permutation.exists ()) ! return false; ! ! load_index = sbitmap_alloc (group_size); ! bitmap_clear (load_index); ! FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node) ! { ! unsigned int lidx = node->load_permutation[0]; ! if (bitmap_bit_p (load_index, lidx)) ! { ! sbitmap_free (load_index); ! return false; ! } ! bitmap_set_bit (load_index, lidx); ! FOR_EACH_VEC_ELT (node->load_permutation, j, k) ! if (k != lidx) ! { ! sbitmap_free (load_index); ! return false; ! } ! } ! for (i = 0; i < group_size; i++) ! if (!bitmap_bit_p (load_index, i)) ! { ! sbitmap_free (load_index); ! return false; ! } ! sbitmap_free (load_index); ! FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node) if (node->load_permutation.exists () && !vect_transform_slp_perm_load (node, vNULL, NULL, SLP_INSTANCE_UNROLLING_FACTOR (slp_instn), slp_instn, true)) return false; return true; } --- 1517,1530 ---- return true; } ! /* For loop vectorization verify we can generate the permutation. 
*/ FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node) if (node->load_permutation.exists () && !vect_transform_slp_perm_load (node, vNULL, NULL, SLP_INSTANCE_UNROLLING_FACTOR (slp_instn), slp_instn, true)) return false; + return true; } *************** vect_transform_slp_perm_load (slp_tree n *** 3287,3292 **** --- 3269,3276 ---- if (!STMT_VINFO_GROUPED_ACCESS (stmt_info)) return false; + stmt_info = vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info)); + /* Generate permutation masks for every NODE. Number of masks for each NODE is equal to GROUP_SIZE. E.g., we have a group of three nodes with three loads from the same *************** vect_transform_slp_perm_load (slp_tree n *** 3321,3327 **** for (k = 0; k < group_size; k++) { i = SLP_TREE_LOAD_PERMUTATION (node)[k]; ! first_mask_element = i + j * group_size; if (!vect_get_mask_element (stmt, first_mask_element, 0, nunits, only_one_vec, index, mask, &current_mask_element, --- 3305,3311 ---- for (k = 0; k < group_size; k++) { i = SLP_TREE_LOAD_PERMUTATION (node)[k]; ! 
first_mask_element = i + j * STMT_VINFO_GROUP_SIZE (stmt_info); if (!vect_get_mask_element (stmt, first_mask_element, 0, nunits, only_one_vec, index, mask, &current_mask_element, Index: gcc/testsuite/gcc.dg/vect/slp-perm-10.c =================================================================== *** gcc/testsuite/gcc.dg/vect/slp-perm-10.c (revision 0) --- gcc/testsuite/gcc.dg/vect/slp-perm-10.c (working copy) *************** *** 0 **** --- 1,53 ---- + /* { dg-require-effective-target vect_int } */ + + #include "tree-vect.h" + + int a[256], b[256]; + + void __attribute__((noinline)) + foo (void) + { + int i; + for (i = 0; i < 32; ++i) + { + b[i*8+0] = a[i*8+0]; + b[i*8+1] = a[i*8+0]; + b[i*8+2] = a[i*8+3]; + b[i*8+3] = a[i*8+3]; + b[i*8+4] = a[i*8+4]; + b[i*8+5] = a[i*8+6]; + b[i*8+6] = a[i*8+4]; + b[i*8+7] = a[i*8+6]; + } + } + + int main () + { + int i; + + check_vect (); + + for (i = 0; i < 256; ++i) + { + a[i] = i; + __asm__ volatile (""); + } + + foo (); + + for (i = 0; i < 32; ++i) + if (b[i*8+0] != i*8+0 + || b[i*8+1] != i*8+0 + || b[i*8+2] != i*8+3 + || b[i*8+3] != i*8+3 + || b[i*8+4] != i*8+4 + || b[i*8+5] != i*8+6 + || b[i*8+6] != i*8+4 + || b[i*8+7] != i*8+6) + abort (); + + return 0; + } + + /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_perm } } } */ + /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_perm } } } */ Index: gcc/testsuite/gcc.dg/vect/slp-23.c =================================================================== *** gcc/testsuite/gcc.dg/vect/slp-23.c (revision 224077) --- gcc/testsuite/gcc.dg/vect/slp-23.c (working copy) *************** int main (void) *** 108,112 **** /* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" { target { vect_strided8 && { ! { vect_no_align} } } } } } */ /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { ! { vect_strided8 || vect_no_align } } } } } */ ! 
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" } } */ --- 108,113 ---- /* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" { target { vect_strided8 && { ! { vect_no_align} } } } } } */ /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { ! { vect_strided8 || vect_no_align } } } } } */ ! /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { ! vect_perm } } } } */ ! /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target vect_perm } } } */ Index: gcc/testsuite/gcc.dg/torture/pr53366-2.c =================================================================== *** gcc/testsuite/gcc.dg/torture/pr53366-2.c (revision 224077) --- gcc/testsuite/gcc.dg/torture/pr53366-2.c (working copy) *************** *** 4,21 **** extern void abort (void); struct T { float r[3], i[3]; }; ! struct U { struct T j[2]; }; void __attribute__ ((noinline)) foo (struct U *__restrict y, const float _Complex *__restrict x) { int i, j; ! for (j = 0; j < 2; ++j) { float a = __real__ x[j]; float b = __imag__ x[j]; ! float c = __real__ x[j + 2]; ! float d = __imag__ x[j + 2]; for (i = 0; i < 3; ++i) { y->j[j].r[i] = y->j[j].r[i] + a + c; --- 4,21 ---- extern void abort (void); struct T { float r[3], i[3]; }; ! struct U { struct T j[4]; }; void __attribute__ ((noinline)) foo (struct U *__restrict y, const float _Complex *__restrict x) { int i, j; ! for (j = 0; j < 4; ++j) { float a = __real__ x[j]; float b = __imag__ x[j]; ! float c = __real__ x[j + 4]; ! float d = __imag__ x[j + 4]; for (i = 0; i < 3; ++i) { y->j[j].r[i] = y->j[j].r[i] + a + c; *************** foo (struct U *__restrict y, const float *** 24,43 **** } } ! _Complex float x[4]; struct U y; int main () { int i, j; ! for (i = 0; i < 4; ++i) ! x[i] = i + 1.0iF * (2 * i); foo (&y, x); ! for (j = 0; j < 2; ++j) for (i = 0; i < 3; ++i) ! if (y.j[j].r[i] != __real__ (x[j] + x[j + 2]) ! 
|| y.j[j].i[i] != __imag__ (x[j] + x[j + 2])) __builtin_abort (); return 0; } --- 24,46 ---- } } ! _Complex float x[8]; struct U y; int main () { int i, j; ! for (i = 0; i < 8; ++i) ! { ! x[i] = i + 1.0iF * (2 * i); ! __asm__ volatile (""); ! } foo (&y, x); ! for (j = 0; j < 4; ++j) for (i = 0; i < 3; ++i) ! if (y.j[j].r[i] != __real__ (x[j] + x[j + 4]) ! || y.j[j].i[i] != __imag__ (x[j] + x[j + 4])) __builtin_abort (); return 0; } ^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH] Lift restrictions on SLP permutation for loop vect
@ 2015-06-11 14:09 Uros Bizjak
2015-06-11 14:20 ` Richard Biener
0 siblings, 1 reply; 7+ messages in thread
From: Uros Bizjak @ 2015-06-11 14:09 UTC (permalink / raw)
To: gcc-patches; +Cc: Richard Biener
> So this turned up other issues thus the following is what I have
> committed after bootstrapping and testing on x86_64-unknown-linux-gnu.
>
> Richard.
>
> 2015-06-08 Richard Biener <rguenther@suse.de>
>
> * tree-vect-stmts.c (vectorizable_load): Compute the pointer
> adjustment for gaps at the end of a SLP load group properly.
> * tree-vect-slp.c (vect_supported_load_permutation_p): Allow
> all permutations we can generate.
> (vect_transform_slp_perm_load): Use the correct group-size.
>
> * gcc.dg/vect/slp-perm-10.c: New testcase.
> * gcc.dg/vect/slp-23.c: Adjust.
> * gcc.dg/torture/pr53366-2.c: Also verify cross-iteration vector pointer update.
This patch caused:
FAIL: gcc.target/i386/pr61403.c scan-assembler blend
Uros.
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH] Lift restrictions on SLP permutation for loop vect 2015-06-11 14:09 Uros Bizjak @ 2015-06-11 14:20 ` Richard Biener 2015-06-11 14:23 ` James Greenhalgh 0 siblings, 1 reply; 7+ messages in thread From: Richard Biener @ 2015-06-11 14:20 UTC (permalink / raw) To: Uros Bizjak; +Cc: gcc-patches On Thu, 11 Jun 2015, Uros Bizjak wrote: > > So this turned up other issues thus the following is what I have > > committed after bootstrapping and testing on x86_64-unknown-linux-gnu. > > > > Richard. > > > > 2015-06-08 Richard Biener <rguenther@suse.de> > > > > * tree-vect-stmts.c (vectorizable_load): Compute the pointer > > adjustment for gaps at the end of a SLP load group properly. > > * tree-vect-slp.c (vect_supported_load_permutation_p): Allow > > all permutations we can generate. > > (vect_transform_slp_perm_load): Use the correct group-size. > > > > * gcc.dg/vect/slp-perm-10.c: New testcase. > > * gcc.dg/vect/slp-23.c: Adjust. > > * gcc.dg/torture/pr53366-2.c: Also verify cross-iteration vector pointer update. > > This patch caused: > > FAIL: gcc.target/i386/pr61403.c scan-assembler blend Yeah, I noticed. We now want to vectorize this differently but fail due to the cost model. I'm working on enhancing the vectorizer here. Richard. ^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH] Lift restrictions on SLP permutation for loop vect 2015-06-11 14:20 ` Richard Biener @ 2015-06-11 14:23 ` James Greenhalgh 2015-06-11 14:30 ` Richard Biener 0 siblings, 1 reply; 7+ messages in thread From: James Greenhalgh @ 2015-06-11 14:23 UTC (permalink / raw) To: Richard Biener; +Cc: Uros Bizjak, gcc-patches [-- Attachment #1: Type: text/plain, Size: 3723 bytes --] On Thu, Jun 11, 2015 at 03:08:59PM +0100, Richard Biener wrote: > On Thu, 11 Jun 2015, Uros Bizjak wrote: > > > > So this turned up other issues thus the following is what I have > > > committed after bootstrapping and testing on x86_64-unknown-linux-gnu. > > > > > > Richard. > > > > > > 2015-06-08 Richard Biener <rguenther@suse.de> > > > > > > * tree-vect-stmts.c (vectorizable_load): Compute the pointer > > > adjustment for gaps at the end of a SLP load group properly. > > > * tree-vect-slp.c (vect_supported_load_permutation_p): Allow > > > all permutations we can generate. > > > (vect_transform_slp_perm_load): Use the correct group-size. > > > > > > * gcc.dg/vect/slp-perm-10.c: New testcase. > > > * gcc.dg/vect/slp-23.c: Adjust. > > > * gcc.dg/torture/pr53366-2.c: Also verify cross-iteration vector pointer update. > > > > This patch caused: > > > > FAIL: gcc.target/i386/pr61403.c scan-assembler blend > > Yeah, I noticed. We now want to vectorize this differently but > fail due to the cost model. I'm working on enhancing the vectorizer > here. It also caused an ICE in the ARM port (arm-none-eabi, arm-none-linux-gnueabihf): FAIL: gcc.target/arm/pr53636.c (internal compiler error) Full ICE text below, and reduced testcase attached, compile with: arm-none-eabi-gcc -O -ftree-vectorize -mfpu=neon -mcpu=cortex-a9 bug.c I tried to take a look to see what was happening, but I couldn't see the root of the problem. The access to dr_chain in vect_create_mask_and_perm: second_vec = dr_chain[second_vec_indx]; Fails as dr_chain has length 1, and second_vec_indx is 2. 
I think that the mask that the code is trying to produce is { 1, 2, 3, 4 }. bug.c:4:3: note: add new stmt: vect__8.6_108 = VEC_PERM_EXPR <vect__8.4_104, vect__8.5_106, { 1, 2, 3, 4 }>; But that's about as far as I got. Thanks, James --- bug.c: In function 'test': bug.c:1:6: internal compiler error: in operator[], at vec.h:738 void test(unsigned char *dst) { ^ 0xd759fe vec<tree_node*, va_heap, vl_embed>::operator[](unsigned int) .../src/gcc/gcc/vec.h:738 0xd759fe vec<tree_node*, va_heap, vl_ptr>::operator[](unsigned int) .../src/gcc/gcc/vec.h:1204 0xd759fe vect_create_mask_and_perm .../src/gcc/gcc/tree-vect-slp.c:3072 0xd759fe vect_transform_slp_perm_load(_slp_tree*, vec<tree_node*, va_heap, vl_ptr>, gimple_stmt_iterator*, int, _slp_instance*, bool) .../src/gcc/gcc/tree-vect-slp.c:3350 0xd51613 vectorizable_load .../src/gcc/gcc/tree-vect-stmts.c:6847 0xd57ad2 vect_transform_stmt(gimple_statement_base*, gimple_stmt_iterator*, bool*, _slp_tree*, _slp_instance*) .../src/gcc/gcc/tree-vect-stmts.c:7490 0xd7aac1 vect_schedule_slp_instance .../src/gcc/gcc/tree-vect-slp.c:3500 0xd7a117 vect_schedule_slp_instance .../src/gcc/gcc/tree-vect-slp.c:3381 0xd7a117 vect_schedule_slp_instance .../src/gcc/gcc/tree-vect-slp.c:3381 0xd7a117 vect_schedule_slp_instance .../src/gcc/gcc/tree-vect-slp.c:3381 0xd7a117 vect_schedule_slp_instance .../src/gcc/gcc/tree-vect-slp.c:3381 0xd7a117 vect_schedule_slp_instance .../src/gcc/gcc/tree-vect-slp.c:3381 0xd7a117 vect_schedule_slp_instance .../src/gcc/gcc/tree-vect-slp.c:3381 0xd7a117 vect_schedule_slp_instance .../src/gcc/gcc/tree-vect-slp.c:3381 0xd7abce vect_schedule_slp(_loop_vec_info*, _bb_vec_info*) .../src/gcc/gcc/tree-vect-slp.c:3570 0xd5e564 vect_transform_loop(_loop_vec_info*) .../src/gcc/gcc/tree-vect-loop.c:6223 0xd7eca8 vectorize_loops() .../src/gcc/gcc/tree-vectorizer.c:499 0xc88c54 execute .../src/gcc/gcc/tree-ssa-loop.c:292 Please submit a full bug report, with preprocessed source if appropriate. 
Please include the complete backtrace with any bug report. See <http://gcc.gnu.org/bugs.html> for instructions. [-- Attachment #2: bug.c --] [-- Type: text/x-csrc, Size: 775 bytes --] void test(unsigned char *dst) { short tmp[11 * 8], *tptr; int i; for (i = 0; i < 8; i++) { dst[0] = (-tptr[0] + 9 * tptr[0 + 1] + 9 * tptr[0 + 2] - tptr[0 + 3]) >> 7; dst[1] = (-tptr[1] + 9 * tptr[1 + 1] + 9 * tptr[1 + 2] - tptr[1 + 3]) >> 7; dst[2] = (-tptr[2] + 9 * tptr[2 + 1] + 9 * tptr[2 + 2] - tptr[2 + 3]) >> 7; dst[3] = (-tptr[3] + 9 * tptr[3 + 1] + 9 * tptr[3 + 2] - tptr[3 + 3]) >> 7; dst[4] = (-tptr[4] + 9 * tptr[4 + 1] + 9 * tptr[4 + 2] - tptr[4 + 3]) >> 7; dst[5] = (-tptr[5] + 9 * tptr[5 + 1] + 9 * tptr[5 + 2] - tptr[5 + 3]) >> 7; dst[6] = (-tptr[6] + 9 * tptr[6 + 1] + 9 * tptr[6 + 2] - tptr[6 + 3]) >> 7; dst[7] = (-tptr[7] + 9 * tptr[7 + 1] + 9 * tptr[7 + 2] - tptr[7 + 3]) >> 7; dst += 8; tptr += 11; } } ^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH] Lift restrictions on SLP permutation for loop vect 2015-06-11 14:23 ` James Greenhalgh @ 2015-06-11 14:30 ` Richard Biener 2015-06-11 14:34 ` James Greenhalgh 0 siblings, 1 reply; 7+ messages in thread From: Richard Biener @ 2015-06-11 14:30 UTC (permalink / raw) To: James Greenhalgh; +Cc: Uros Bizjak, gcc-patches On Thu, 11 Jun 2015, James Greenhalgh wrote: > On Thu, Jun 11, 2015 at 03:08:59PM +0100, Richard Biener wrote: > > On Thu, 11 Jun 2015, Uros Bizjak wrote: > > > > > > So this turned up other issues thus the following is what I have > > > > committed after bootstrapping and testing on x86_64-unknown-linux-gnu. > > > > > > > > Richard. > > > > > > > > 2015-06-08 Richard Biener <rguenther@suse.de> > > > > > > > > * tree-vect-stmts.c (vectorizable_load): Compute the pointer > > > > adjustment for gaps at the end of a SLP load group properly. > > > > * tree-vect-slp.c (vect_supported_load_permutation_p): Allow > > > > all permutations we can generate. > > > > (vect_transform_slp_perm_load): Use the correct group-size. > > > > > > > > * gcc.dg/vect/slp-perm-10.c: New testcase. > > > > * gcc.dg/vect/slp-23.c: Adjust. > > > > * gcc.dg/torture/pr53366-2.c: Also verify cross-iteration vector pointer update. > > > > > > This patch caused: > > > > > > FAIL: gcc.target/i386/pr61403.c scan-assembler blend > > > > Yeah, I noticed. We now want to vectorize this differently but > > fail due to the cost model. I'm working on enhancing the vectorizer > > here. > > It also caused an ICE in the ARM port (arm-none-eabi, > arm-none-linux-gnueabihf): > > FAIL: gcc.target/arm/pr53636.c (internal compiler error) > > Full ICE text below, and reduced testcase attached, compile with: > > arm-none-eabi-gcc -O -ftree-vectorize -mfpu=neon -mcpu=cortex-a9 bug.c > > I tried to take a look to see what was happening, but I couldn't see > the root of the problem. 
The access to dr_chain in > vect_create_mask_and_perm: > > second_vec = dr_chain[second_vec_indx]; > > Fails as dr_chain has length 1, and second_vec_indx is 2. > > I think that the mask that the code is trying to produce is { 1, 2, 3, 4 }. > > bug.c:4:3: note: add new stmt: vect__8.6_108 = VEC_PERM_EXPR <vect__8.4_104, vect__8.5_106, { 1, 2, 3, 4 }>; > > But that's about as far as I got. I will have a look next week - mind opening a bugreport for this so I don't forget? Thanks, Richard. > Thanks, > James > > --- > bug.c: In function 'test': > bug.c:1:6: internal compiler error: in operator[], at vec.h:738 > void test(unsigned char *dst) { > ^ > 0xd759fe vec<tree_node*, va_heap, vl_embed>::operator[](unsigned int) > .../src/gcc/gcc/vec.h:738 > 0xd759fe vec<tree_node*, va_heap, vl_ptr>::operator[](unsigned int) > .../src/gcc/gcc/vec.h:1204 > 0xd759fe vect_create_mask_and_perm > .../src/gcc/gcc/tree-vect-slp.c:3072 > 0xd759fe vect_transform_slp_perm_load(_slp_tree*, vec<tree_node*, va_heap, vl_ptr>, gimple_stmt_iterator*, int, _slp_instance*, bool) > .../src/gcc/gcc/tree-vect-slp.c:3350 > 0xd51613 vectorizable_load > .../src/gcc/gcc/tree-vect-stmts.c:6847 > 0xd57ad2 vect_transform_stmt(gimple_statement_base*, gimple_stmt_iterator*, bool*, _slp_tree*, _slp_instance*) > .../src/gcc/gcc/tree-vect-stmts.c:7490 > 0xd7aac1 vect_schedule_slp_instance > .../src/gcc/gcc/tree-vect-slp.c:3500 > 0xd7a117 vect_schedule_slp_instance > .../src/gcc/gcc/tree-vect-slp.c:3381 > 0xd7a117 vect_schedule_slp_instance > .../src/gcc/gcc/tree-vect-slp.c:3381 > 0xd7a117 vect_schedule_slp_instance > .../src/gcc/gcc/tree-vect-slp.c:3381 > 0xd7a117 vect_schedule_slp_instance > .../src/gcc/gcc/tree-vect-slp.c:3381 > 0xd7a117 vect_schedule_slp_instance > .../src/gcc/gcc/tree-vect-slp.c:3381 > 0xd7a117 vect_schedule_slp_instance > .../src/gcc/gcc/tree-vect-slp.c:3381 > 0xd7a117 vect_schedule_slp_instance > .../src/gcc/gcc/tree-vect-slp.c:3381 > 0xd7abce vect_schedule_slp(_loop_vec_info*, 
_bb_vec_info*) > .../src/gcc/gcc/tree-vect-slp.c:3570 > 0xd5e564 vect_transform_loop(_loop_vec_info*) > .../src/gcc/gcc/tree-vect-loop.c:6223 > 0xd7eca8 vectorize_loops() > .../src/gcc/gcc/tree-vectorizer.c:499 > 0xc88c54 execute > .../src/gcc/gcc/tree-ssa-loop.c:292 > Please submit a full bug report, > with preprocessed source if appropriate. > Please include the complete backtrace with any bug report. > See <http://gcc.gnu.org/bugs.html> for instructions. > > -- Richard Biener <rguenther@suse.de> SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Dilip Upmanyu, Graham Norton, HRB 21284 (AG Nuernberg) ^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH] Lift restrictions on SLP permutation for loop vect 2015-06-11 14:30 ` Richard Biener @ 2015-06-11 14:34 ` James Greenhalgh 0 siblings, 0 replies; 7+ messages in thread From: James Greenhalgh @ 2015-06-11 14:34 UTC (permalink / raw) To: Richard Biener; +Cc: Uros Bizjak, gcc-patches On Thu, Jun 11, 2015 at 03:23:21PM +0100, Richard Biener wrote: > I will have a look next week - mind opening a bugreport for this so > I don't forget? Of course, I've opened https://gcc.gnu.org/bugzilla/show_bug.cgi?id=66510 . Cheers, James ^ permalink raw reply [flat|nested] 7+ messages in thread
end of thread, other threads:[~2015-06-11 14:30 UTC | newest] Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- 2015-06-03 12:01 [PATCH] Lift restrictions on SLP permutation for loop vect Richard Biener 2015-06-08 13:03 ` Richard Biener 2015-06-11 14:09 Uros Bizjak 2015-06-11 14:20 ` Richard Biener 2015-06-11 14:23 ` James Greenhalgh 2015-06-11 14:30 ` Richard Biener 2015-06-11 14:34 ` James Greenhalgh
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).