* [PATCH] Fix PR66623 @ 2017-06-08 9:18 Richard Biener 2017-06-08 13:49 ` Richard Biener 0 siblings, 1 reply; 6+ messages in thread From: Richard Biener @ 2017-06-08 9:18 UTC (permalink / raw) To: gcc-patches The following fixes unsafe vectorization of reductions in outer loop vectorization. It combines it with a bit of cleanup in the affected function. Bootstrap and regtest running on x86_64-unknown-linux-gnu. Richard. 2017-06-08 Richard Biener <rguenther@suse.de> PR tree-optimization/66623 * tree-vect-loop.c (vect_is_simple_reduction): Cleanup, refactor check_reduction into two parts, properly computing whether we have to check reduction validity for outer loop vectorization. * gcc.dg/vect/pr66623.c: New testcase. Index: gcc/tree-vect-loop.c =================================================================== --- gcc/tree-vect-loop.c (revision 249003) +++ gcc/tree-vect-loop.c (working copy) @@ -2727,8 +2727,6 @@ vect_is_simple_reduction (loop_vec_info { struct loop *loop = (gimple_bb (phi))->loop_father; struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info); - edge latch_e = loop_latch_edge (loop); - tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e); gimple *def_stmt, *def1 = NULL, *def2 = NULL, *phi_use_stmt = NULL; enum tree_code orig_code, code; tree op1, op2, op3 = NULL_TREE, op4 = NULL_TREE; @@ -2742,11 +2740,6 @@ vect_is_simple_reduction (loop_vec_info *double_reduc = false; *v_reduc_type = TREE_CODE_REDUCTION; - /* Check validity of the reduction only for the innermost loop. */ - bool check_reduction = ! flow_loop_nested_p (vect_loop, loop); - gcc_assert ((check_reduction && loop == vect_loop) - || (!check_reduction && flow_loop_nested_p (vect_loop, loop))); - name = PHI_RESULT (phi); /* ??? 
If there are no uses of the PHI result the inner loop reduction won't be detected as possibly double-reduction by vectorizable_reduction @@ -2775,13 +2768,15 @@ vect_is_simple_reduction (loop_vec_info { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "reduction used in loop.\n"); + "reduction value used in loop.\n"); return NULL; } phi_use_stmt = use_stmt; } + edge latch_e = loop_latch_edge (loop); + tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e); if (TREE_CODE (loop_arg) != SSA_NAME) { if (dump_enabled_p ()) @@ -2795,18 +2790,22 @@ vect_is_simple_reduction (loop_vec_info } def_stmt = SSA_NAME_DEF_STMT (loop_arg); - if (!def_stmt) + if (gimple_nop_p (def_stmt)) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "reduction: no def_stmt.\n"); + "reduction: no def_stmt\n"); return NULL; } if (!is_gimple_assign (def_stmt) && gimple_code (def_stmt) != GIMPLE_PHI) { if (dump_enabled_p ()) - dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0); + { + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "reduction: unhandled reduction operation: "); + dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, def_stmt, 0); + } return NULL; } @@ -2822,6 +2821,7 @@ vect_is_simple_reduction (loop_vec_info } nloop_uses = 0; + gphi *lcphi = NULL; FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name) { gimple *use_stmt = USE_STMT (use_p); @@ -2829,6 +2829,11 @@ vect_is_simple_reduction (loop_vec_info continue; if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt))) nloop_uses++; + else + { + gcc_assert (!lcphi); + lcphi = as_a <gphi *> (use_stmt); + } if (nloop_uses > 1) { if (dump_enabled_p ()) @@ -2873,6 +2878,24 @@ vect_is_simple_reduction (loop_vec_info return NULL; } + /* If we are vectorizing an inner reduction we are executing that + in the original order only in case we are not dealing with a + double reduction. 
*/ + bool check_reduction = true; + if (flow_loop_nested_p (vect_loop, loop)) + { + check_reduction = false; + FOR_EACH_IMM_USE_FAST (use_p, imm_iter, gimple_phi_result (lcphi)) + { + gimple *use_stmt = USE_STMT (use_p); + if (is_gimple_debug (use_stmt)) + continue; + if (! flow_bb_inside_loop_p (vect_loop, gimple_bb (use_stmt))) + check_reduction = true; + } + } + + bool nested_in_vect_loop = flow_loop_nested_p (vect_loop, loop); code = orig_code = gimple_assign_rhs_code (def_stmt); /* We can handle "res -= x[i]", which is non-associative by @@ -2887,27 +2910,8 @@ vect_is_simple_reduction (loop_vec_info if (code == COND_EXPR) { - if (check_reduction) + if (! nested_in_vect_loop) *v_reduc_type = COND_REDUCTION; - } - else if (!commutative_tree_code (code) || !associative_tree_code (code)) - { - if (dump_enabled_p ()) - report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt, - "reduction: not commutative/associative: "); - return NULL; - } - - if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS) - { - if (code != COND_EXPR) - { - if (dump_enabled_p ()) - report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt, - "reduction: not binary operation: "); - - return NULL; - } op3 = gimple_assign_rhs1 (def_stmt); if (COMPARISON_CLASS_P (op3)) @@ -2918,30 +2922,35 @@ vect_is_simple_reduction (loop_vec_info op1 = gimple_assign_rhs2 (def_stmt); op2 = gimple_assign_rhs3 (def_stmt); - - if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME) - { - if (dump_enabled_p ()) - report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt, - "reduction: uses not ssa_names: "); - - return NULL; - } } - else + else if (!commutative_tree_code (code) || !associative_tree_code (code)) + { + if (dump_enabled_p ()) + report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt, + "reduction: not commutative/associative: "); + return NULL; + } + else if (get_gimple_rhs_class (code) == GIMPLE_BINARY_RHS) { op1 = gimple_assign_rhs1 (def_stmt); op2 = gimple_assign_rhs2 (def_stmt); + } + else + { + if (dump_enabled_p 
()) + report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt, + "reduction: not handled operation: "); + return NULL; + } - if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME) - { - if (dump_enabled_p ()) - report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt, - "reduction: uses not ssa_names: "); + if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME) + { + if (dump_enabled_p ()) + report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt, + "reduction: both uses not ssa_names: "); - return NULL; - } - } + return NULL; + } type = TREE_TYPE (gimple_assign_lhs (def_stmt)); if ((TREE_CODE (op1) == SSA_NAME @@ -3091,7 +3100,7 @@ vect_is_simple_reduction (loop_vec_info == vect_internal_def && !is_loop_header_bb_p (gimple_bb (def2))))))) { - if (check_reduction && orig_code != MINUS_EXPR) + if (! nested_in_vect_loop && orig_code != MINUS_EXPR) { /* Check if we can swap operands (just for simplicity - so that the rest of the code can assume that the reduction variable @@ -3145,7 +3154,8 @@ vect_is_simple_reduction (loop_vec_info } /* Try to find SLP reduction chain. */ - if (check_reduction && code != COND_EXPR + if (! 
nested_in_vect_loop + && code != COND_EXPR && vect_is_slp_reduction (loop_info, phi, def_stmt)) { if (dump_enabled_p ()) Index: gcc/testsuite/gcc.dg/vect/pr66623.c =================================================================== --- gcc/testsuite/gcc.dg/vect/pr66623.c (nonexistent) +++ gcc/testsuite/gcc.dg/vect/pr66623.c (working copy) @@ -0,0 +1,86 @@ +/* { dg-require-effective-target vect_float } */ + +#include "tree-vect.h" + +extern void abort (void); + +#define OP * +#define INIT 1.0 + +float __attribute__((noinline,noclone)) +foo (float *__restrict__ i) +{ + float l = INIT; + int a; + int b; + + for (a = 0; a < 4; a++) + for (b = 0; b < 4; b++) + l = l OP i[b]; + + return l; +} + +float __attribute__((noinline,noclone)) +foo_ref (float *__restrict__ i) +{ + float l = INIT; + + l = l OP i[0]; + l = l OP i[1]; + l = l OP i[2]; + l = l OP i[3]; + + l = l OP i[0]; + l = l OP i[1]; + l = l OP i[2]; + l = l OP i[3]; + + l = l OP i[0]; + l = l OP i[1]; + l = l OP i[2]; + l = l OP i[3]; + + l = l OP i[0]; + l = l OP i[1]; + l = l OP i[2]; + l = l OP i[3]; + + return l; +} + +union u +{ + float f; + unsigned int u; +}; + +int +main (void) +{ + union u res, res2; + float a[4]; + + if (sizeof (float) != sizeof (unsigned int)) + return 0; + + check_vect (); + + a[0] = 0.01; + a[1] = 0.01; + a[2] = 0.01; + a[3] = 1.0; + + res.f = foo_ref (a); + + res2.f = foo (a); + + if (res.u != res2.u) + abort (); + + return 0; +} + +/* need -ffast-math to vectorize this loop. */ +/* ARM NEON passes -ffast-math to these tests, so expect this to fail. */ +/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { xfail arm_neon_ok } } } */ ^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH] Fix PR66623 2017-06-08 9:18 [PATCH] Fix PR66623 Richard Biener @ 2017-06-08 13:49 ` Richard Biener 2017-06-09 15:32 ` Christophe Lyon 0 siblings, 1 reply; 6+ messages in thread From: Richard Biener @ 2017-06-08 13:49 UTC (permalink / raw) To: gcc-patches On Thu, 8 Jun 2017, Richard Biener wrote: > > The following fixes unsafe vectorization of reductions in outer loop > vectorization. It combines it with a bit of cleanup in the affected > function. > > Bootstrap and regtest running on x86_64-unknown-linux-gnu. Re-testing the following variant -- multiple loop-closed PHI nodes for the same SSA name happen. Richard. 2017-06-08 Richard Biener <rguenther@suse.de> PR tree-optimization/66623 * tree-vect-loop.c (vect_is_simple_reduction): Cleanup, refactor check_reduction into two parts, properly computing whether we have to check reduction validity for outer loop vectorization. * gcc.dg/vect/pr66623.c: New testcase. Index: gcc/tree-vect-loop.c =================================================================== --- gcc/tree-vect-loop.c (revision 249007) +++ gcc/tree-vect-loop.c (working copy) @@ -2727,8 +2727,6 @@ vect_is_simple_reduction (loop_vec_info { struct loop *loop = (gimple_bb (phi))->loop_father; struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info); - edge latch_e = loop_latch_edge (loop); - tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e); gimple *def_stmt, *def1 = NULL, *def2 = NULL, *phi_use_stmt = NULL; enum tree_code orig_code, code; tree op1, op2, op3 = NULL_TREE, op4 = NULL_TREE; @@ -2742,11 +2740,6 @@ vect_is_simple_reduction (loop_vec_info *double_reduc = false; *v_reduc_type = TREE_CODE_REDUCTION; - /* Check validity of the reduction only for the innermost loop. */ - bool check_reduction = ! flow_loop_nested_p (vect_loop, loop); - gcc_assert ((check_reduction && loop == vect_loop) - || (!check_reduction && flow_loop_nested_p (vect_loop, loop))); - name = PHI_RESULT (phi); /* ??? 
If there are no uses of the PHI result the inner loop reduction won't be detected as possibly double-reduction by vectorizable_reduction @@ -2775,13 +2768,15 @@ vect_is_simple_reduction (loop_vec_info { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "reduction used in loop.\n"); + "reduction value used in loop.\n"); return NULL; } phi_use_stmt = use_stmt; } + edge latch_e = loop_latch_edge (loop); + tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e); if (TREE_CODE (loop_arg) != SSA_NAME) { if (dump_enabled_p ()) @@ -2795,18 +2790,22 @@ vect_is_simple_reduction (loop_vec_info } def_stmt = SSA_NAME_DEF_STMT (loop_arg); - if (!def_stmt) + if (gimple_nop_p (def_stmt)) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "reduction: no def_stmt.\n"); + "reduction: no def_stmt\n"); return NULL; } if (!is_gimple_assign (def_stmt) && gimple_code (def_stmt) != GIMPLE_PHI) { if (dump_enabled_p ()) - dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0); + { + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "reduction: unhandled reduction operation: "); + dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, def_stmt, 0); + } return NULL; } @@ -2822,6 +2821,7 @@ vect_is_simple_reduction (loop_vec_info } nloop_uses = 0; + auto_vec<gphi *, 3> lcphis; FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name) { gimple *use_stmt = USE_STMT (use_p); @@ -2829,6 +2829,9 @@ vect_is_simple_reduction (loop_vec_info continue; if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt))) nloop_uses++; + else + /* We can have more than one loop-closed PHI. */ + lcphis.safe_push (as_a <gphi *> (use_stmt)); if (nloop_uses > 1) { if (dump_enabled_p ()) @@ -2873,6 +2876,27 @@ vect_is_simple_reduction (loop_vec_info return NULL; } + /* If we are vectorizing an inner reduction we are executing that + in the original order only in case we are not dealing with a + double reduction. 
*/ + bool check_reduction = true; + if (flow_loop_nested_p (vect_loop, loop)) + { + gphi *lcphi; + unsigned i; + check_reduction = false; + FOR_EACH_VEC_ELT (lcphis, i, lcphi) + FOR_EACH_IMM_USE_FAST (use_p, imm_iter, gimple_phi_result (lcphi)) + { + gimple *use_stmt = USE_STMT (use_p); + if (is_gimple_debug (use_stmt)) + continue; + if (! flow_bb_inside_loop_p (vect_loop, gimple_bb (use_stmt))) + check_reduction = true; + } + } + + bool nested_in_vect_loop = flow_loop_nested_p (vect_loop, loop); code = orig_code = gimple_assign_rhs_code (def_stmt); /* We can handle "res -= x[i]", which is non-associative by @@ -2887,27 +2911,8 @@ vect_is_simple_reduction (loop_vec_info if (code == COND_EXPR) { - if (check_reduction) + if (! nested_in_vect_loop) *v_reduc_type = COND_REDUCTION; - } - else if (!commutative_tree_code (code) || !associative_tree_code (code)) - { - if (dump_enabled_p ()) - report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt, - "reduction: not commutative/associative: "); - return NULL; - } - - if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS) - { - if (code != COND_EXPR) - { - if (dump_enabled_p ()) - report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt, - "reduction: not binary operation: "); - - return NULL; - } op3 = gimple_assign_rhs1 (def_stmt); if (COMPARISON_CLASS_P (op3)) @@ -2918,30 +2923,35 @@ vect_is_simple_reduction (loop_vec_info op1 = gimple_assign_rhs2 (def_stmt); op2 = gimple_assign_rhs3 (def_stmt); - - if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME) - { - if (dump_enabled_p ()) - report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt, - "reduction: uses not ssa_names: "); - - return NULL; - } } - else + else if (!commutative_tree_code (code) || !associative_tree_code (code)) + { + if (dump_enabled_p ()) + report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt, + "reduction: not commutative/associative: "); + return NULL; + } + else if (get_gimple_rhs_class (code) == GIMPLE_BINARY_RHS) { op1 = gimple_assign_rhs1 (def_stmt); op2 = 
gimple_assign_rhs2 (def_stmt); + } + else + { + if (dump_enabled_p ()) + report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt, + "reduction: not handled operation: "); + return NULL; + } - if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME) - { - if (dump_enabled_p ()) - report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt, - "reduction: uses not ssa_names: "); + if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME) + { + if (dump_enabled_p ()) + report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt, + "reduction: both uses not ssa_names: "); - return NULL; - } - } + return NULL; + } type = TREE_TYPE (gimple_assign_lhs (def_stmt)); if ((TREE_CODE (op1) == SSA_NAME @@ -3091,7 +3101,7 @@ vect_is_simple_reduction (loop_vec_info == vect_internal_def && !is_loop_header_bb_p (gimple_bb (def2))))))) { - if (check_reduction && orig_code != MINUS_EXPR) + if (! nested_in_vect_loop && orig_code != MINUS_EXPR) { /* Check if we can swap operands (just for simplicity - so that the rest of the code can assume that the reduction variable @@ -3145,7 +3155,8 @@ vect_is_simple_reduction (loop_vec_info } /* Try to find SLP reduction chain. */ - if (check_reduction && code != COND_EXPR + if (! 
nested_in_vect_loop + && code != COND_EXPR && vect_is_slp_reduction (loop_info, phi, def_stmt)) { if (dump_enabled_p ()) Index: gcc/testsuite/gcc.dg/vect/pr66623.c =================================================================== --- gcc/testsuite/gcc.dg/vect/pr66623.c (revision 0) +++ gcc/testsuite/gcc.dg/vect/pr66623.c (working copy) @@ -0,0 +1,86 @@ +/* { dg-require-effective-target vect_float } */ + +#include "tree-vect.h" + +extern void abort (void); + +#define OP * +#define INIT 1.0 + +float __attribute__((noinline,noclone)) +foo (float *__restrict__ i) +{ + float l = INIT; + int a; + int b; + + for (a = 0; a < 4; a++) + for (b = 0; b < 4; b++) + l = l OP i[b]; + + return l; +} + +float __attribute__((noinline,noclone)) +foo_ref (float *__restrict__ i) +{ + float l = INIT; + + l = l OP i[0]; + l = l OP i[1]; + l = l OP i[2]; + l = l OP i[3]; + + l = l OP i[0]; + l = l OP i[1]; + l = l OP i[2]; + l = l OP i[3]; + + l = l OP i[0]; + l = l OP i[1]; + l = l OP i[2]; + l = l OP i[3]; + + l = l OP i[0]; + l = l OP i[1]; + l = l OP i[2]; + l = l OP i[3]; + + return l; +} + +union u +{ + float f; + unsigned int u; +}; + +int +main (void) +{ + union u res, res2; + float a[4]; + + if (sizeof (float) != sizeof (unsigned int)) + return 0; + + check_vect (); + + a[0] = 0.01; + a[1] = 0.01; + a[2] = 0.01; + a[3] = 1.0; + + res.f = foo_ref (a); + + res2.f = foo (a); + + if (res.u != res2.u) + abort (); + + return 0; +} + +/* need -ffast-math to vectorize this loop. */ +/* ARM NEON passes -ffast-math to these tests, so expect this to fail. */ +/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { xfail arm_neon_ok } } } */ ^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH] Fix PR66623 2017-06-08 13:49 ` Richard Biener @ 2017-06-09 15:32 ` Christophe Lyon 2017-06-09 15:48 ` Richard Biener 0 siblings, 1 reply; 6+ messages in thread From: Christophe Lyon @ 2017-06-09 15:32 UTC (permalink / raw) To: Richard Biener; +Cc: gcc-patches On 8 June 2017 at 15:49, Richard Biener <rguenther@suse.de> wrote: > On Thu, 8 Jun 2017, Richard Biener wrote: > >> >> The following fixes unsafe vectorization of reductions in outer loop >> vectorization. It combines it with a bit of cleanup in the affected >> function. >> >> Bootstrap and regtest running on x86_64-unknown-linux-gnu. > > Re-testing the following variant -- multiple loop-closed PHI nodes > for the same SSA name happen. > > Richard. > > 2017-06-08 Richard Biener <rguenther@suse.de> > > PR tree-optimization/66623 > * tree-vect-loop.c (vect_is_simple_reduction): Cleanup, > refactor check_reduction into two parts, properly computing > whether we have to check reduction validity for outer loop > vectorization. > > * gcc.dg/vect/pr66623.c: New testcase. 
> Hi Richard, The new testcase fails on armeb* targets: FAIL: gcc.dg/vect/pr66623.c -flto -ffat-lto-objects execution test FAIL: gcc.dg/vect/pr66623.c execution test Christophe > Index: gcc/tree-vect-loop.c > =================================================================== > --- gcc/tree-vect-loop.c (revision 249007) > +++ gcc/tree-vect-loop.c (working copy) > @@ -2727,8 +2727,6 @@ vect_is_simple_reduction (loop_vec_info > { > struct loop *loop = (gimple_bb (phi))->loop_father; > struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info); > - edge latch_e = loop_latch_edge (loop); > - tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e); > gimple *def_stmt, *def1 = NULL, *def2 = NULL, *phi_use_stmt = NULL; > enum tree_code orig_code, code; > tree op1, op2, op3 = NULL_TREE, op4 = NULL_TREE; > @@ -2742,11 +2740,6 @@ vect_is_simple_reduction (loop_vec_info > *double_reduc = false; > *v_reduc_type = TREE_CODE_REDUCTION; > > - /* Check validity of the reduction only for the innermost loop. */ > - bool check_reduction = ! flow_loop_nested_p (vect_loop, loop); > - gcc_assert ((check_reduction && loop == vect_loop) > - || (!check_reduction && flow_loop_nested_p (vect_loop, loop))); > - > name = PHI_RESULT (phi); > /* ??? 
If there are no uses of the PHI result the inner loop reduction > won't be detected as possibly double-reduction by vectorizable_reduction > @@ -2775,13 +2768,15 @@ vect_is_simple_reduction (loop_vec_info > { > if (dump_enabled_p ()) > dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, > - "reduction used in loop.\n"); > + "reduction value used in loop.\n"); > return NULL; > } > > phi_use_stmt = use_stmt; > } > > + edge latch_e = loop_latch_edge (loop); > + tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e); > if (TREE_CODE (loop_arg) != SSA_NAME) > { > if (dump_enabled_p ()) > @@ -2795,18 +2790,22 @@ vect_is_simple_reduction (loop_vec_info > } > > def_stmt = SSA_NAME_DEF_STMT (loop_arg); > - if (!def_stmt) > + if (gimple_nop_p (def_stmt)) > { > if (dump_enabled_p ()) > dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, > - "reduction: no def_stmt.\n"); > + "reduction: no def_stmt\n"); > return NULL; > } > > if (!is_gimple_assign (def_stmt) && gimple_code (def_stmt) != GIMPLE_PHI) > { > if (dump_enabled_p ()) > - dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0); > + { > + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, > + "reduction: unhandled reduction operation: "); > + dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, def_stmt, 0); > + } > return NULL; > } > > @@ -2822,6 +2821,7 @@ vect_is_simple_reduction (loop_vec_info > } > > nloop_uses = 0; > + auto_vec<gphi *, 3> lcphis; > FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name) > { > gimple *use_stmt = USE_STMT (use_p); > @@ -2829,6 +2829,9 @@ vect_is_simple_reduction (loop_vec_info > continue; > if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt))) > nloop_uses++; > + else > + /* We can have more than one loop-closed PHI. 
*/ > + lcphis.safe_push (as_a <gphi *> (use_stmt)); > if (nloop_uses > 1) > { > if (dump_enabled_p ()) > @@ -2873,6 +2876,27 @@ vect_is_simple_reduction (loop_vec_info > return NULL; > } > > + /* If we are vectorizing an inner reduction we are executing that > + in the original order only in case we are not dealing with a > + double reduction. */ > + bool check_reduction = true; > + if (flow_loop_nested_p (vect_loop, loop)) > + { > + gphi *lcphi; > + unsigned i; > + check_reduction = false; > + FOR_EACH_VEC_ELT (lcphis, i, lcphi) > + FOR_EACH_IMM_USE_FAST (use_p, imm_iter, gimple_phi_result (lcphi)) > + { > + gimple *use_stmt = USE_STMT (use_p); > + if (is_gimple_debug (use_stmt)) > + continue; > + if (! flow_bb_inside_loop_p (vect_loop, gimple_bb (use_stmt))) > + check_reduction = true; > + } > + } > + > + bool nested_in_vect_loop = flow_loop_nested_p (vect_loop, loop); > code = orig_code = gimple_assign_rhs_code (def_stmt); > > /* We can handle "res -= x[i]", which is non-associative by > @@ -2887,27 +2911,8 @@ vect_is_simple_reduction (loop_vec_info > > if (code == COND_EXPR) > { > - if (check_reduction) > + if (! 
nested_in_vect_loop) > *v_reduc_type = COND_REDUCTION; > - } > - else if (!commutative_tree_code (code) || !associative_tree_code (code)) > - { > - if (dump_enabled_p ()) > - report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt, > - "reduction: not commutative/associative: "); > - return NULL; > - } > - > - if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS) > - { > - if (code != COND_EXPR) > - { > - if (dump_enabled_p ()) > - report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt, > - "reduction: not binary operation: "); > - > - return NULL; > - } > > op3 = gimple_assign_rhs1 (def_stmt); > if (COMPARISON_CLASS_P (op3)) > @@ -2918,30 +2923,35 @@ vect_is_simple_reduction (loop_vec_info > > op1 = gimple_assign_rhs2 (def_stmt); > op2 = gimple_assign_rhs3 (def_stmt); > - > - if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME) > - { > - if (dump_enabled_p ()) > - report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt, > - "reduction: uses not ssa_names: "); > - > - return NULL; > - } > } > - else > + else if (!commutative_tree_code (code) || !associative_tree_code (code)) > + { > + if (dump_enabled_p ()) > + report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt, > + "reduction: not commutative/associative: "); > + return NULL; > + } > + else if (get_gimple_rhs_class (code) == GIMPLE_BINARY_RHS) > { > op1 = gimple_assign_rhs1 (def_stmt); > op2 = gimple_assign_rhs2 (def_stmt); > + } > + else > + { > + if (dump_enabled_p ()) > + report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt, > + "reduction: not handled operation: "); > + return NULL; > + } > > - if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME) > - { > - if (dump_enabled_p ()) > - report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt, > - "reduction: uses not ssa_names: "); > + if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME) > + { > + if (dump_enabled_p ()) > + report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt, > + "reduction: both uses not ssa_names: "); > > - return NULL; > - } > - } > + 
return NULL; > + } > > type = TREE_TYPE (gimple_assign_lhs (def_stmt)); > if ((TREE_CODE (op1) == SSA_NAME > @@ -3091,7 +3101,7 @@ vect_is_simple_reduction (loop_vec_info > == vect_internal_def > && !is_loop_header_bb_p (gimple_bb (def2))))))) > { > - if (check_reduction && orig_code != MINUS_EXPR) > + if (! nested_in_vect_loop && orig_code != MINUS_EXPR) > { > /* Check if we can swap operands (just for simplicity - so that > the rest of the code can assume that the reduction variable > @@ -3145,7 +3155,8 @@ vect_is_simple_reduction (loop_vec_info > } > > /* Try to find SLP reduction chain. */ > - if (check_reduction && code != COND_EXPR > + if (! nested_in_vect_loop > + && code != COND_EXPR > && vect_is_slp_reduction (loop_info, phi, def_stmt)) > { > if (dump_enabled_p ()) > Index: gcc/testsuite/gcc.dg/vect/pr66623.c > =================================================================== > --- gcc/testsuite/gcc.dg/vect/pr66623.c (revision 0) > +++ gcc/testsuite/gcc.dg/vect/pr66623.c (working copy) > @@ -0,0 +1,86 @@ > +/* { dg-require-effective-target vect_float } */ > + > +#include "tree-vect.h" > + > +extern void abort (void); > + > +#define OP * > +#define INIT 1.0 > + > +float __attribute__((noinline,noclone)) > +foo (float *__restrict__ i) > +{ > + float l = INIT; > + int a; > + int b; > + > + for (a = 0; a < 4; a++) > + for (b = 0; b < 4; b++) > + l = l OP i[b]; > + > + return l; > +} > + > +float __attribute__((noinline,noclone)) > +foo_ref (float *__restrict__ i) > +{ > + float l = INIT; > + > + l = l OP i[0]; > + l = l OP i[1]; > + l = l OP i[2]; > + l = l OP i[3]; > + > + l = l OP i[0]; > + l = l OP i[1]; > + l = l OP i[2]; > + l = l OP i[3]; > + > + l = l OP i[0]; > + l = l OP i[1]; > + l = l OP i[2]; > + l = l OP i[3]; > + > + l = l OP i[0]; > + l = l OP i[1]; > + l = l OP i[2]; > + l = l OP i[3]; > + > + return l; > +} > + > +union u > +{ > + float f; > + unsigned int u; > +}; > + > +int > +main (void) > +{ > + union u res, res2; > + float a[4]; > + > + 
if (sizeof (float) != sizeof (unsigned int)) > + return 0; > + > + check_vect (); > + > + a[0] = 0.01; > + a[1] = 0.01; > + a[2] = 0.01; > + a[3] = 1.0; > + > + res.f = foo_ref (a); > + > + res2.f = foo (a); > + > + if (res.u != res2.u) > + abort (); > + > + return 0; > +} > + > +/* need -ffast-math to vectorize this loop. */ > +/* ARM NEON passes -ffast-math to these tests, so expect this to fail. */ > +/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { xfail arm_neon_ok } } } */ ^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH] Fix PR66623 2017-06-09 15:32 ` Christophe Lyon @ 2017-06-09 15:48 ` Richard Biener 2017-06-12 7:37 ` Christophe Lyon 0 siblings, 1 reply; 6+ messages in thread From: Richard Biener @ 2017-06-09 15:48 UTC (permalink / raw) To: gcc-patches, Christophe Lyon, Richard Biener; +Cc: gcc-patches On June 9, 2017 5:32:10 PM GMT+02:00, Christophe Lyon <christophe.lyon@linaro.org> wrote: >On 8 June 2017 at 15:49, Richard Biener <rguenther@suse.de> wrote: >> On Thu, 8 Jun 2017, Richard Biener wrote: >> >>> >>> The following fixes unsafe vectorization of reductions in outer loop >>> vectorization. It combines it with a bit of cleanup in the affected >>> function. >>> >>> Bootstrap and regtest running on x86_64-unknown-linux-gnu. >> >> Re-testing the following variant -- multiple loop-closed PHI nodes >> for the same SSA name happen. >> >> Richard. >> >> 2017-06-08 Richard Biener <rguenther@suse.de> >> >> PR tree-optimization/66623 >> * tree-vect-loop.c (vect_is_simple_reduction): Cleanup, >> refactor check_reduction into two parts, properly computing >> whether we have to check reduction validity for outer loop >> vectorization. >> >> * gcc.dg/vect/pr66623.c: New testcase. >> > >Hi Richard, > >The new testcase fails on armeb* targets: >FAIL: gcc.dg/vect/pr66623.c -flto -ffat-lto-objects execution test >FAIL: gcc.dg/vect/pr66623.c execution test Ah, the xfail probably isn't enough. Can you try a dg-skip or so? 
>Christophe > > >> Index: gcc/tree-vect-loop.c >> =================================================================== >> --- gcc/tree-vect-loop.c (revision 249007) >> +++ gcc/tree-vect-loop.c (working copy) >> @@ -2727,8 +2727,6 @@ vect_is_simple_reduction (loop_vec_info >> { >> struct loop *loop = (gimple_bb (phi))->loop_father; >> struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info); >> - edge latch_e = loop_latch_edge (loop); >> - tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e); >> gimple *def_stmt, *def1 = NULL, *def2 = NULL, *phi_use_stmt = >NULL; >> enum tree_code orig_code, code; >> tree op1, op2, op3 = NULL_TREE, op4 = NULL_TREE; >> @@ -2742,11 +2740,6 @@ vect_is_simple_reduction (loop_vec_info >> *double_reduc = false; >> *v_reduc_type = TREE_CODE_REDUCTION; >> >> - /* Check validity of the reduction only for the innermost loop. >*/ >> - bool check_reduction = ! flow_loop_nested_p (vect_loop, loop); >> - gcc_assert ((check_reduction && loop == vect_loop) >> - || (!check_reduction && flow_loop_nested_p (vect_loop, >loop))); >> - >> name = PHI_RESULT (phi); >> /* ??? 
If there are no uses of the PHI result the inner loop >reduction >> won't be detected as possibly double-reduction by >vectorizable_reduction >> @@ -2775,13 +2768,15 @@ vect_is_simple_reduction (loop_vec_info >> { >> if (dump_enabled_p ()) >> dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, >> - "reduction used in loop.\n"); >> + "reduction value used in loop.\n"); >> return NULL; >> } >> >> phi_use_stmt = use_stmt; >> } >> >> + edge latch_e = loop_latch_edge (loop); >> + tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e); >> if (TREE_CODE (loop_arg) != SSA_NAME) >> { >> if (dump_enabled_p ()) >> @@ -2795,18 +2790,22 @@ vect_is_simple_reduction (loop_vec_info >> } >> >> def_stmt = SSA_NAME_DEF_STMT (loop_arg); >> - if (!def_stmt) >> + if (gimple_nop_p (def_stmt)) >> { >> if (dump_enabled_p ()) >> dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, >> - "reduction: no def_stmt.\n"); >> + "reduction: no def_stmt\n"); >> return NULL; >> } >> >> if (!is_gimple_assign (def_stmt) && gimple_code (def_stmt) != >GIMPLE_PHI) >> { >> if (dump_enabled_p ()) >> - dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0); >> + { >> + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, >> + "reduction: unhandled reduction operation: >"); >> + dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, >def_stmt, 0); >> + } >> return NULL; >> } >> >> @@ -2822,6 +2821,7 @@ vect_is_simple_reduction (loop_vec_info >> } >> >> nloop_uses = 0; >> + auto_vec<gphi *, 3> lcphis; >> FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name) >> { >> gimple *use_stmt = USE_STMT (use_p); >> @@ -2829,6 +2829,9 @@ vect_is_simple_reduction (loop_vec_info >> continue; >> if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt))) >> nloop_uses++; >> + else >> + /* We can have more than one loop-closed PHI. 
*/ >> + lcphis.safe_push (as_a <gphi *> (use_stmt)); >> if (nloop_uses > 1) >> { >> if (dump_enabled_p ()) >> @@ -2873,6 +2876,27 @@ vect_is_simple_reduction (loop_vec_info >> return NULL; >> } >> >> + /* If we are vectorizing an inner reduction we are executing that >> + in the original order only in case we are not dealing with a >> + double reduction. */ >> + bool check_reduction = true; >> + if (flow_loop_nested_p (vect_loop, loop)) >> + { >> + gphi *lcphi; >> + unsigned i; >> + check_reduction = false; >> + FOR_EACH_VEC_ELT (lcphis, i, lcphi) >> + FOR_EACH_IMM_USE_FAST (use_p, imm_iter, gimple_phi_result >(lcphi)) >> + { >> + gimple *use_stmt = USE_STMT (use_p); >> + if (is_gimple_debug (use_stmt)) >> + continue; >> + if (! flow_bb_inside_loop_p (vect_loop, gimple_bb >(use_stmt))) >> + check_reduction = true; >> + } >> + } >> + >> + bool nested_in_vect_loop = flow_loop_nested_p (vect_loop, loop); >> code = orig_code = gimple_assign_rhs_code (def_stmt); >> >> /* We can handle "res -= x[i]", which is non-associative by >> @@ -2887,27 +2911,8 @@ vect_is_simple_reduction (loop_vec_info >> >> if (code == COND_EXPR) >> { >> - if (check_reduction) >> + if (! 
nested_in_vect_loop) >> *v_reduc_type = COND_REDUCTION; >> - } >> - else if (!commutative_tree_code (code) || !associative_tree_code >(code)) >> - { >> - if (dump_enabled_p ()) >> - report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt, >> - "reduction: not commutative/associative: "); >> - return NULL; >> - } >> - >> - if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS) >> - { >> - if (code != COND_EXPR) >> - { >> - if (dump_enabled_p ()) >> - report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt, >> - "reduction: not binary operation: "); >> - >> - return NULL; >> - } >> >> op3 = gimple_assign_rhs1 (def_stmt); >> if (COMPARISON_CLASS_P (op3)) >> @@ -2918,30 +2923,35 @@ vect_is_simple_reduction (loop_vec_info >> >> op1 = gimple_assign_rhs2 (def_stmt); >> op2 = gimple_assign_rhs3 (def_stmt); >> - >> - if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != >SSA_NAME) >> - { >> - if (dump_enabled_p ()) >> - report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt, >> - "reduction: uses not ssa_names: "); >> - >> - return NULL; >> - } >> } >> - else >> + else if (!commutative_tree_code (code) || !associative_tree_code >(code)) >> + { >> + if (dump_enabled_p ()) >> + report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt, >> + "reduction: not commutative/associative: "); >> + return NULL; >> + } >> + else if (get_gimple_rhs_class (code) == GIMPLE_BINARY_RHS) >> { >> op1 = gimple_assign_rhs1 (def_stmt); >> op2 = gimple_assign_rhs2 (def_stmt); >> + } >> + else >> + { >> + if (dump_enabled_p ()) >> + report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt, >> + "reduction: not handled operation: "); >> + return NULL; >> + } >> >> - if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != >SSA_NAME) >> - { >> - if (dump_enabled_p ()) >> - report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt, >> - "reduction: uses not ssa_names: "); >> + if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME) >> + { >> + if (dump_enabled_p ()) >> + report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt, >> + 
"reduction: both uses not ssa_names: "); >> >> - return NULL; >> - } >> - } >> + return NULL; >> + } >> >> type = TREE_TYPE (gimple_assign_lhs (def_stmt)); >> if ((TREE_CODE (op1) == SSA_NAME >> @@ -3091,7 +3101,7 @@ vect_is_simple_reduction (loop_vec_info >> == vect_internal_def >> && !is_loop_header_bb_p (gimple_bb (def2))))))) >> { >> - if (check_reduction && orig_code != MINUS_EXPR) >> + if (! nested_in_vect_loop && orig_code != MINUS_EXPR) >> { >> /* Check if we can swap operands (just for simplicity - so >that >> the rest of the code can assume that the reduction >variable >> @@ -3145,7 +3155,8 @@ vect_is_simple_reduction (loop_vec_info >> } >> >> /* Try to find SLP reduction chain. */ >> - if (check_reduction && code != COND_EXPR >> + if (! nested_in_vect_loop >> + && code != COND_EXPR >> && vect_is_slp_reduction (loop_info, phi, def_stmt)) >> { >> if (dump_enabled_p ()) >> Index: gcc/testsuite/gcc.dg/vect/pr66623.c >> =================================================================== >> --- gcc/testsuite/gcc.dg/vect/pr66623.c (revision 0) >> +++ gcc/testsuite/gcc.dg/vect/pr66623.c (working copy) >> @@ -0,0 +1,86 @@ >> +/* { dg-require-effective-target vect_float } */ >> + >> +#include "tree-vect.h" >> + >> +extern void abort (void); >> + >> +#define OP * >> +#define INIT 1.0 >> + >> +float __attribute__((noinline,noclone)) >> +foo (float *__restrict__ i) >> +{ >> + float l = INIT; >> + int a; >> + int b; >> + >> + for (a = 0; a < 4; a++) >> + for (b = 0; b < 4; b++) >> + l = l OP i[b]; >> + >> + return l; >> +} >> + >> +float __attribute__((noinline,noclone)) >> +foo_ref (float *__restrict__ i) >> +{ >> + float l = INIT; >> + >> + l = l OP i[0]; >> + l = l OP i[1]; >> + l = l OP i[2]; >> + l = l OP i[3]; >> + >> + l = l OP i[0]; >> + l = l OP i[1]; >> + l = l OP i[2]; >> + l = l OP i[3]; >> + >> + l = l OP i[0]; >> + l = l OP i[1]; >> + l = l OP i[2]; >> + l = l OP i[3]; >> + >> + l = l OP i[0]; >> + l = l OP i[1]; >> + l = l OP i[2]; >> + l = l OP i[3]; 
>> + >> + return l; >> +} >> + >> +union u >> +{ >> + float f; >> + unsigned int u; >> +}; >> + >> +int >> +main (void) >> +{ >> + union u res, res2; >> + float a[4]; >> + >> + if (sizeof (float) != sizeof (unsigned int)) >> + return 0; >> + >> + check_vect (); >> + >> + a[0] = 0.01; >> + a[1] = 0.01; >> + a[2] = 0.01; >> + a[3] = 1.0; >> + >> + res.f = foo_ref (a); >> + >> + res2.f = foo (a); >> + >> + if (res.u != res2.u) >> + abort (); >> + >> + return 0; >> +} >> + >> +/* need -ffast-math to vectorize this loop. */ >> +/* ARM NEON passes -ffast-math to these tests, so expect this to >fail. */ >> +/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { >xfail arm_neon_ok } } } */ ^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH] Fix PR66623 2017-06-09 15:48 ` Richard Biener @ 2017-06-12 7:37 ` Christophe Lyon 2017-06-12 7:45 ` Richard Biener 0 siblings, 1 reply; 6+ messages in thread From: Christophe Lyon @ 2017-06-12 7:37 UTC (permalink / raw) To: Richard Biener; +Cc: gcc-patches, Richard Biener On 9 June 2017 at 17:48, Richard Biener <richard.guenther@gmail.com> wrote: > On June 9, 2017 5:32:10 PM GMT+02:00, Christophe Lyon <christophe.lyon@linaro.org> wrote: >>On 8 June 2017 at 15:49, Richard Biener <rguenther@suse.de> wrote: >>> On Thu, 8 Jun 2017, Richard Biener wrote: >>> >>>> >>>> The following fixes unsafe vectorization of reductions in outer loop >>>> vectorization. It combines it with a bit of cleanup in the affected >>>> function. >>>> >>>> Bootstrap and regtest running on x86_64-unknown-linux-gnu. >>> >>> Re-testing the following variant -- multiple loop-closed PHI nodes >>> for the same SSA name happen. >>> >>> Richard. >>> >>> 2017-06-08 Richard Biener <rguenther@suse.de> >>> >>> PR tree-optimization/66623 >>> * tree-vect-loop.c (vect_is_simple_reduction): Cleanup, >>> refactor check_reduction into two parts, properly computing >>> whether we have to check reduction validity for outer loop >>> vectorization. >>> >>> * gcc.dg/vect/pr66623.c: New testcase. >>> >> >>Hi Richard, >> >>The new testcase fails on armeb* targets: >>FAIL: gcc.dg/vect/pr66623.c -flto -ffat-lto-objects execution test >>FAIL: gcc.dg/vect/pr66623.c execution test > > Ah, the xfail probably isn't enough. Can you try a dg-skip or so? > I can try, but do we really want to ignore/hide the fact that the generated code is non-functional on arm big-endian? 
(execution is OK on little-endian) >>Christophe >> >> >>> Index: gcc/tree-vect-loop.c >>> =================================================================== >>> --- gcc/tree-vect-loop.c (revision 249007) >>> +++ gcc/tree-vect-loop.c (working copy) >>> @@ -2727,8 +2727,6 @@ vect_is_simple_reduction (loop_vec_info >>> { >>> struct loop *loop = (gimple_bb (phi))->loop_father; >>> struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info); >>> - edge latch_e = loop_latch_edge (loop); >>> - tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e); >>> gimple *def_stmt, *def1 = NULL, *def2 = NULL, *phi_use_stmt = >>NULL; >>> enum tree_code orig_code, code; >>> tree op1, op2, op3 = NULL_TREE, op4 = NULL_TREE; >>> @@ -2742,11 +2740,6 @@ vect_is_simple_reduction (loop_vec_info >>> *double_reduc = false; >>> *v_reduc_type = TREE_CODE_REDUCTION; >>> >>> - /* Check validity of the reduction only for the innermost loop. >>*/ >>> - bool check_reduction = ! flow_loop_nested_p (vect_loop, loop); >>> - gcc_assert ((check_reduction && loop == vect_loop) >>> - || (!check_reduction && flow_loop_nested_p (vect_loop, >>loop))); >>> - >>> name = PHI_RESULT (phi); >>> /* ??? 
If there are no uses of the PHI result the inner loop >>reduction >>> won't be detected as possibly double-reduction by >>vectorizable_reduction >>> @@ -2775,13 +2768,15 @@ vect_is_simple_reduction (loop_vec_info >>> { >>> if (dump_enabled_p ()) >>> dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, >>> - "reduction used in loop.\n"); >>> + "reduction value used in loop.\n"); >>> return NULL; >>> } >>> >>> phi_use_stmt = use_stmt; >>> } >>> >>> + edge latch_e = loop_latch_edge (loop); >>> + tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e); >>> if (TREE_CODE (loop_arg) != SSA_NAME) >>> { >>> if (dump_enabled_p ()) >>> @@ -2795,18 +2790,22 @@ vect_is_simple_reduction (loop_vec_info >>> } >>> >>> def_stmt = SSA_NAME_DEF_STMT (loop_arg); >>> - if (!def_stmt) >>> + if (gimple_nop_p (def_stmt)) >>> { >>> if (dump_enabled_p ()) >>> dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, >>> - "reduction: no def_stmt.\n"); >>> + "reduction: no def_stmt\n"); >>> return NULL; >>> } >>> >>> if (!is_gimple_assign (def_stmt) && gimple_code (def_stmt) != >>GIMPLE_PHI) >>> { >>> if (dump_enabled_p ()) >>> - dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0); >>> + { >>> + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, >>> + "reduction: unhandled reduction operation: >>"); >>> + dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, >>def_stmt, 0); >>> + } >>> return NULL; >>> } >>> >>> @@ -2822,6 +2821,7 @@ vect_is_simple_reduction (loop_vec_info >>> } >>> >>> nloop_uses = 0; >>> + auto_vec<gphi *, 3> lcphis; >>> FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name) >>> { >>> gimple *use_stmt = USE_STMT (use_p); >>> @@ -2829,6 +2829,9 @@ vect_is_simple_reduction (loop_vec_info >>> continue; >>> if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt))) >>> nloop_uses++; >>> + else >>> + /* We can have more than one loop-closed PHI. 
*/ >>> + lcphis.safe_push (as_a <gphi *> (use_stmt)); >>> if (nloop_uses > 1) >>> { >>> if (dump_enabled_p ()) >>> @@ -2873,6 +2876,27 @@ vect_is_simple_reduction (loop_vec_info >>> return NULL; >>> } >>> >>> + /* If we are vectorizing an inner reduction we are executing that >>> + in the original order only in case we are not dealing with a >>> + double reduction. */ >>> + bool check_reduction = true; >>> + if (flow_loop_nested_p (vect_loop, loop)) >>> + { >>> + gphi *lcphi; >>> + unsigned i; >>> + check_reduction = false; >>> + FOR_EACH_VEC_ELT (lcphis, i, lcphi) >>> + FOR_EACH_IMM_USE_FAST (use_p, imm_iter, gimple_phi_result >>(lcphi)) >>> + { >>> + gimple *use_stmt = USE_STMT (use_p); >>> + if (is_gimple_debug (use_stmt)) >>> + continue; >>> + if (! flow_bb_inside_loop_p (vect_loop, gimple_bb >>(use_stmt))) >>> + check_reduction = true; >>> + } >>> + } >>> + >>> + bool nested_in_vect_loop = flow_loop_nested_p (vect_loop, loop); >>> code = orig_code = gimple_assign_rhs_code (def_stmt); >>> >>> /* We can handle "res -= x[i]", which is non-associative by >>> @@ -2887,27 +2911,8 @@ vect_is_simple_reduction (loop_vec_info >>> >>> if (code == COND_EXPR) >>> { >>> - if (check_reduction) >>> + if (! 
nested_in_vect_loop) >>> *v_reduc_type = COND_REDUCTION; >>> - } >>> - else if (!commutative_tree_code (code) || !associative_tree_code >>(code)) >>> - { >>> - if (dump_enabled_p ()) >>> - report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt, >>> - "reduction: not commutative/associative: "); >>> - return NULL; >>> - } >>> - >>> - if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS) >>> - { >>> - if (code != COND_EXPR) >>> - { >>> - if (dump_enabled_p ()) >>> - report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt, >>> - "reduction: not binary operation: "); >>> - >>> - return NULL; >>> - } >>> >>> op3 = gimple_assign_rhs1 (def_stmt); >>> if (COMPARISON_CLASS_P (op3)) >>> @@ -2918,30 +2923,35 @@ vect_is_simple_reduction (loop_vec_info >>> >>> op1 = gimple_assign_rhs2 (def_stmt); >>> op2 = gimple_assign_rhs3 (def_stmt); >>> - >>> - if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != >>SSA_NAME) >>> - { >>> - if (dump_enabled_p ()) >>> - report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt, >>> - "reduction: uses not ssa_names: "); >>> - >>> - return NULL; >>> - } >>> } >>> - else >>> + else if (!commutative_tree_code (code) || !associative_tree_code >>(code)) >>> + { >>> + if (dump_enabled_p ()) >>> + report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt, >>> + "reduction: not commutative/associative: "); >>> + return NULL; >>> + } >>> + else if (get_gimple_rhs_class (code) == GIMPLE_BINARY_RHS) >>> { >>> op1 = gimple_assign_rhs1 (def_stmt); >>> op2 = gimple_assign_rhs2 (def_stmt); >>> + } >>> + else >>> + { >>> + if (dump_enabled_p ()) >>> + report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt, >>> + "reduction: not handled operation: "); >>> + return NULL; >>> + } >>> >>> - if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != >>SSA_NAME) >>> - { >>> - if (dump_enabled_p ()) >>> - report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt, >>> - "reduction: uses not ssa_names: "); >>> + if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME) >>> + { >>> + if (dump_enabled_p ()) 
>>> + report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt, >>> + "reduction: both uses not ssa_names: "); >>> >>> - return NULL; >>> - } >>> - } >>> + return NULL; >>> + } >>> >>> type = TREE_TYPE (gimple_assign_lhs (def_stmt)); >>> if ((TREE_CODE (op1) == SSA_NAME >>> @@ -3091,7 +3101,7 @@ vect_is_simple_reduction (loop_vec_info >>> == vect_internal_def >>> && !is_loop_header_bb_p (gimple_bb (def2))))))) >>> { >>> - if (check_reduction && orig_code != MINUS_EXPR) >>> + if (! nested_in_vect_loop && orig_code != MINUS_EXPR) >>> { >>> /* Check if we can swap operands (just for simplicity - so >>that >>> the rest of the code can assume that the reduction >>variable >>> @@ -3145,7 +3155,8 @@ vect_is_simple_reduction (loop_vec_info >>> } >>> >>> /* Try to find SLP reduction chain. */ >>> - if (check_reduction && code != COND_EXPR >>> + if (! nested_in_vect_loop >>> + && code != COND_EXPR >>> && vect_is_slp_reduction (loop_info, phi, def_stmt)) >>> { >>> if (dump_enabled_p ()) >>> Index: gcc/testsuite/gcc.dg/vect/pr66623.c >>> =================================================================== >>> --- gcc/testsuite/gcc.dg/vect/pr66623.c (revision 0) >>> +++ gcc/testsuite/gcc.dg/vect/pr66623.c (working copy) >>> @@ -0,0 +1,86 @@ >>> +/* { dg-require-effective-target vect_float } */ >>> + >>> +#include "tree-vect.h" >>> + >>> +extern void abort (void); >>> + >>> +#define OP * >>> +#define INIT 1.0 >>> + >>> +float __attribute__((noinline,noclone)) >>> +foo (float *__restrict__ i) >>> +{ >>> + float l = INIT; >>> + int a; >>> + int b; >>> + >>> + for (a = 0; a < 4; a++) >>> + for (b = 0; b < 4; b++) >>> + l = l OP i[b]; >>> + >>> + return l; >>> +} >>> + >>> +float __attribute__((noinline,noclone)) >>> +foo_ref (float *__restrict__ i) >>> +{ >>> + float l = INIT; >>> + >>> + l = l OP i[0]; >>> + l = l OP i[1]; >>> + l = l OP i[2]; >>> + l = l OP i[3]; >>> + >>> + l = l OP i[0]; >>> + l = l OP i[1]; >>> + l = l OP i[2]; >>> + l = l OP i[3]; >>> + >>> + l = l OP i[0]; >>> + l = 
l OP i[1]; >>> + l = l OP i[2]; >>> + l = l OP i[3]; >>> + >>> + l = l OP i[0]; >>> + l = l OP i[1]; >>> + l = l OP i[2]; >>> + l = l OP i[3]; >>> + >>> + return l; >>> +} >>> + >>> +union u >>> +{ >>> + float f; >>> + unsigned int u; >>> +}; >>> + >>> +int >>> +main (void) >>> +{ >>> + union u res, res2; >>> + float a[4]; >>> + >>> + if (sizeof (float) != sizeof (unsigned int)) >>> + return 0; >>> + >>> + check_vect (); >>> + >>> + a[0] = 0.01; >>> + a[1] = 0.01; >>> + a[2] = 0.01; >>> + a[3] = 1.0; >>> + >>> + res.f = foo_ref (a); >>> + >>> + res2.f = foo (a); >>> + >>> + if (res.u != res2.u) >>> + abort (); >>> + >>> + return 0; >>> +} >>> + >>> +/* need -ffast-math to vectorize this loop. */ >>> +/* ARM NEON passes -ffast-math to these tests, so expect this to >>fail. */ >>> +/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { >>xfail arm_neon_ok } } } */ > ^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH] Fix PR66623 2017-06-12 7:37 ` Christophe Lyon @ 2017-06-12 7:45 ` Richard Biener 0 siblings, 0 replies; 6+ messages in thread From: Richard Biener @ 2017-06-12 7:45 UTC (permalink / raw) To: Christophe Lyon; +Cc: Richard Biener, gcc-patches On Mon, 12 Jun 2017, Christophe Lyon wrote: > On 9 June 2017 at 17:48, Richard Biener <richard.guenther@gmail.com> wrote: > > On June 9, 2017 5:32:10 PM GMT+02:00, Christophe Lyon <christophe.lyon@linaro.org> wrote: > >>On 8 June 2017 at 15:49, Richard Biener <rguenther@suse.de> wrote: > >>> On Thu, 8 Jun 2017, Richard Biener wrote: > >>> > >>>> > >>>> The following fixes unsafe vectorization of reductions in outer loop > >>>> vectorization. It combines it with a bit of cleanup in the affected > >>>> function. > >>>> > >>>> Bootstrap and regtest running on x86_64-unknown-linux-gnu. > >>> > >>> Re-testing the following variant -- multiple loop-closed PHI nodes > >>> for the same SSA name happen. > >>> > >>> Richard. > >>> > >>> 2017-06-08 Richard Biener <rguenther@suse.de> > >>> > >>> PR tree-optimization/66623 > >>> * tree-vect-loop.c (vect_is_simple_reduction): Cleanup, > >>> refactor check_reduction into two parts, properly computing > >>> whether we have to check reduction validity for outer loop > >>> vectorization. > >>> > >>> * gcc.dg/vect/pr66623.c: New testcase. > >>> > >> > >>Hi Richard, > >> > >>The new testcase fails on armeb* targets: > >>FAIL: gcc.dg/vect/pr66623.c -flto -ffat-lto-objects execution test > >>FAIL: gcc.dg/vect/pr66623.c execution test > > > > Ah, the xfail probably isn't enough. Can you try a dg-skip or so? > > > > I can try, but do we really want to ignore/hide the fact that the generated > code in non-functional on arm big-endian? (execution is OK on little-endian) The code is expected to "miscompile" with -ffast-math and from other testcases I see that (at least in the vectorizer testsuite) NEON always enables that. 
So - can you check if adding { dg-additional-options "-fno-fast-math" } makes the testcase pass? Or investigate somewhat what the actual issue is? That is, remove the XFAIL (because with dg-do run we really need to not vectorize this)? I also recall that vectorization on big-endian on arm is "broken" but that might have changed. Richard. > > >>Christophe > >> > >> > >>> Index: gcc/tree-vect-loop.c > >>> =================================================================== > >>> --- gcc/tree-vect-loop.c (revision 249007) > >>> +++ gcc/tree-vect-loop.c (working copy) > >>> @@ -2727,8 +2727,6 @@ vect_is_simple_reduction (loop_vec_info > >>> { > >>> struct loop *loop = (gimple_bb (phi))->loop_father; > >>> struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info); > >>> - edge latch_e = loop_latch_edge (loop); > >>> - tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e); > >>> gimple *def_stmt, *def1 = NULL, *def2 = NULL, *phi_use_stmt = > >>NULL; > >>> enum tree_code orig_code, code; > >>> tree op1, op2, op3 = NULL_TREE, op4 = NULL_TREE; > >>> @@ -2742,11 +2740,6 @@ vect_is_simple_reduction (loop_vec_info > >>> *double_reduc = false; > >>> *v_reduc_type = TREE_CODE_REDUCTION; > >>> > >>> - /* Check validity of the reduction only for the innermost loop. > >>*/ > >>> - bool check_reduction = ! flow_loop_nested_p (vect_loop, loop); > >>> - gcc_assert ((check_reduction && loop == vect_loop) > >>> - || (!check_reduction && flow_loop_nested_p (vect_loop, > >>loop))); > >>> - > >>> name = PHI_RESULT (phi); > >>> /* ??? 
If there are no uses of the PHI result the inner loop > >>reduction > >>> won't be detected as possibly double-reduction by > >>vectorizable_reduction > >>> @@ -2775,13 +2768,15 @@ vect_is_simple_reduction (loop_vec_info > >>> { > >>> if (dump_enabled_p ()) > >>> dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, > >>> - "reduction used in loop.\n"); > >>> + "reduction value used in loop.\n"); > >>> return NULL; > >>> } > >>> > >>> phi_use_stmt = use_stmt; > >>> } > >>> > >>> + edge latch_e = loop_latch_edge (loop); > >>> + tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e); > >>> if (TREE_CODE (loop_arg) != SSA_NAME) > >>> { > >>> if (dump_enabled_p ()) > >>> @@ -2795,18 +2790,22 @@ vect_is_simple_reduction (loop_vec_info > >>> } > >>> > >>> def_stmt = SSA_NAME_DEF_STMT (loop_arg); > >>> - if (!def_stmt) > >>> + if (gimple_nop_p (def_stmt)) > >>> { > >>> if (dump_enabled_p ()) > >>> dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, > >>> - "reduction: no def_stmt.\n"); > >>> + "reduction: no def_stmt\n"); > >>> return NULL; > >>> } > >>> > >>> if (!is_gimple_assign (def_stmt) && gimple_code (def_stmt) != > >>GIMPLE_PHI) > >>> { > >>> if (dump_enabled_p ()) > >>> - dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0); > >>> + { > >>> + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, > >>> + "reduction: unhandled reduction operation: > >>"); > >>> + dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, > >>def_stmt, 0); > >>> + } > >>> return NULL; > >>> } > >>> > >>> @@ -2822,6 +2821,7 @@ vect_is_simple_reduction (loop_vec_info > >>> } > >>> > >>> nloop_uses = 0; > >>> + auto_vec<gphi *, 3> lcphis; > >>> FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name) > >>> { > >>> gimple *use_stmt = USE_STMT (use_p); > >>> @@ -2829,6 +2829,9 @@ vect_is_simple_reduction (loop_vec_info > >>> continue; > >>> if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt))) > >>> nloop_uses++; > >>> + else > >>> + /* We can have more than one loop-closed PHI. 
*/ > >>> + lcphis.safe_push (as_a <gphi *> (use_stmt)); > >>> if (nloop_uses > 1) > >>> { > >>> if (dump_enabled_p ()) > >>> @@ -2873,6 +2876,27 @@ vect_is_simple_reduction (loop_vec_info > >>> return NULL; > >>> } > >>> > >>> + /* If we are vectorizing an inner reduction we are executing that > >>> + in the original order only in case we are not dealing with a > >>> + double reduction. */ > >>> + bool check_reduction = true; > >>> + if (flow_loop_nested_p (vect_loop, loop)) > >>> + { > >>> + gphi *lcphi; > >>> + unsigned i; > >>> + check_reduction = false; > >>> + FOR_EACH_VEC_ELT (lcphis, i, lcphi) > >>> + FOR_EACH_IMM_USE_FAST (use_p, imm_iter, gimple_phi_result > >>(lcphi)) > >>> + { > >>> + gimple *use_stmt = USE_STMT (use_p); > >>> + if (is_gimple_debug (use_stmt)) > >>> + continue; > >>> + if (! flow_bb_inside_loop_p (vect_loop, gimple_bb > >>(use_stmt))) > >>> + check_reduction = true; > >>> + } > >>> + } > >>> + > >>> + bool nested_in_vect_loop = flow_loop_nested_p (vect_loop, loop); > >>> code = orig_code = gimple_assign_rhs_code (def_stmt); > >>> > >>> /* We can handle "res -= x[i]", which is non-associative by > >>> @@ -2887,27 +2911,8 @@ vect_is_simple_reduction (loop_vec_info > >>> > >>> if (code == COND_EXPR) > >>> { > >>> - if (check_reduction) > >>> + if (! 
nested_in_vect_loop) > >>> *v_reduc_type = COND_REDUCTION; > >>> - } > >>> - else if (!commutative_tree_code (code) || !associative_tree_code > >>(code)) > >>> - { > >>> - if (dump_enabled_p ()) > >>> - report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt, > >>> - "reduction: not commutative/associative: "); > >>> - return NULL; > >>> - } > >>> - > >>> - if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS) > >>> - { > >>> - if (code != COND_EXPR) > >>> - { > >>> - if (dump_enabled_p ()) > >>> - report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt, > >>> - "reduction: not binary operation: "); > >>> - > >>> - return NULL; > >>> - } > >>> > >>> op3 = gimple_assign_rhs1 (def_stmt); > >>> if (COMPARISON_CLASS_P (op3)) > >>> @@ -2918,30 +2923,35 @@ vect_is_simple_reduction (loop_vec_info > >>> > >>> op1 = gimple_assign_rhs2 (def_stmt); > >>> op2 = gimple_assign_rhs3 (def_stmt); > >>> - > >>> - if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != > >>SSA_NAME) > >>> - { > >>> - if (dump_enabled_p ()) > >>> - report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt, > >>> - "reduction: uses not ssa_names: "); > >>> - > >>> - return NULL; > >>> - } > >>> } > >>> - else > >>> + else if (!commutative_tree_code (code) || !associative_tree_code > >>(code)) > >>> + { > >>> + if (dump_enabled_p ()) > >>> + report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt, > >>> + "reduction: not commutative/associative: "); > >>> + return NULL; > >>> + } > >>> + else if (get_gimple_rhs_class (code) == GIMPLE_BINARY_RHS) > >>> { > >>> op1 = gimple_assign_rhs1 (def_stmt); > >>> op2 = gimple_assign_rhs2 (def_stmt); > >>> + } > >>> + else > >>> + { > >>> + if (dump_enabled_p ()) > >>> + report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt, > >>> + "reduction: not handled operation: "); > >>> + return NULL; > >>> + } > >>> > >>> - if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != > >>SSA_NAME) > >>> - { > >>> - if (dump_enabled_p ()) > >>> - report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt, > >>> - 
"reduction: uses not ssa_names: "); > >>> + if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME) > >>> + { > >>> + if (dump_enabled_p ()) > >>> + report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt, > >>> + "reduction: both uses not ssa_names: "); > >>> > >>> - return NULL; > >>> - } > >>> - } > >>> + return NULL; > >>> + } > >>> > >>> type = TREE_TYPE (gimple_assign_lhs (def_stmt)); > >>> if ((TREE_CODE (op1) == SSA_NAME > >>> @@ -3091,7 +3101,7 @@ vect_is_simple_reduction (loop_vec_info > >>> == vect_internal_def > >>> && !is_loop_header_bb_p (gimple_bb (def2))))))) > >>> { > >>> - if (check_reduction && orig_code != MINUS_EXPR) > >>> + if (! nested_in_vect_loop && orig_code != MINUS_EXPR) > >>> { > >>> /* Check if we can swap operands (just for simplicity - so > >>that > >>> the rest of the code can assume that the reduction > >>variable > >>> @@ -3145,7 +3155,8 @@ vect_is_simple_reduction (loop_vec_info > >>> } > >>> > >>> /* Try to find SLP reduction chain. */ > >>> - if (check_reduction && code != COND_EXPR > >>> + if (! 
nested_in_vect_loop > >>> + && code != COND_EXPR > >>> && vect_is_slp_reduction (loop_info, phi, def_stmt)) > >>> { > >>> if (dump_enabled_p ()) > >>> Index: gcc/testsuite/gcc.dg/vect/pr66623.c > >>> =================================================================== > >>> --- gcc/testsuite/gcc.dg/vect/pr66623.c (revision 0) > >>> +++ gcc/testsuite/gcc.dg/vect/pr66623.c (working copy) > >>> @@ -0,0 +1,86 @@ > >>> +/* { dg-require-effective-target vect_float } */ > >>> + > >>> +#include "tree-vect.h" > >>> + > >>> +extern void abort (void); > >>> + > >>> +#define OP * > >>> +#define INIT 1.0 > >>> + > >>> +float __attribute__((noinline,noclone)) > >>> +foo (float *__restrict__ i) > >>> +{ > >>> + float l = INIT; > >>> + int a; > >>> + int b; > >>> + > >>> + for (a = 0; a < 4; a++) > >>> + for (b = 0; b < 4; b++) > >>> + l = l OP i[b]; > >>> + > >>> + return l; > >>> +} > >>> + > >>> +float __attribute__((noinline,noclone)) > >>> +foo_ref (float *__restrict__ i) > >>> +{ > >>> + float l = INIT; > >>> + > >>> + l = l OP i[0]; > >>> + l = l OP i[1]; > >>> + l = l OP i[2]; > >>> + l = l OP i[3]; > >>> + > >>> + l = l OP i[0]; > >>> + l = l OP i[1]; > >>> + l = l OP i[2]; > >>> + l = l OP i[3]; > >>> + > >>> + l = l OP i[0]; > >>> + l = l OP i[1]; > >>> + l = l OP i[2]; > >>> + l = l OP i[3]; > >>> + > >>> + l = l OP i[0]; > >>> + l = l OP i[1]; > >>> + l = l OP i[2]; > >>> + l = l OP i[3]; > >>> + > >>> + return l; > >>> +} > >>> + > >>> +union u > >>> +{ > >>> + float f; > >>> + unsigned int u; > >>> +}; > >>> + > >>> +int > >>> +main (void) > >>> +{ > >>> + union u res, res2; > >>> + float a[4]; > >>> + > >>> + if (sizeof (float) != sizeof (unsigned int)) > >>> + return 0; > >>> + > >>> + check_vect (); > >>> + > >>> + a[0] = 0.01; > >>> + a[1] = 0.01; > >>> + a[2] = 0.01; > >>> + a[3] = 1.0; > >>> + > >>> + res.f = foo_ref (a); > >>> + > >>> + res2.f = foo (a); > >>> + > >>> + if (res.u != res2.u) > >>> + abort (); > >>> + > >>> + return 0; > >>> +} > >>> + > >>> +/* 
need -ffast-math to vectorize this loop. */ > >>> +/* ARM NEON passes -ffast-math to these tests, so expect this to > >>fail. */ > >>> +/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { > >>xfail arm_neon_ok } } } */ > > > > -- Richard Biener <rguenther@suse.de> SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nuernberg) ^ permalink raw reply [flat|nested] 6+ messages in thread
end of thread, other threads:[~2017-06-12 7:45 UTC | newest] Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- 2017-06-08 9:18 [PATCH] Fix PR66623 Richard Biener 2017-06-08 13:49 ` Richard Biener 2017-06-09 15:32 ` Christophe Lyon 2017-06-09 15:48 ` Richard Biener 2017-06-12 7:37 ` Christophe Lyon 2017-06-12 7:45 ` Richard Biener
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).