* [PATCH] Fix PR66623
@ 2017-06-08 9:18 Richard Biener
2017-06-08 13:49 ` Richard Biener
0 siblings, 1 reply; 6+ messages in thread
From: Richard Biener @ 2017-06-08 9:18 UTC (permalink / raw)
To: gcc-patches
The following fixes unsafe vectorization of reductions in outer loop
vectorization. It combines it with a bit of cleanup in the affected
function.
Bootstrap and regtest running on x86_64-unknown-linux-gnu.
Richard.
2017-06-08 Richard Biener <rguenther@suse.de>
PR tree-optimization/66623
* tree-vect-loop.c (vect_is_simple_reduction): Cleanup,
refactor check_reduction into two parts, properly computing
whether we have to check reduction validity for outer loop
vectorization.
* gcc.dg/vect/pr66623.c: New testcase.
Index: gcc/tree-vect-loop.c
===================================================================
--- gcc/tree-vect-loop.c (revision 249003)
+++ gcc/tree-vect-loop.c (working copy)
@@ -2727,8 +2727,6 @@ vect_is_simple_reduction (loop_vec_info
{
struct loop *loop = (gimple_bb (phi))->loop_father;
struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
- edge latch_e = loop_latch_edge (loop);
- tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
gimple *def_stmt, *def1 = NULL, *def2 = NULL, *phi_use_stmt = NULL;
enum tree_code orig_code, code;
tree op1, op2, op3 = NULL_TREE, op4 = NULL_TREE;
@@ -2742,11 +2740,6 @@ vect_is_simple_reduction (loop_vec_info
*double_reduc = false;
*v_reduc_type = TREE_CODE_REDUCTION;
- /* Check validity of the reduction only for the innermost loop. */
- bool check_reduction = ! flow_loop_nested_p (vect_loop, loop);
- gcc_assert ((check_reduction && loop == vect_loop)
- || (!check_reduction && flow_loop_nested_p (vect_loop, loop)));
-
name = PHI_RESULT (phi);
/* ??? If there are no uses of the PHI result the inner loop reduction
won't be detected as possibly double-reduction by vectorizable_reduction
@@ -2775,13 +2768,15 @@ vect_is_simple_reduction (loop_vec_info
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "reduction used in loop.\n");
+ "reduction value used in loop.\n");
return NULL;
}
phi_use_stmt = use_stmt;
}
+ edge latch_e = loop_latch_edge (loop);
+ tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
if (TREE_CODE (loop_arg) != SSA_NAME)
{
if (dump_enabled_p ())
@@ -2795,18 +2790,22 @@ vect_is_simple_reduction (loop_vec_info
}
def_stmt = SSA_NAME_DEF_STMT (loop_arg);
- if (!def_stmt)
+ if (gimple_nop_p (def_stmt))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "reduction: no def_stmt.\n");
+ "reduction: no def_stmt\n");
return NULL;
}
if (!is_gimple_assign (def_stmt) && gimple_code (def_stmt) != GIMPLE_PHI)
{
if (dump_enabled_p ())
- dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
+ {
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "reduction: unhandled reduction operation: ");
+ dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, def_stmt, 0);
+ }
return NULL;
}
@@ -2822,6 +2821,7 @@ vect_is_simple_reduction (loop_vec_info
}
nloop_uses = 0;
+ gphi *lcphi = NULL;
FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name)
{
gimple *use_stmt = USE_STMT (use_p);
@@ -2829,6 +2829,11 @@ vect_is_simple_reduction (loop_vec_info
continue;
if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
nloop_uses++;
+ else
+ {
+ gcc_assert (!lcphi);
+ lcphi = as_a <gphi *> (use_stmt);
+ }
if (nloop_uses > 1)
{
if (dump_enabled_p ())
@@ -2873,6 +2878,24 @@ vect_is_simple_reduction (loop_vec_info
return NULL;
}
+ /* If we are vectorizing an inner reduction we are executing that
+ in the original order only in case we are not dealing with a
+ double reduction. */
+ bool check_reduction = true;
+ if (flow_loop_nested_p (vect_loop, loop))
+ {
+ check_reduction = false;
+ FOR_EACH_IMM_USE_FAST (use_p, imm_iter, gimple_phi_result (lcphi))
+ {
+ gimple *use_stmt = USE_STMT (use_p);
+ if (is_gimple_debug (use_stmt))
+ continue;
+ if (! flow_bb_inside_loop_p (vect_loop, gimple_bb (use_stmt)))
+ check_reduction = true;
+ }
+ }
+
+ bool nested_in_vect_loop = flow_loop_nested_p (vect_loop, loop);
code = orig_code = gimple_assign_rhs_code (def_stmt);
/* We can handle "res -= x[i]", which is non-associative by
@@ -2887,27 +2910,8 @@ vect_is_simple_reduction (loop_vec_info
if (code == COND_EXPR)
{
- if (check_reduction)
+ if (! nested_in_vect_loop)
*v_reduc_type = COND_REDUCTION;
- }
- else if (!commutative_tree_code (code) || !associative_tree_code (code))
- {
- if (dump_enabled_p ())
- report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
- "reduction: not commutative/associative: ");
- return NULL;
- }
-
- if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
- {
- if (code != COND_EXPR)
- {
- if (dump_enabled_p ())
- report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
- "reduction: not binary operation: ");
-
- return NULL;
- }
op3 = gimple_assign_rhs1 (def_stmt);
if (COMPARISON_CLASS_P (op3))
@@ -2918,30 +2922,35 @@ vect_is_simple_reduction (loop_vec_info
op1 = gimple_assign_rhs2 (def_stmt);
op2 = gimple_assign_rhs3 (def_stmt);
-
- if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME)
- {
- if (dump_enabled_p ())
- report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
- "reduction: uses not ssa_names: ");
-
- return NULL;
- }
}
- else
+ else if (!commutative_tree_code (code) || !associative_tree_code (code))
+ {
+ if (dump_enabled_p ())
+ report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
+ "reduction: not commutative/associative: ");
+ return NULL;
+ }
+ else if (get_gimple_rhs_class (code) == GIMPLE_BINARY_RHS)
{
op1 = gimple_assign_rhs1 (def_stmt);
op2 = gimple_assign_rhs2 (def_stmt);
+ }
+ else
+ {
+ if (dump_enabled_p ())
+ report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
+ "reduction: not handled operation: ");
+ return NULL;
+ }
- if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME)
- {
- if (dump_enabled_p ())
- report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
- "reduction: uses not ssa_names: ");
+ if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME)
+ {
+ if (dump_enabled_p ())
+ report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
+ "reduction: both uses not ssa_names: ");
- return NULL;
- }
- }
+ return NULL;
+ }
type = TREE_TYPE (gimple_assign_lhs (def_stmt));
if ((TREE_CODE (op1) == SSA_NAME
@@ -3091,7 +3100,7 @@ vect_is_simple_reduction (loop_vec_info
== vect_internal_def
&& !is_loop_header_bb_p (gimple_bb (def2)))))))
{
- if (check_reduction && orig_code != MINUS_EXPR)
+ if (! nested_in_vect_loop && orig_code != MINUS_EXPR)
{
/* Check if we can swap operands (just for simplicity - so that
the rest of the code can assume that the reduction variable
@@ -3145,7 +3154,8 @@ vect_is_simple_reduction (loop_vec_info
}
/* Try to find SLP reduction chain. */
- if (check_reduction && code != COND_EXPR
+ if (! nested_in_vect_loop
+ && code != COND_EXPR
&& vect_is_slp_reduction (loop_info, phi, def_stmt))
{
if (dump_enabled_p ())
Index: gcc/testsuite/gcc.dg/vect/pr66623.c
===================================================================
--- gcc/testsuite/gcc.dg/vect/pr66623.c (nonexistent)
+++ gcc/testsuite/gcc.dg/vect/pr66623.c (working copy)
@@ -0,0 +1,86 @@
+/* { dg-require-effective-target vect_float } */
+
+#include "tree-vect.h"
+
+extern void abort (void);
+
+#define OP *
+#define INIT 1.0
+
+float __attribute__((noinline,noclone))
+foo (float *__restrict__ i)
+{
+ float l = INIT;
+ int a;
+ int b;
+
+ for (a = 0; a < 4; a++)
+ for (b = 0; b < 4; b++)
+ l = l OP i[b];
+
+ return l;
+}
+
+float __attribute__((noinline,noclone))
+foo_ref (float *__restrict__ i)
+{
+ float l = INIT;
+
+ l = l OP i[0];
+ l = l OP i[1];
+ l = l OP i[2];
+ l = l OP i[3];
+
+ l = l OP i[0];
+ l = l OP i[1];
+ l = l OP i[2];
+ l = l OP i[3];
+
+ l = l OP i[0];
+ l = l OP i[1];
+ l = l OP i[2];
+ l = l OP i[3];
+
+ l = l OP i[0];
+ l = l OP i[1];
+ l = l OP i[2];
+ l = l OP i[3];
+
+ return l;
+}
+
+union u
+{
+ float f;
+ unsigned int u;
+};
+
+int
+main (void)
+{
+ union u res, res2;
+ float a[4];
+
+ if (sizeof (float) != sizeof (unsigned int))
+ return 0;
+
+ check_vect ();
+
+ a[0] = 0.01;
+ a[1] = 0.01;
+ a[2] = 0.01;
+ a[3] = 1.0;
+
+ res.f = foo_ref (a);
+
+ res2.f = foo (a);
+
+ if (res.u != res2.u)
+ abort ();
+
+ return 0;
+}
+
+/* need -ffast-math to vectorize this loop. */
+/* ARM NEON passes -ffast-math to these tests, so expect this to fail. */
+/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { xfail arm_neon_ok } } } */
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH] Fix PR66623
2017-06-08 9:18 [PATCH] Fix PR66623 Richard Biener
@ 2017-06-08 13:49 ` Richard Biener
2017-06-09 15:32 ` Christophe Lyon
0 siblings, 1 reply; 6+ messages in thread
From: Richard Biener @ 2017-06-08 13:49 UTC (permalink / raw)
To: gcc-patches
On Thu, 8 Jun 2017, Richard Biener wrote:
>
> The following fixes unsafe vectorization of reductions in outer loop
> vectorization. It combines it with a bit of cleanup in the affected
> function.
>
> Bootstrap and regtest running on x86_64-unknown-linux-gnu.
Re-testing the following variant -- multiple loop-closed PHI nodes
for the same SSA name happen.
Richard.
2017-06-08 Richard Biener <rguenther@suse.de>
PR tree-optimization/66623
* tree-vect-loop.c (vect_is_simple_reduction): Cleanup,
refactor check_reduction into two parts, properly computing
whether we have to check reduction validity for outer loop
vectorization.
* gcc.dg/vect/pr66623.c: New testcase.
Index: gcc/tree-vect-loop.c
===================================================================
--- gcc/tree-vect-loop.c (revision 249007)
+++ gcc/tree-vect-loop.c (working copy)
@@ -2727,8 +2727,6 @@ vect_is_simple_reduction (loop_vec_info
{
struct loop *loop = (gimple_bb (phi))->loop_father;
struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
- edge latch_e = loop_latch_edge (loop);
- tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
gimple *def_stmt, *def1 = NULL, *def2 = NULL, *phi_use_stmt = NULL;
enum tree_code orig_code, code;
tree op1, op2, op3 = NULL_TREE, op4 = NULL_TREE;
@@ -2742,11 +2740,6 @@ vect_is_simple_reduction (loop_vec_info
*double_reduc = false;
*v_reduc_type = TREE_CODE_REDUCTION;
- /* Check validity of the reduction only for the innermost loop. */
- bool check_reduction = ! flow_loop_nested_p (vect_loop, loop);
- gcc_assert ((check_reduction && loop == vect_loop)
- || (!check_reduction && flow_loop_nested_p (vect_loop, loop)));
-
name = PHI_RESULT (phi);
/* ??? If there are no uses of the PHI result the inner loop reduction
won't be detected as possibly double-reduction by vectorizable_reduction
@@ -2775,13 +2768,15 @@ vect_is_simple_reduction (loop_vec_info
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "reduction used in loop.\n");
+ "reduction value used in loop.\n");
return NULL;
}
phi_use_stmt = use_stmt;
}
+ edge latch_e = loop_latch_edge (loop);
+ tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
if (TREE_CODE (loop_arg) != SSA_NAME)
{
if (dump_enabled_p ())
@@ -2795,18 +2790,22 @@ vect_is_simple_reduction (loop_vec_info
}
def_stmt = SSA_NAME_DEF_STMT (loop_arg);
- if (!def_stmt)
+ if (gimple_nop_p (def_stmt))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "reduction: no def_stmt.\n");
+ "reduction: no def_stmt\n");
return NULL;
}
if (!is_gimple_assign (def_stmt) && gimple_code (def_stmt) != GIMPLE_PHI)
{
if (dump_enabled_p ())
- dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
+ {
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "reduction: unhandled reduction operation: ");
+ dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, def_stmt, 0);
+ }
return NULL;
}
@@ -2822,6 +2821,7 @@ vect_is_simple_reduction (loop_vec_info
}
nloop_uses = 0;
+ auto_vec<gphi *, 3> lcphis;
FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name)
{
gimple *use_stmt = USE_STMT (use_p);
@@ -2829,6 +2829,9 @@ vect_is_simple_reduction (loop_vec_info
continue;
if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
nloop_uses++;
+ else
+ /* We can have more than one loop-closed PHI. */
+ lcphis.safe_push (as_a <gphi *> (use_stmt));
if (nloop_uses > 1)
{
if (dump_enabled_p ())
@@ -2873,6 +2876,27 @@ vect_is_simple_reduction (loop_vec_info
return NULL;
}
+ /* If we are vectorizing an inner reduction we are executing that
+ in the original order only in case we are not dealing with a
+ double reduction. */
+ bool check_reduction = true;
+ if (flow_loop_nested_p (vect_loop, loop))
+ {
+ gphi *lcphi;
+ unsigned i;
+ check_reduction = false;
+ FOR_EACH_VEC_ELT (lcphis, i, lcphi)
+ FOR_EACH_IMM_USE_FAST (use_p, imm_iter, gimple_phi_result (lcphi))
+ {
+ gimple *use_stmt = USE_STMT (use_p);
+ if (is_gimple_debug (use_stmt))
+ continue;
+ if (! flow_bb_inside_loop_p (vect_loop, gimple_bb (use_stmt)))
+ check_reduction = true;
+ }
+ }
+
+ bool nested_in_vect_loop = flow_loop_nested_p (vect_loop, loop);
code = orig_code = gimple_assign_rhs_code (def_stmt);
/* We can handle "res -= x[i]", which is non-associative by
@@ -2887,27 +2911,8 @@ vect_is_simple_reduction (loop_vec_info
if (code == COND_EXPR)
{
- if (check_reduction)
+ if (! nested_in_vect_loop)
*v_reduc_type = COND_REDUCTION;
- }
- else if (!commutative_tree_code (code) || !associative_tree_code (code))
- {
- if (dump_enabled_p ())
- report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
- "reduction: not commutative/associative: ");
- return NULL;
- }
-
- if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
- {
- if (code != COND_EXPR)
- {
- if (dump_enabled_p ())
- report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
- "reduction: not binary operation: ");
-
- return NULL;
- }
op3 = gimple_assign_rhs1 (def_stmt);
if (COMPARISON_CLASS_P (op3))
@@ -2918,30 +2923,35 @@ vect_is_simple_reduction (loop_vec_info
op1 = gimple_assign_rhs2 (def_stmt);
op2 = gimple_assign_rhs3 (def_stmt);
-
- if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME)
- {
- if (dump_enabled_p ())
- report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
- "reduction: uses not ssa_names: ");
-
- return NULL;
- }
}
- else
+ else if (!commutative_tree_code (code) || !associative_tree_code (code))
+ {
+ if (dump_enabled_p ())
+ report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
+ "reduction: not commutative/associative: ");
+ return NULL;
+ }
+ else if (get_gimple_rhs_class (code) == GIMPLE_BINARY_RHS)
{
op1 = gimple_assign_rhs1 (def_stmt);
op2 = gimple_assign_rhs2 (def_stmt);
+ }
+ else
+ {
+ if (dump_enabled_p ())
+ report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
+ "reduction: not handled operation: ");
+ return NULL;
+ }
- if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME)
- {
- if (dump_enabled_p ())
- report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
- "reduction: uses not ssa_names: ");
+ if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME)
+ {
+ if (dump_enabled_p ())
+ report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
+ "reduction: both uses not ssa_names: ");
- return NULL;
- }
- }
+ return NULL;
+ }
type = TREE_TYPE (gimple_assign_lhs (def_stmt));
if ((TREE_CODE (op1) == SSA_NAME
@@ -3091,7 +3101,7 @@ vect_is_simple_reduction (loop_vec_info
== vect_internal_def
&& !is_loop_header_bb_p (gimple_bb (def2)))))))
{
- if (check_reduction && orig_code != MINUS_EXPR)
+ if (! nested_in_vect_loop && orig_code != MINUS_EXPR)
{
/* Check if we can swap operands (just for simplicity - so that
the rest of the code can assume that the reduction variable
@@ -3145,7 +3155,8 @@ vect_is_simple_reduction (loop_vec_info
}
/* Try to find SLP reduction chain. */
- if (check_reduction && code != COND_EXPR
+ if (! nested_in_vect_loop
+ && code != COND_EXPR
&& vect_is_slp_reduction (loop_info, phi, def_stmt))
{
if (dump_enabled_p ())
Index: gcc/testsuite/gcc.dg/vect/pr66623.c
===================================================================
--- gcc/testsuite/gcc.dg/vect/pr66623.c (revision 0)
+++ gcc/testsuite/gcc.dg/vect/pr66623.c (working copy)
@@ -0,0 +1,86 @@
+/* { dg-require-effective-target vect_float } */
+
+#include "tree-vect.h"
+
+extern void abort (void);
+
+#define OP *
+#define INIT 1.0
+
+float __attribute__((noinline,noclone))
+foo (float *__restrict__ i)
+{
+ float l = INIT;
+ int a;
+ int b;
+
+ for (a = 0; a < 4; a++)
+ for (b = 0; b < 4; b++)
+ l = l OP i[b];
+
+ return l;
+}
+
+float __attribute__((noinline,noclone))
+foo_ref (float *__restrict__ i)
+{
+ float l = INIT;
+
+ l = l OP i[0];
+ l = l OP i[1];
+ l = l OP i[2];
+ l = l OP i[3];
+
+ l = l OP i[0];
+ l = l OP i[1];
+ l = l OP i[2];
+ l = l OP i[3];
+
+ l = l OP i[0];
+ l = l OP i[1];
+ l = l OP i[2];
+ l = l OP i[3];
+
+ l = l OP i[0];
+ l = l OP i[1];
+ l = l OP i[2];
+ l = l OP i[3];
+
+ return l;
+}
+
+union u
+{
+ float f;
+ unsigned int u;
+};
+
+int
+main (void)
+{
+ union u res, res2;
+ float a[4];
+
+ if (sizeof (float) != sizeof (unsigned int))
+ return 0;
+
+ check_vect ();
+
+ a[0] = 0.01;
+ a[1] = 0.01;
+ a[2] = 0.01;
+ a[3] = 1.0;
+
+ res.f = foo_ref (a);
+
+ res2.f = foo (a);
+
+ if (res.u != res2.u)
+ abort ();
+
+ return 0;
+}
+
+/* need -ffast-math to vectorize this loop. */
+/* ARM NEON passes -ffast-math to these tests, so expect this to fail. */
+/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { xfail arm_neon_ok } } } */
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH] Fix PR66623
2017-06-08 13:49 ` Richard Biener
@ 2017-06-09 15:32 ` Christophe Lyon
2017-06-09 15:48 ` Richard Biener
0 siblings, 1 reply; 6+ messages in thread
From: Christophe Lyon @ 2017-06-09 15:32 UTC (permalink / raw)
To: Richard Biener; +Cc: gcc-patches
On 8 June 2017 at 15:49, Richard Biener <rguenther@suse.de> wrote:
> On Thu, 8 Jun 2017, Richard Biener wrote:
>
>>
>> The following fixes unsafe vectorization of reductions in outer loop
>> vectorization. It combines it with a bit of cleanup in the affected
>> function.
>>
>> Bootstrap and regtest running on x86_64-unknown-linux-gnu.
>
> Re-testing the following variant -- multiple loop-closed PHI nodes
> for the same SSA name happen.
>
> Richard.
>
> 2017-06-08 Richard Biener <rguenther@suse.de>
>
> PR tree-optimization/66623
> * tree-vect-loop.c (vect_is_simple_reduction): Cleanup,
> refactor check_reduction into two parts, properly computing
> whether we have to check reduction validity for outer loop
> vectorization.
>
> * gcc.dg/vect/pr66623.c: New testcase.
>
Hi Richard,
The new testcase fails on armeb* targets:
FAIL: gcc.dg/vect/pr66623.c -flto -ffat-lto-objects execution test
FAIL: gcc.dg/vect/pr66623.c execution test
Christophe
> Index: gcc/tree-vect-loop.c
> ===================================================================
> --- gcc/tree-vect-loop.c (revision 249007)
> +++ gcc/tree-vect-loop.c (working copy)
> @@ -2727,8 +2727,6 @@ vect_is_simple_reduction (loop_vec_info
> {
> struct loop *loop = (gimple_bb (phi))->loop_father;
> struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
> - edge latch_e = loop_latch_edge (loop);
> - tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
> gimple *def_stmt, *def1 = NULL, *def2 = NULL, *phi_use_stmt = NULL;
> enum tree_code orig_code, code;
> tree op1, op2, op3 = NULL_TREE, op4 = NULL_TREE;
> @@ -2742,11 +2740,6 @@ vect_is_simple_reduction (loop_vec_info
> *double_reduc = false;
> *v_reduc_type = TREE_CODE_REDUCTION;
>
> - /* Check validity of the reduction only for the innermost loop. */
> - bool check_reduction = ! flow_loop_nested_p (vect_loop, loop);
> - gcc_assert ((check_reduction && loop == vect_loop)
> - || (!check_reduction && flow_loop_nested_p (vect_loop, loop)));
> -
> name = PHI_RESULT (phi);
> /* ??? If there are no uses of the PHI result the inner loop reduction
> won't be detected as possibly double-reduction by vectorizable_reduction
> @@ -2775,13 +2768,15 @@ vect_is_simple_reduction (loop_vec_info
> {
> if (dump_enabled_p ())
> dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> - "reduction used in loop.\n");
> + "reduction value used in loop.\n");
> return NULL;
> }
>
> phi_use_stmt = use_stmt;
> }
>
> + edge latch_e = loop_latch_edge (loop);
> + tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
> if (TREE_CODE (loop_arg) != SSA_NAME)
> {
> if (dump_enabled_p ())
> @@ -2795,18 +2790,22 @@ vect_is_simple_reduction (loop_vec_info
> }
>
> def_stmt = SSA_NAME_DEF_STMT (loop_arg);
> - if (!def_stmt)
> + if (gimple_nop_p (def_stmt))
> {
> if (dump_enabled_p ())
> dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> - "reduction: no def_stmt.\n");
> + "reduction: no def_stmt\n");
> return NULL;
> }
>
> if (!is_gimple_assign (def_stmt) && gimple_code (def_stmt) != GIMPLE_PHI)
> {
> if (dump_enabled_p ())
> - dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
> + {
> + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> + "reduction: unhandled reduction operation: ");
> + dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, def_stmt, 0);
> + }
> return NULL;
> }
>
> @@ -2822,6 +2821,7 @@ vect_is_simple_reduction (loop_vec_info
> }
>
> nloop_uses = 0;
> + auto_vec<gphi *, 3> lcphis;
> FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name)
> {
> gimple *use_stmt = USE_STMT (use_p);
> @@ -2829,6 +2829,9 @@ vect_is_simple_reduction (loop_vec_info
> continue;
> if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
> nloop_uses++;
> + else
> + /* We can have more than one loop-closed PHI. */
> + lcphis.safe_push (as_a <gphi *> (use_stmt));
> if (nloop_uses > 1)
> {
> if (dump_enabled_p ())
> @@ -2873,6 +2876,27 @@ vect_is_simple_reduction (loop_vec_info
> return NULL;
> }
>
> + /* If we are vectorizing an inner reduction we are executing that
> + in the original order only in case we are not dealing with a
> + double reduction. */
> + bool check_reduction = true;
> + if (flow_loop_nested_p (vect_loop, loop))
> + {
> + gphi *lcphi;
> + unsigned i;
> + check_reduction = false;
> + FOR_EACH_VEC_ELT (lcphis, i, lcphi)
> + FOR_EACH_IMM_USE_FAST (use_p, imm_iter, gimple_phi_result (lcphi))
> + {
> + gimple *use_stmt = USE_STMT (use_p);
> + if (is_gimple_debug (use_stmt))
> + continue;
> + if (! flow_bb_inside_loop_p (vect_loop, gimple_bb (use_stmt)))
> + check_reduction = true;
> + }
> + }
> +
> + bool nested_in_vect_loop = flow_loop_nested_p (vect_loop, loop);
> code = orig_code = gimple_assign_rhs_code (def_stmt);
>
> /* We can handle "res -= x[i]", which is non-associative by
> @@ -2887,27 +2911,8 @@ vect_is_simple_reduction (loop_vec_info
>
> if (code == COND_EXPR)
> {
> - if (check_reduction)
> + if (! nested_in_vect_loop)
> *v_reduc_type = COND_REDUCTION;
> - }
> - else if (!commutative_tree_code (code) || !associative_tree_code (code))
> - {
> - if (dump_enabled_p ())
> - report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
> - "reduction: not commutative/associative: ");
> - return NULL;
> - }
> -
> - if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
> - {
> - if (code != COND_EXPR)
> - {
> - if (dump_enabled_p ())
> - report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
> - "reduction: not binary operation: ");
> -
> - return NULL;
> - }
>
> op3 = gimple_assign_rhs1 (def_stmt);
> if (COMPARISON_CLASS_P (op3))
> @@ -2918,30 +2923,35 @@ vect_is_simple_reduction (loop_vec_info
>
> op1 = gimple_assign_rhs2 (def_stmt);
> op2 = gimple_assign_rhs3 (def_stmt);
> -
> - if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME)
> - {
> - if (dump_enabled_p ())
> - report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
> - "reduction: uses not ssa_names: ");
> -
> - return NULL;
> - }
> }
> - else
> + else if (!commutative_tree_code (code) || !associative_tree_code (code))
> + {
> + if (dump_enabled_p ())
> + report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
> + "reduction: not commutative/associative: ");
> + return NULL;
> + }
> + else if (get_gimple_rhs_class (code) == GIMPLE_BINARY_RHS)
> {
> op1 = gimple_assign_rhs1 (def_stmt);
> op2 = gimple_assign_rhs2 (def_stmt);
> + }
> + else
> + {
> + if (dump_enabled_p ())
> + report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
> + "reduction: not handled operation: ");
> + return NULL;
> + }
>
> - if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME)
> - {
> - if (dump_enabled_p ())
> - report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
> - "reduction: uses not ssa_names: ");
> + if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME)
> + {
> + if (dump_enabled_p ())
> + report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
> + "reduction: both uses not ssa_names: ");
>
> - return NULL;
> - }
> - }
> + return NULL;
> + }
>
> type = TREE_TYPE (gimple_assign_lhs (def_stmt));
> if ((TREE_CODE (op1) == SSA_NAME
> @@ -3091,7 +3101,7 @@ vect_is_simple_reduction (loop_vec_info
> == vect_internal_def
> && !is_loop_header_bb_p (gimple_bb (def2)))))))
> {
> - if (check_reduction && orig_code != MINUS_EXPR)
> + if (! nested_in_vect_loop && orig_code != MINUS_EXPR)
> {
> /* Check if we can swap operands (just for simplicity - so that
> the rest of the code can assume that the reduction variable
> @@ -3145,7 +3155,8 @@ vect_is_simple_reduction (loop_vec_info
> }
>
> /* Try to find SLP reduction chain. */
> - if (check_reduction && code != COND_EXPR
> + if (! nested_in_vect_loop
> + && code != COND_EXPR
> && vect_is_slp_reduction (loop_info, phi, def_stmt))
> {
> if (dump_enabled_p ())
> Index: gcc/testsuite/gcc.dg/vect/pr66623.c
> ===================================================================
> --- gcc/testsuite/gcc.dg/vect/pr66623.c (revision 0)
> +++ gcc/testsuite/gcc.dg/vect/pr66623.c (working copy)
> @@ -0,0 +1,86 @@
> +/* { dg-require-effective-target vect_float } */
> +
> +#include "tree-vect.h"
> +
> +extern void abort (void);
> +
> +#define OP *
> +#define INIT 1.0
> +
> +float __attribute__((noinline,noclone))
> +foo (float *__restrict__ i)
> +{
> + float l = INIT;
> + int a;
> + int b;
> +
> + for (a = 0; a < 4; a++)
> + for (b = 0; b < 4; b++)
> + l = l OP i[b];
> +
> + return l;
> +}
> +
> +float __attribute__((noinline,noclone))
> +foo_ref (float *__restrict__ i)
> +{
> + float l = INIT;
> +
> + l = l OP i[0];
> + l = l OP i[1];
> + l = l OP i[2];
> + l = l OP i[3];
> +
> + l = l OP i[0];
> + l = l OP i[1];
> + l = l OP i[2];
> + l = l OP i[3];
> +
> + l = l OP i[0];
> + l = l OP i[1];
> + l = l OP i[2];
> + l = l OP i[3];
> +
> + l = l OP i[0];
> + l = l OP i[1];
> + l = l OP i[2];
> + l = l OP i[3];
> +
> + return l;
> +}
> +
> +union u
> +{
> + float f;
> + unsigned int u;
> +};
> +
> +int
> +main (void)
> +{
> + union u res, res2;
> + float a[4];
> +
> + if (sizeof (float) != sizeof (unsigned int))
> + return 0;
> +
> + check_vect ();
> +
> + a[0] = 0.01;
> + a[1] = 0.01;
> + a[2] = 0.01;
> + a[3] = 1.0;
> +
> + res.f = foo_ref (a);
> +
> + res2.f = foo (a);
> +
> + if (res.u != res2.u)
> + abort ();
> +
> + return 0;
> +}
> +
> +/* need -ffast-math to vectorize this loop. */
> +/* ARM NEON passes -ffast-math to these tests, so expect this to fail. */
> +/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { xfail arm_neon_ok } } } */
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH] Fix PR66623
2017-06-09 15:32 ` Christophe Lyon
@ 2017-06-09 15:48 ` Richard Biener
2017-06-12 7:37 ` Christophe Lyon
0 siblings, 1 reply; 6+ messages in thread
From: Richard Biener @ 2017-06-09 15:48 UTC (permalink / raw)
To: gcc-patches, Christophe Lyon, Richard Biener; +Cc: gcc-patches
On June 9, 2017 5:32:10 PM GMT+02:00, Christophe Lyon <christophe.lyon@linaro.org> wrote:
>On 8 June 2017 at 15:49, Richard Biener <rguenther@suse.de> wrote:
>> On Thu, 8 Jun 2017, Richard Biener wrote:
>>
>>>
>>> The following fixes unsafe vectorization of reductions in outer loop
>>> vectorization. It combines it with a bit of cleanup in the affected
>>> function.
>>>
>>> Bootstrap and regtest running on x86_64-unknown-linux-gnu.
>>
>> Re-testing the following variant -- multiple loop-closed PHI nodes
>> for the same SSA name happen.
>>
>> Richard.
>>
>> 2017-06-08 Richard Biener <rguenther@suse.de>
>>
>> PR tree-optimization/66623
>> * tree-vect-loop.c (vect_is_simple_reduction): Cleanup,
>> refactor check_reduction into two parts, properly computing
>> whether we have to check reduction validity for outer loop
>> vectorization.
>>
>> * gcc.dg/vect/pr66623.c: New testcase.
>>
>
>Hi Richard,
>
>The new testcase fails on armeb* targets:
>FAIL: gcc.dg/vect/pr66623.c -flto -ffat-lto-objects execution test
>FAIL: gcc.dg/vect/pr66623.c execution test
Ah, the xfail probably isn't enough. Can you try a dg-skip or so?
>Christophe
>
>
>> Index: gcc/tree-vect-loop.c
>> ===================================================================
>> --- gcc/tree-vect-loop.c (revision 249007)
>> +++ gcc/tree-vect-loop.c (working copy)
>> @@ -2727,8 +2727,6 @@ vect_is_simple_reduction (loop_vec_info
>> {
>> struct loop *loop = (gimple_bb (phi))->loop_father;
>> struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
>> - edge latch_e = loop_latch_edge (loop);
>> - tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
>> gimple *def_stmt, *def1 = NULL, *def2 = NULL, *phi_use_stmt =
>NULL;
>> enum tree_code orig_code, code;
>> tree op1, op2, op3 = NULL_TREE, op4 = NULL_TREE;
>> @@ -2742,11 +2740,6 @@ vect_is_simple_reduction (loop_vec_info
>> *double_reduc = false;
>> *v_reduc_type = TREE_CODE_REDUCTION;
>>
>> - /* Check validity of the reduction only for the innermost loop.
>*/
>> - bool check_reduction = ! flow_loop_nested_p (vect_loop, loop);
>> - gcc_assert ((check_reduction && loop == vect_loop)
>> - || (!check_reduction && flow_loop_nested_p (vect_loop,
>loop)));
>> -
>> name = PHI_RESULT (phi);
>> /* ??? If there are no uses of the PHI result the inner loop
>reduction
>> won't be detected as possibly double-reduction by
>vectorizable_reduction
>> @@ -2775,13 +2768,15 @@ vect_is_simple_reduction (loop_vec_info
>> {
>> if (dump_enabled_p ())
>> dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
>> - "reduction used in loop.\n");
>> + "reduction value used in loop.\n");
>> return NULL;
>> }
>>
>> phi_use_stmt = use_stmt;
>> }
>>
>> + edge latch_e = loop_latch_edge (loop);
>> + tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
>> if (TREE_CODE (loop_arg) != SSA_NAME)
>> {
>> if (dump_enabled_p ())
>> @@ -2795,18 +2790,22 @@ vect_is_simple_reduction (loop_vec_info
>> }
>>
>> def_stmt = SSA_NAME_DEF_STMT (loop_arg);
>> - if (!def_stmt)
>> + if (gimple_nop_p (def_stmt))
>> {
>> if (dump_enabled_p ())
>> dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
>> - "reduction: no def_stmt.\n");
>> + "reduction: no def_stmt\n");
>> return NULL;
>> }
>>
>> if (!is_gimple_assign (def_stmt) && gimple_code (def_stmt) !=
>GIMPLE_PHI)
>> {
>> if (dump_enabled_p ())
>> - dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
>> + {
>> + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
>> + "reduction: unhandled reduction operation:
>");
>> + dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
>def_stmt, 0);
>> + }
>> return NULL;
>> }
>>
>> @@ -2822,6 +2821,7 @@ vect_is_simple_reduction (loop_vec_info
>> }
>>
>> nloop_uses = 0;
>> + auto_vec<gphi *, 3> lcphis;
>> FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name)
>> {
>> gimple *use_stmt = USE_STMT (use_p);
>> @@ -2829,6 +2829,9 @@ vect_is_simple_reduction (loop_vec_info
>> continue;
>> if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
>> nloop_uses++;
>> + else
>> + /* We can have more than one loop-closed PHI. */
>> + lcphis.safe_push (as_a <gphi *> (use_stmt));
>> if (nloop_uses > 1)
>> {
>> if (dump_enabled_p ())
>> @@ -2873,6 +2876,27 @@ vect_is_simple_reduction (loop_vec_info
>> return NULL;
>> }
>>
>> + /* If we are vectorizing an inner reduction we are executing that
>> + in the original order only in case we are not dealing with a
>> + double reduction. */
>> + bool check_reduction = true;
>> + if (flow_loop_nested_p (vect_loop, loop))
>> + {
>> + gphi *lcphi;
>> + unsigned i;
>> + check_reduction = false;
>> + FOR_EACH_VEC_ELT (lcphis, i, lcphi)
>> + FOR_EACH_IMM_USE_FAST (use_p, imm_iter, gimple_phi_result
>(lcphi))
>> + {
>> + gimple *use_stmt = USE_STMT (use_p);
>> + if (is_gimple_debug (use_stmt))
>> + continue;
>> + if (! flow_bb_inside_loop_p (vect_loop, gimple_bb
>(use_stmt)))
>> + check_reduction = true;
>> + }
>> + }
>> +
>> + bool nested_in_vect_loop = flow_loop_nested_p (vect_loop, loop);
>> code = orig_code = gimple_assign_rhs_code (def_stmt);
>>
>> /* We can handle "res -= x[i]", which is non-associative by
>> @@ -2887,27 +2911,8 @@ vect_is_simple_reduction (loop_vec_info
>>
>> if (code == COND_EXPR)
>> {
>> - if (check_reduction)
>> + if (! nested_in_vect_loop)
>> *v_reduc_type = COND_REDUCTION;
>> - }
>> - else if (!commutative_tree_code (code) || !associative_tree_code
>(code))
>> - {
>> - if (dump_enabled_p ())
>> - report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
>> - "reduction: not commutative/associative: ");
>> - return NULL;
>> - }
>> -
>> - if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
>> - {
>> - if (code != COND_EXPR)
>> - {
>> - if (dump_enabled_p ())
>> - report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
>> - "reduction: not binary operation: ");
>> -
>> - return NULL;
>> - }
>>
>> op3 = gimple_assign_rhs1 (def_stmt);
>> if (COMPARISON_CLASS_P (op3))
>> @@ -2918,30 +2923,35 @@ vect_is_simple_reduction (loop_vec_info
>>
>> op1 = gimple_assign_rhs2 (def_stmt);
>> op2 = gimple_assign_rhs3 (def_stmt);
>> -
>> - if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) !=
>SSA_NAME)
>> - {
>> - if (dump_enabled_p ())
>> - report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
>> - "reduction: uses not ssa_names: ");
>> -
>> - return NULL;
>> - }
>> }
>> - else
>> + else if (!commutative_tree_code (code) || !associative_tree_code
>(code))
>> + {
>> + if (dump_enabled_p ())
>> + report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
>> + "reduction: not commutative/associative: ");
>> + return NULL;
>> + }
>> + else if (get_gimple_rhs_class (code) == GIMPLE_BINARY_RHS)
>> {
>> op1 = gimple_assign_rhs1 (def_stmt);
>> op2 = gimple_assign_rhs2 (def_stmt);
>> + }
>> + else
>> + {
>> + if (dump_enabled_p ())
>> + report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
>> + "reduction: not handled operation: ");
>> + return NULL;
>> + }
>>
>> - if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) !=
>SSA_NAME)
>> - {
>> - if (dump_enabled_p ())
>> - report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
>> - "reduction: uses not ssa_names: ");
>> + if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME)
>> + {
>> + if (dump_enabled_p ())
>> + report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
>> + "reduction: both uses not ssa_names: ");
>>
>> - return NULL;
>> - }
>> - }
>> + return NULL;
>> + }
>>
>> type = TREE_TYPE (gimple_assign_lhs (def_stmt));
>> if ((TREE_CODE (op1) == SSA_NAME
>> @@ -3091,7 +3101,7 @@ vect_is_simple_reduction (loop_vec_info
>> == vect_internal_def
>> && !is_loop_header_bb_p (gimple_bb (def2)))))))
>> {
>> - if (check_reduction && orig_code != MINUS_EXPR)
>> + if (! nested_in_vect_loop && orig_code != MINUS_EXPR)
>> {
>> /* Check if we can swap operands (just for simplicity - so
>that
>> the rest of the code can assume that the reduction
>variable
>> @@ -3145,7 +3155,8 @@ vect_is_simple_reduction (loop_vec_info
>> }
>>
>> /* Try to find SLP reduction chain. */
>> - if (check_reduction && code != COND_EXPR
>> + if (! nested_in_vect_loop
>> + && code != COND_EXPR
>> && vect_is_slp_reduction (loop_info, phi, def_stmt))
>> {
>> if (dump_enabled_p ())
>> Index: gcc/testsuite/gcc.dg/vect/pr66623.c
>> ===================================================================
>> --- gcc/testsuite/gcc.dg/vect/pr66623.c (revision 0)
>> +++ gcc/testsuite/gcc.dg/vect/pr66623.c (working copy)
>> @@ -0,0 +1,86 @@
>> +/* { dg-require-effective-target vect_float } */
>> +
>> +#include "tree-vect.h"
>> +
>> +extern void abort (void);
>> +
>> +#define OP *
>> +#define INIT 1.0
>> +
>> +float __attribute__((noinline,noclone))
>> +foo (float *__restrict__ i)
>> +{
>> + float l = INIT;
>> + int a;
>> + int b;
>> +
>> + for (a = 0; a < 4; a++)
>> + for (b = 0; b < 4; b++)
>> + l = l OP i[b];
>> +
>> + return l;
>> +}
>> +
>> +float __attribute__((noinline,noclone))
>> +foo_ref (float *__restrict__ i)
>> +{
>> + float l = INIT;
>> +
>> + l = l OP i[0];
>> + l = l OP i[1];
>> + l = l OP i[2];
>> + l = l OP i[3];
>> +
>> + l = l OP i[0];
>> + l = l OP i[1];
>> + l = l OP i[2];
>> + l = l OP i[3];
>> +
>> + l = l OP i[0];
>> + l = l OP i[1];
>> + l = l OP i[2];
>> + l = l OP i[3];
>> +
>> + l = l OP i[0];
>> + l = l OP i[1];
>> + l = l OP i[2];
>> + l = l OP i[3];
>> +
>> + return l;
>> +}
>> +
>> +union u
>> +{
>> + float f;
>> + unsigned int u;
>> +};
>> +
>> +int
>> +main (void)
>> +{
>> + union u res, res2;
>> + float a[4];
>> +
>> + if (sizeof (float) != sizeof (unsigned int))
>> + return 0;
>> +
>> + check_vect ();
>> +
>> + a[0] = 0.01;
>> + a[1] = 0.01;
>> + a[2] = 0.01;
>> + a[3] = 1.0;
>> +
>> + res.f = foo_ref (a);
>> +
>> + res2.f = foo (a);
>> +
>> + if (res.u != res2.u)
>> + abort ();
>> +
>> + return 0;
>> +}
>> +
>> +/* need -ffast-math to vectorize this loop. */
>> +/* ARM NEON passes -ffast-math to these tests, so expect this to
>fail. */
>> +/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" {
>xfail arm_neon_ok } } } */
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH] Fix PR66623
2017-06-09 15:48 ` Richard Biener
@ 2017-06-12 7:37 ` Christophe Lyon
2017-06-12 7:45 ` Richard Biener
0 siblings, 1 reply; 6+ messages in thread
From: Christophe Lyon @ 2017-06-12 7:37 UTC (permalink / raw)
To: Richard Biener; +Cc: gcc-patches, Richard Biener
On 9 June 2017 at 17:48, Richard Biener <richard.guenther@gmail.com> wrote:
> On June 9, 2017 5:32:10 PM GMT+02:00, Christophe Lyon <christophe.lyon@linaro.org> wrote:
>>On 8 June 2017 at 15:49, Richard Biener <rguenther@suse.de> wrote:
>>> On Thu, 8 Jun 2017, Richard Biener wrote:
>>>
>>>>
>>>> The following fixes unsafe vectorization of reductions in outer loop
>>>> vectorization. It combines it with a bit of cleanup in the affected
>>>> function.
>>>>
>>>> Bootstrap and regtest running on x86_64-unknown-linux-gnu.
>>>
>>> Re-testing the following variant -- multiple loop-closed PHI nodes
>>> for the same SSA name happen.
>>>
>>> Richard.
>>>
>>> 2017-06-08 Richard Biener <rguenther@suse.de>
>>>
>>> PR tree-optimization/66623
>>> * tree-vect-loop.c (vect_is_simple_reduction): Cleanup,
>>> refactor check_reduction into two parts, properly computing
>>> whether we have to check reduction validity for outer loop
>>> vectorization.
>>>
>>> * gcc.dg/vect/pr66623.c: New testcase.
>>>
>>
>>Hi Richard,
>>
>>The new testcase fails on armeb* targets:
>>FAIL: gcc.dg/vect/pr66623.c -flto -ffat-lto-objects execution test
>>FAIL: gcc.dg/vect/pr66623.c execution test
>
> Ah, the xfail probably isn't enough. Can you try a dg-skip or so?
>
I can try, but do we really want to ignore/hide the fact that the generated
code is non-functional on arm big-endian? (execution is OK on little-endian)
>>Christophe
>>
>>
>>> Index: gcc/tree-vect-loop.c
>>> ===================================================================
>>> --- gcc/tree-vect-loop.c (revision 249007)
>>> +++ gcc/tree-vect-loop.c (working copy)
>>> @@ -2727,8 +2727,6 @@ vect_is_simple_reduction (loop_vec_info
>>> {
>>> struct loop *loop = (gimple_bb (phi))->loop_father;
>>> struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
>>> - edge latch_e = loop_latch_edge (loop);
>>> - tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
>>> gimple *def_stmt, *def1 = NULL, *def2 = NULL, *phi_use_stmt =
>>NULL;
>>> enum tree_code orig_code, code;
>>> tree op1, op2, op3 = NULL_TREE, op4 = NULL_TREE;
>>> @@ -2742,11 +2740,6 @@ vect_is_simple_reduction (loop_vec_info
>>> *double_reduc = false;
>>> *v_reduc_type = TREE_CODE_REDUCTION;
>>>
>>> - /* Check validity of the reduction only for the innermost loop.
>>*/
>>> - bool check_reduction = ! flow_loop_nested_p (vect_loop, loop);
>>> - gcc_assert ((check_reduction && loop == vect_loop)
>>> - || (!check_reduction && flow_loop_nested_p (vect_loop,
>>loop)));
>>> -
>>> name = PHI_RESULT (phi);
>>> /* ??? If there are no uses of the PHI result the inner loop
>>reduction
>>> won't be detected as possibly double-reduction by
>>vectorizable_reduction
>>> @@ -2775,13 +2768,15 @@ vect_is_simple_reduction (loop_vec_info
>>> {
>>> if (dump_enabled_p ())
>>> dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
>>> - "reduction used in loop.\n");
>>> + "reduction value used in loop.\n");
>>> return NULL;
>>> }
>>>
>>> phi_use_stmt = use_stmt;
>>> }
>>>
>>> + edge latch_e = loop_latch_edge (loop);
>>> + tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
>>> if (TREE_CODE (loop_arg) != SSA_NAME)
>>> {
>>> if (dump_enabled_p ())
>>> @@ -2795,18 +2790,22 @@ vect_is_simple_reduction (loop_vec_info
>>> }
>>>
>>> def_stmt = SSA_NAME_DEF_STMT (loop_arg);
>>> - if (!def_stmt)
>>> + if (gimple_nop_p (def_stmt))
>>> {
>>> if (dump_enabled_p ())
>>> dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
>>> - "reduction: no def_stmt.\n");
>>> + "reduction: no def_stmt\n");
>>> return NULL;
>>> }
>>>
>>> if (!is_gimple_assign (def_stmt) && gimple_code (def_stmt) !=
>>GIMPLE_PHI)
>>> {
>>> if (dump_enabled_p ())
>>> - dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
>>> + {
>>> + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
>>> + "reduction: unhandled reduction operation:
>>");
>>> + dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
>>def_stmt, 0);
>>> + }
>>> return NULL;
>>> }
>>>
>>> @@ -2822,6 +2821,7 @@ vect_is_simple_reduction (loop_vec_info
>>> }
>>>
>>> nloop_uses = 0;
>>> + auto_vec<gphi *, 3> lcphis;
>>> FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name)
>>> {
>>> gimple *use_stmt = USE_STMT (use_p);
>>> @@ -2829,6 +2829,9 @@ vect_is_simple_reduction (loop_vec_info
>>> continue;
>>> if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
>>> nloop_uses++;
>>> + else
>>> + /* We can have more than one loop-closed PHI. */
>>> + lcphis.safe_push (as_a <gphi *> (use_stmt));
>>> if (nloop_uses > 1)
>>> {
>>> if (dump_enabled_p ())
>>> @@ -2873,6 +2876,27 @@ vect_is_simple_reduction (loop_vec_info
>>> return NULL;
>>> }
>>>
>>> + /* If we are vectorizing an inner reduction we are executing that
>>> + in the original order only in case we are not dealing with a
>>> + double reduction. */
>>> + bool check_reduction = true;
>>> + if (flow_loop_nested_p (vect_loop, loop))
>>> + {
>>> + gphi *lcphi;
>>> + unsigned i;
>>> + check_reduction = false;
>>> + FOR_EACH_VEC_ELT (lcphis, i, lcphi)
>>> + FOR_EACH_IMM_USE_FAST (use_p, imm_iter, gimple_phi_result
>>(lcphi))
>>> + {
>>> + gimple *use_stmt = USE_STMT (use_p);
>>> + if (is_gimple_debug (use_stmt))
>>> + continue;
>>> + if (! flow_bb_inside_loop_p (vect_loop, gimple_bb
>>(use_stmt)))
>>> + check_reduction = true;
>>> + }
>>> + }
>>> +
>>> + bool nested_in_vect_loop = flow_loop_nested_p (vect_loop, loop);
>>> code = orig_code = gimple_assign_rhs_code (def_stmt);
>>>
>>> /* We can handle "res -= x[i]", which is non-associative by
>>> @@ -2887,27 +2911,8 @@ vect_is_simple_reduction (loop_vec_info
>>>
>>> if (code == COND_EXPR)
>>> {
>>> - if (check_reduction)
>>> + if (! nested_in_vect_loop)
>>> *v_reduc_type = COND_REDUCTION;
>>> - }
>>> - else if (!commutative_tree_code (code) || !associative_tree_code
>>(code))
>>> - {
>>> - if (dump_enabled_p ())
>>> - report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
>>> - "reduction: not commutative/associative: ");
>>> - return NULL;
>>> - }
>>> -
>>> - if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
>>> - {
>>> - if (code != COND_EXPR)
>>> - {
>>> - if (dump_enabled_p ())
>>> - report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
>>> - "reduction: not binary operation: ");
>>> -
>>> - return NULL;
>>> - }
>>>
>>> op3 = gimple_assign_rhs1 (def_stmt);
>>> if (COMPARISON_CLASS_P (op3))
>>> @@ -2918,30 +2923,35 @@ vect_is_simple_reduction (loop_vec_info
>>>
>>> op1 = gimple_assign_rhs2 (def_stmt);
>>> op2 = gimple_assign_rhs3 (def_stmt);
>>> -
>>> - if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) !=
>>SSA_NAME)
>>> - {
>>> - if (dump_enabled_p ())
>>> - report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
>>> - "reduction: uses not ssa_names: ");
>>> -
>>> - return NULL;
>>> - }
>>> }
>>> - else
>>> + else if (!commutative_tree_code (code) || !associative_tree_code
>>(code))
>>> + {
>>> + if (dump_enabled_p ())
>>> + report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
>>> + "reduction: not commutative/associative: ");
>>> + return NULL;
>>> + }
>>> + else if (get_gimple_rhs_class (code) == GIMPLE_BINARY_RHS)
>>> {
>>> op1 = gimple_assign_rhs1 (def_stmt);
>>> op2 = gimple_assign_rhs2 (def_stmt);
>>> + }
>>> + else
>>> + {
>>> + if (dump_enabled_p ())
>>> + report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
>>> + "reduction: not handled operation: ");
>>> + return NULL;
>>> + }
>>>
>>> - if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) !=
>>SSA_NAME)
>>> - {
>>> - if (dump_enabled_p ())
>>> - report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
>>> - "reduction: uses not ssa_names: ");
>>> + if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME)
>>> + {
>>> + if (dump_enabled_p ())
>>> + report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
>>> + "reduction: both uses not ssa_names: ");
>>>
>>> - return NULL;
>>> - }
>>> - }
>>> + return NULL;
>>> + }
>>>
>>> type = TREE_TYPE (gimple_assign_lhs (def_stmt));
>>> if ((TREE_CODE (op1) == SSA_NAME
>>> @@ -3091,7 +3101,7 @@ vect_is_simple_reduction (loop_vec_info
>>> == vect_internal_def
>>> && !is_loop_header_bb_p (gimple_bb (def2)))))))
>>> {
>>> - if (check_reduction && orig_code != MINUS_EXPR)
>>> + if (! nested_in_vect_loop && orig_code != MINUS_EXPR)
>>> {
>>> /* Check if we can swap operands (just for simplicity - so
>>that
>>> the rest of the code can assume that the reduction
>>variable
>>> @@ -3145,7 +3155,8 @@ vect_is_simple_reduction (loop_vec_info
>>> }
>>>
>>> /* Try to find SLP reduction chain. */
>>> - if (check_reduction && code != COND_EXPR
>>> + if (! nested_in_vect_loop
>>> + && code != COND_EXPR
>>> && vect_is_slp_reduction (loop_info, phi, def_stmt))
>>> {
>>> if (dump_enabled_p ())
>>> Index: gcc/testsuite/gcc.dg/vect/pr66623.c
>>> ===================================================================
>>> --- gcc/testsuite/gcc.dg/vect/pr66623.c (revision 0)
>>> +++ gcc/testsuite/gcc.dg/vect/pr66623.c (working copy)
>>> @@ -0,0 +1,86 @@
>>> +/* { dg-require-effective-target vect_float } */
>>> +
>>> +#include "tree-vect.h"
>>> +
>>> +extern void abort (void);
>>> +
>>> +#define OP *
>>> +#define INIT 1.0
>>> +
>>> +float __attribute__((noinline,noclone))
>>> +foo (float *__restrict__ i)
>>> +{
>>> + float l = INIT;
>>> + int a;
>>> + int b;
>>> +
>>> + for (a = 0; a < 4; a++)
>>> + for (b = 0; b < 4; b++)
>>> + l = l OP i[b];
>>> +
>>> + return l;
>>> +}
>>> +
>>> +float __attribute__((noinline,noclone))
>>> +foo_ref (float *__restrict__ i)
>>> +{
>>> + float l = INIT;
>>> +
>>> + l = l OP i[0];
>>> + l = l OP i[1];
>>> + l = l OP i[2];
>>> + l = l OP i[3];
>>> +
>>> + l = l OP i[0];
>>> + l = l OP i[1];
>>> + l = l OP i[2];
>>> + l = l OP i[3];
>>> +
>>> + l = l OP i[0];
>>> + l = l OP i[1];
>>> + l = l OP i[2];
>>> + l = l OP i[3];
>>> +
>>> + l = l OP i[0];
>>> + l = l OP i[1];
>>> + l = l OP i[2];
>>> + l = l OP i[3];
>>> +
>>> + return l;
>>> +}
>>> +
>>> +union u
>>> +{
>>> + float f;
>>> + unsigned int u;
>>> +};
>>> +
>>> +int
>>> +main (void)
>>> +{
>>> + union u res, res2;
>>> + float a[4];
>>> +
>>> + if (sizeof (float) != sizeof (unsigned int))
>>> + return 0;
>>> +
>>> + check_vect ();
>>> +
>>> + a[0] = 0.01;
>>> + a[1] = 0.01;
>>> + a[2] = 0.01;
>>> + a[3] = 1.0;
>>> +
>>> + res.f = foo_ref (a);
>>> +
>>> + res2.f = foo (a);
>>> +
>>> + if (res.u != res2.u)
>>> + abort ();
>>> +
>>> + return 0;
>>> +}
>>> +
>>> +/* need -ffast-math to vectorize this loop. */
>>> +/* ARM NEON passes -ffast-math to these tests, so expect this to
>>fail. */
>>> +/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" {
>>xfail arm_neon_ok } } } */
>
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH] Fix PR66623
2017-06-12 7:37 ` Christophe Lyon
@ 2017-06-12 7:45 ` Richard Biener
0 siblings, 0 replies; 6+ messages in thread
From: Richard Biener @ 2017-06-12 7:45 UTC (permalink / raw)
To: Christophe Lyon; +Cc: Richard Biener, gcc-patches
On Mon, 12 Jun 2017, Christophe Lyon wrote:
> On 9 June 2017 at 17:48, Richard Biener <richard.guenther@gmail.com> wrote:
> > On June 9, 2017 5:32:10 PM GMT+02:00, Christophe Lyon <christophe.lyon@linaro.org> wrote:
> >>On 8 June 2017 at 15:49, Richard Biener <rguenther@suse.de> wrote:
> >>> On Thu, 8 Jun 2017, Richard Biener wrote:
> >>>
> >>>>
> >>>> The following fixes unsafe vectorization of reductions in outer loop
> >>>> vectorization. It combines it with a bit of cleanup in the affected
> >>>> function.
> >>>>
> >>>> Bootstrap and regtest running on x86_64-unknown-linux-gnu.
> >>>
> >>> Re-testing the following variant -- multiple loop-closed PHI nodes
> >>> for the same SSA name happen.
> >>>
> >>> Richard.
> >>>
> >>> 2017-06-08 Richard Biener <rguenther@suse.de>
> >>>
> >>> PR tree-optimization/66623
> >>> * tree-vect-loop.c (vect_is_simple_reduction): Cleanup,
> >>> refactor check_reduction into two parts, properly computing
> >>> whether we have to check reduction validity for outer loop
> >>> vectorization.
> >>>
> >>> * gcc.dg/vect/pr66623.c: New testcase.
> >>>
> >>
> >>Hi Richard,
> >>
> >>The new testcase fails on armeb* targets:
> >>FAIL: gcc.dg/vect/pr66623.c -flto -ffat-lto-objects execution test
> >>FAIL: gcc.dg/vect/pr66623.c execution test
> >
> > Ah, the xfail probably isn't enough. Can you try a dg-skip or so?
> >
>
> I can try, but do we really want to ignore/hide the fact that the generated
> code is non-functional on arm big-endian? (execution is OK on little-endian)
The code is expected to "miscompile" with -ffast-math and from other
testcases I see that (at least in the vectorizer testsuite) NEON
always enables that. So - can you check if adding
{ dg-additional-options "-fno-fast-math" }
makes the testcase pass? Or investigate somewhat what the actual
issue is? That is, remove the XFAIL (because with dg-do run we
really need to not vectorize this)? I also recall that vectorization
on big-endian on arm is "broken" but that might have changed.
Richard.
>
> >>Christophe
> >>
> >>
> >>> Index: gcc/tree-vect-loop.c
> >>> ===================================================================
> >>> --- gcc/tree-vect-loop.c (revision 249007)
> >>> +++ gcc/tree-vect-loop.c (working copy)
> >>> @@ -2727,8 +2727,6 @@ vect_is_simple_reduction (loop_vec_info
> >>> {
> >>> struct loop *loop = (gimple_bb (phi))->loop_father;
> >>> struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
> >>> - edge latch_e = loop_latch_edge (loop);
> >>> - tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
> >>> gimple *def_stmt, *def1 = NULL, *def2 = NULL, *phi_use_stmt =
> >>NULL;
> >>> enum tree_code orig_code, code;
> >>> tree op1, op2, op3 = NULL_TREE, op4 = NULL_TREE;
> >>> @@ -2742,11 +2740,6 @@ vect_is_simple_reduction (loop_vec_info
> >>> *double_reduc = false;
> >>> *v_reduc_type = TREE_CODE_REDUCTION;
> >>>
> >>> - /* Check validity of the reduction only for the innermost loop.
> >>*/
> >>> - bool check_reduction = ! flow_loop_nested_p (vect_loop, loop);
> >>> - gcc_assert ((check_reduction && loop == vect_loop)
> >>> - || (!check_reduction && flow_loop_nested_p (vect_loop,
> >>loop)));
> >>> -
> >>> name = PHI_RESULT (phi);
> >>> /* ??? If there are no uses of the PHI result the inner loop
> >>reduction
> >>> won't be detected as possibly double-reduction by
> >>vectorizable_reduction
> >>> @@ -2775,13 +2768,15 @@ vect_is_simple_reduction (loop_vec_info
> >>> {
> >>> if (dump_enabled_p ())
> >>> dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> >>> - "reduction used in loop.\n");
> >>> + "reduction value used in loop.\n");
> >>> return NULL;
> >>> }
> >>>
> >>> phi_use_stmt = use_stmt;
> >>> }
> >>>
> >>> + edge latch_e = loop_latch_edge (loop);
> >>> + tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
> >>> if (TREE_CODE (loop_arg) != SSA_NAME)
> >>> {
> >>> if (dump_enabled_p ())
> >>> @@ -2795,18 +2790,22 @@ vect_is_simple_reduction (loop_vec_info
> >>> }
> >>>
> >>> def_stmt = SSA_NAME_DEF_STMT (loop_arg);
> >>> - if (!def_stmt)
> >>> + if (gimple_nop_p (def_stmt))
> >>> {
> >>> if (dump_enabled_p ())
> >>> dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> >>> - "reduction: no def_stmt.\n");
> >>> + "reduction: no def_stmt\n");
> >>> return NULL;
> >>> }
> >>>
> >>> if (!is_gimple_assign (def_stmt) && gimple_code (def_stmt) !=
> >>GIMPLE_PHI)
> >>> {
> >>> if (dump_enabled_p ())
> >>> - dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
> >>> + {
> >>> + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> >>> + "reduction: unhandled reduction operation:
> >>");
> >>> + dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
> >>def_stmt, 0);
> >>> + }
> >>> return NULL;
> >>> }
> >>>
> >>> @@ -2822,6 +2821,7 @@ vect_is_simple_reduction (loop_vec_info
> >>> }
> >>>
> >>> nloop_uses = 0;
> >>> + auto_vec<gphi *, 3> lcphis;
> >>> FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name)
> >>> {
> >>> gimple *use_stmt = USE_STMT (use_p);
> >>> @@ -2829,6 +2829,9 @@ vect_is_simple_reduction (loop_vec_info
> >>> continue;
> >>> if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
> >>> nloop_uses++;
> >>> + else
> >>> + /* We can have more than one loop-closed PHI. */
> >>> + lcphis.safe_push (as_a <gphi *> (use_stmt));
> >>> if (nloop_uses > 1)
> >>> {
> >>> if (dump_enabled_p ())
> >>> @@ -2873,6 +2876,27 @@ vect_is_simple_reduction (loop_vec_info
> >>> return NULL;
> >>> }
> >>>
> >>> + /* If we are vectorizing an inner reduction we are executing that
> >>> + in the original order only in case we are not dealing with a
> >>> + double reduction. */
> >>> + bool check_reduction = true;
> >>> + if (flow_loop_nested_p (vect_loop, loop))
> >>> + {
> >>> + gphi *lcphi;
> >>> + unsigned i;
> >>> + check_reduction = false;
> >>> + FOR_EACH_VEC_ELT (lcphis, i, lcphi)
> >>> + FOR_EACH_IMM_USE_FAST (use_p, imm_iter, gimple_phi_result
> >>(lcphi))
> >>> + {
> >>> + gimple *use_stmt = USE_STMT (use_p);
> >>> + if (is_gimple_debug (use_stmt))
> >>> + continue;
> >>> + if (! flow_bb_inside_loop_p (vect_loop, gimple_bb
> >>(use_stmt)))
> >>> + check_reduction = true;
> >>> + }
> >>> + }
> >>> +
> >>> + bool nested_in_vect_loop = flow_loop_nested_p (vect_loop, loop);
> >>> code = orig_code = gimple_assign_rhs_code (def_stmt);
> >>>
> >>> /* We can handle "res -= x[i]", which is non-associative by
> >>> @@ -2887,27 +2911,8 @@ vect_is_simple_reduction (loop_vec_info
> >>>
> >>> if (code == COND_EXPR)
> >>> {
> >>> - if (check_reduction)
> >>> + if (! nested_in_vect_loop)
> >>> *v_reduc_type = COND_REDUCTION;
> >>> - }
> >>> - else if (!commutative_tree_code (code) || !associative_tree_code
> >>(code))
> >>> - {
> >>> - if (dump_enabled_p ())
> >>> - report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
> >>> - "reduction: not commutative/associative: ");
> >>> - return NULL;
> >>> - }
> >>> -
> >>> - if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
> >>> - {
> >>> - if (code != COND_EXPR)
> >>> - {
> >>> - if (dump_enabled_p ())
> >>> - report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
> >>> - "reduction: not binary operation: ");
> >>> -
> >>> - return NULL;
> >>> - }
> >>>
> >>> op3 = gimple_assign_rhs1 (def_stmt);
> >>> if (COMPARISON_CLASS_P (op3))
> >>> @@ -2918,30 +2923,35 @@ vect_is_simple_reduction (loop_vec_info
> >>>
> >>> op1 = gimple_assign_rhs2 (def_stmt);
> >>> op2 = gimple_assign_rhs3 (def_stmt);
> >>> -
> >>> - if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) !=
> >>SSA_NAME)
> >>> - {
> >>> - if (dump_enabled_p ())
> >>> - report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
> >>> - "reduction: uses not ssa_names: ");
> >>> -
> >>> - return NULL;
> >>> - }
> >>> }
> >>> - else
> >>> + else if (!commutative_tree_code (code) || !associative_tree_code
> >>(code))
> >>> + {
> >>> + if (dump_enabled_p ())
> >>> + report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
> >>> + "reduction: not commutative/associative: ");
> >>> + return NULL;
> >>> + }
> >>> + else if (get_gimple_rhs_class (code) == GIMPLE_BINARY_RHS)
> >>> {
> >>> op1 = gimple_assign_rhs1 (def_stmt);
> >>> op2 = gimple_assign_rhs2 (def_stmt);
> >>> + }
> >>> + else
> >>> + {
> >>> + if (dump_enabled_p ())
> >>> + report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
> >>> + "reduction: not handled operation: ");
> >>> + return NULL;
> >>> + }
> >>>
> >>> - if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) !=
> >>SSA_NAME)
> >>> - {
> >>> - if (dump_enabled_p ())
> >>> - report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
> >>> - "reduction: uses not ssa_names: ");
> >>> + if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME)
> >>> + {
> >>> + if (dump_enabled_p ())
> >>> + report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
> >>> + "reduction: both uses not ssa_names: ");
> >>>
> >>> - return NULL;
> >>> - }
> >>> - }
> >>> + return NULL;
> >>> + }
> >>>
> >>> type = TREE_TYPE (gimple_assign_lhs (def_stmt));
> >>> if ((TREE_CODE (op1) == SSA_NAME
> >>> @@ -3091,7 +3101,7 @@ vect_is_simple_reduction (loop_vec_info
> >>> == vect_internal_def
> >>> && !is_loop_header_bb_p (gimple_bb (def2)))))))
> >>> {
> >>> - if (check_reduction && orig_code != MINUS_EXPR)
> >>> + if (! nested_in_vect_loop && orig_code != MINUS_EXPR)
> >>> {
> >>> /* Check if we can swap operands (just for simplicity - so
> >>that
> >>> the rest of the code can assume that the reduction
> >>variable
> >>> @@ -3145,7 +3155,8 @@ vect_is_simple_reduction (loop_vec_info
> >>> }
> >>>
> >>> /* Try to find SLP reduction chain. */
> >>> - if (check_reduction && code != COND_EXPR
> >>> + if (! nested_in_vect_loop
> >>> + && code != COND_EXPR
> >>> && vect_is_slp_reduction (loop_info, phi, def_stmt))
> >>> {
> >>> if (dump_enabled_p ())
> >>> Index: gcc/testsuite/gcc.dg/vect/pr66623.c
> >>> ===================================================================
> >>> --- gcc/testsuite/gcc.dg/vect/pr66623.c (revision 0)
> >>> +++ gcc/testsuite/gcc.dg/vect/pr66623.c (working copy)
> >>> @@ -0,0 +1,86 @@
> >>> +/* { dg-require-effective-target vect_float } */
> >>> +
> >>> +#include "tree-vect.h"
> >>> +
> >>> +extern void abort (void);
> >>> +
> >>> +#define OP *
> >>> +#define INIT 1.0
> >>> +
> >>> +float __attribute__((noinline,noclone))
> >>> +foo (float *__restrict__ i)
> >>> +{
> >>> + float l = INIT;
> >>> + int a;
> >>> + int b;
> >>> +
> >>> + for (a = 0; a < 4; a++)
> >>> + for (b = 0; b < 4; b++)
> >>> + l = l OP i[b];
> >>> +
> >>> + return l;
> >>> +}
> >>> +
> >>> +float __attribute__((noinline,noclone))
> >>> +foo_ref (float *__restrict__ i)
> >>> +{
> >>> + float l = INIT;
> >>> +
> >>> + l = l OP i[0];
> >>> + l = l OP i[1];
> >>> + l = l OP i[2];
> >>> + l = l OP i[3];
> >>> +
> >>> + l = l OP i[0];
> >>> + l = l OP i[1];
> >>> + l = l OP i[2];
> >>> + l = l OP i[3];
> >>> +
> >>> + l = l OP i[0];
> >>> + l = l OP i[1];
> >>> + l = l OP i[2];
> >>> + l = l OP i[3];
> >>> +
> >>> + l = l OP i[0];
> >>> + l = l OP i[1];
> >>> + l = l OP i[2];
> >>> + l = l OP i[3];
> >>> +
> >>> + return l;
> >>> +}
> >>> +
> >>> +union u
> >>> +{
> >>> + float f;
> >>> + unsigned int u;
> >>> +};
> >>> +
> >>> +int
> >>> +main (void)
> >>> +{
> >>> + union u res, res2;
> >>> + float a[4];
> >>> +
> >>> + if (sizeof (float) != sizeof (unsigned int))
> >>> + return 0;
> >>> +
> >>> + check_vect ();
> >>> +
> >>> + a[0] = 0.01;
> >>> + a[1] = 0.01;
> >>> + a[2] = 0.01;
> >>> + a[3] = 1.0;
> >>> +
> >>> + res.f = foo_ref (a);
> >>> +
> >>> + res2.f = foo (a);
> >>> +
> >>> + if (res.u != res2.u)
> >>> + abort ();
> >>> +
> >>> + return 0;
> >>> +}
> >>> +
> >>> +/* need -ffast-math to vectorize this loop. */
> >>> +/* ARM NEON passes -ffast-math to these tests, so expect this to
> >>fail. */
> >>> +/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" {
> >>xfail arm_neon_ok } } } */
> >
>
>
--
Richard Biener <rguenther@suse.de>
SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nuernberg)
^ permalink raw reply [flat|nested] 6+ messages in thread
end of thread, other threads:[~2017-06-12 7:45 UTC | newest]
Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-06-08 9:18 [PATCH] Fix PR66623 Richard Biener
2017-06-08 13:49 ` Richard Biener
2017-06-09 15:32 ` Christophe Lyon
2017-06-09 15:48 ` Richard Biener
2017-06-12 7:37 ` Christophe Lyon
2017-06-12 7:45 ` Richard Biener
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).