* [PATCH] Fix PR67790
@ 2015-11-18 8:27 Richard Biener
0 siblings, 0 replies; only message in thread
From: Richard Biener @ 2015-11-18 8:27 UTC (permalink / raw)
To: gcc-patches
This PR exposes an issue with how we deal with vectorization of
reductions with minus (x -= ...). We do that by re-writing the
IL to a plus and a new negate stmt. Unfortunately that messes
up UIDs of stmts in the loop which are used for dominance
checks by the vectorizer.
I've tried using a pattern for this but this fails miserably
(requiring some larger re-org of reduction detection).
Instead the much simpler fix is to not do the IL rewriting
but just fix up the reduction epilogue to use a plus when
the reduction code is a minus.
Bootstrapped and tested on x86_64-unknown-linux-gnu, applied to trunk.
Richard.
2015-11-18 Richard Biener <rguenther@suse.de>
PR tree-optimization/67790
* tree-vect-loop.c (vect_is_simple_reduction_1): Remove
IL rewrite for MINUS_EXPR reductions, rename back to ...
(vect_is_simple_reduction): ... this, removing the wrapper.
(vect_force_simple_reduction): Adjust.
(vectorizable_reduction): Adjust reduc_index for MINUS_EXPR
reductions and make use if reduc_index in all places. For
the final reduction of MINUS_EXPR use PLUS_EXPR.
* gcc.dg/vect/pr67790.c: New testcase.
Index: gcc/tree-vect-loop.c
===================================================================
*** gcc/tree-vect-loop.c (revision 230455)
--- gcc/tree-vect-loop.c (working copy)
*************** vect_is_slp_reduction (loop_vec_info loo
*** 2468,2483 ****
if (a[i] < val)
ret_val = a[i];
- If MODIFY is true it tries also to rework the code in-place to enable
- detection of more reduction patterns. For the time being we rewrite
- "res -= RHS" into "rhs += -RHS" when it seems worthwhile.
*/
static gimple *
! vect_is_simple_reduction_1 (loop_vec_info loop_info, gimple *phi,
! bool check_reduction, bool *double_reduc,
! bool modify, bool need_wrapping_integral_overflow,
! enum vect_reduction_type *v_reduc_type)
{
struct loop *loop = (gimple_bb (phi))->loop_father;
struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
--- 2477,2489 ----
if (a[i] < val)
ret_val = a[i];
*/
static gimple *
! vect_is_simple_reduction (loop_vec_info loop_info, gimple *phi,
! bool check_reduction, bool *double_reduc,
! bool need_wrapping_integral_overflow,
! enum vect_reduction_type *v_reduc_type)
{
struct loop *loop = (gimple_bb (phi))->loop_father;
struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
*************** vect_is_simple_reduction_1 (loop_vec_inf
*** 2634,2640 ****
gimple instruction for the first simple tests and only do this
if we're allowed to change code at all. */
if (code == MINUS_EXPR
- && modify
&& (op1 = gimple_assign_rhs1 (def_stmt))
&& TREE_CODE (op1) == SSA_NAME
&& SSA_NAME_DEF_STMT (op1) == phi)
--- 2640,2645 ----
*************** vect_is_simple_reduction_1 (loop_vec_inf
*** 2791,2813 ****
}
}
- /* If we detected "res -= x[i]" earlier, rewrite it into
- "res += -x[i]" now. If this turns out to be useless reassoc
- will clean it up again. */
- if (orig_code == MINUS_EXPR)
- {
- tree rhs = gimple_assign_rhs2 (def_stmt);
- tree negrhs = make_ssa_name (TREE_TYPE (rhs));
- gimple *negate_stmt = gimple_build_assign (negrhs, NEGATE_EXPR, rhs);
- gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
- set_vinfo_for_stmt (negate_stmt, new_stmt_vec_info (negate_stmt,
- loop_info));
- gsi_insert_before (&gsi, negate_stmt, GSI_NEW_STMT);
- gimple_assign_set_rhs2 (def_stmt, negrhs);
- gimple_assign_set_rhs_code (def_stmt, PLUS_EXPR);
- update_stmt (def_stmt);
- }
-
/* Reduction is safe. We're dealing with one of the following:
1) integer arithmetic and no trapv
2) floating point arithmetic, and special flags permit this optimization
--- 2796,2801 ----
*************** vect_is_simple_reduction_1 (loop_vec_inf
*** 2863,2869 ****
== vect_internal_def
&& !is_loop_header_bb_p (gimple_bb (def2)))))))
{
! if (check_reduction)
{
if (code == COND_EXPR)
{
--- 2851,2858 ----
== vect_internal_def
&& !is_loop_header_bb_p (gimple_bb (def2)))))))
{
! if (check_reduction
! && orig_code != MINUS_EXPR)
{
if (code == COND_EXPR)
{
*************** vect_is_simple_reduction_1 (loop_vec_inf
*** 2915,2935 ****
return NULL;
}
- /* Wrapper around vect_is_simple_reduction_1, that won't modify code
- in-place. Arguments as there. */
-
- static gimple *
- vect_is_simple_reduction (loop_vec_info loop_info, gimple *phi,
- bool check_reduction, bool *double_reduc,
- bool need_wrapping_integral_overflow,
- enum vect_reduction_type *v_reduc_type)
- {
- return vect_is_simple_reduction_1 (loop_info, phi, check_reduction,
- double_reduc, false,
- need_wrapping_integral_overflow,
- v_reduc_type);
- }
-
/* Wrapper around vect_is_simple_reduction_1, which will modify code
in-place if it enables detection of more reductions. Arguments
as there. */
--- 2904,2909 ----
*************** vect_force_simple_reduction (loop_vec_in
*** 2940,2949 ****
bool need_wrapping_integral_overflow)
{
enum vect_reduction_type v_reduc_type;
! return vect_is_simple_reduction_1 (loop_info, phi, check_reduction,
! double_reduc, true,
! need_wrapping_integral_overflow,
! &v_reduc_type);
}
/* Calculate cost of peeling the loop PEEL_ITERS_PROLOGUE times. */
--- 2914,2923 ----
bool need_wrapping_integral_overflow)
{
enum vect_reduction_type v_reduc_type;
! return vect_is_simple_reduction (loop_info, phi, check_reduction,
! double_reduc,
! need_wrapping_integral_overflow,
! &v_reduc_type);
}
/* Calculate cost of peeling the loop PEEL_ITERS_PROLOGUE times. */
*************** vectorizable_reduction (gimple *stmt, gi
*** 5398,5403 ****
--- 5372,5379 ----
}
/* The default is that the reduction variable is the last in statement. */
int reduc_index = op_type - 1;
+ if (code == MINUS_EXPR)
+ reduc_index = 0;
if (code == COND_EXPR && slp_node)
return false;
*************** vectorizable_reduction (gimple *stmt, gi
*** 5417,5424 ****
The last use is the reduction variable. In case of nested cycle this
assumption is not true: we use reduc_index to record the index of the
reduction variable. */
! for (i = 0; i < op_type - 1; i++)
{
/* The condition of COND_EXPR is checked in vectorizable_condition(). */
if (i == 0 && code == COND_EXPR)
continue;
--- 5393,5403 ----
The last use is the reduction variable. In case of nested cycle this
assumption is not true: we use reduc_index to record the index of the
reduction variable. */
! for (i = 0; i < op_type; i++)
{
+ if (i == reduc_index)
+ continue;
+
/* The condition of COND_EXPR is checked in vectorizable_condition(). */
if (i == 0 && code == COND_EXPR)
continue;
*************** vectorizable_reduction (gimple *stmt, gi
*** 5454,5460 ****
}
}
! is_simple_use = vect_is_simple_use (ops[i], loop_vinfo, &def_stmt, &dt, &tem);
if (!vectype_in)
vectype_in = tem;
gcc_assert (is_simple_use);
--- 5433,5440 ----
}
}
! is_simple_use = vect_is_simple_use (ops[reduc_index], loop_vinfo,
! &def_stmt, &dt, &tem);
if (!vectype_in)
vectype_in = tem;
gcc_assert (is_simple_use);
*************** vectorizable_reduction (gimple *stmt, gi
*** 5625,5630 ****
--- 5605,5613 ----
the vector code inside the loop can be used for the epilog code. */
orig_code = code;
+ if (code == MINUS_EXPR)
+ orig_code = PLUS_EXPR;
+
/* For simple condition reductions, replace with the actual expression
we want to base our reduction around. */
if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
Index: gcc/testsuite/gcc.dg/vect/pr67790.c
===================================================================
*** gcc/testsuite/gcc.dg/vect/pr67790.c (revision 0)
--- gcc/testsuite/gcc.dg/vect/pr67790.c (working copy)
***************
*** 0 ****
--- 1,40 ----
+ /* { dg-require-effective-target vect_int } */
+
+ #include "tree-vect.h"
+
+ struct {
+ int x_advance;
+ int y_advance;
+ } a[256];
+
+ int b, c;
+
+ void __attribute__((noinline,noclone)) fn1()
+ {
+ for (int i = 0; i < 256; i++)
+ {
+ c -= a[i].x_advance;
+ b -= a[i].y_advance;
+ }
+ }
+
+ int main()
+ {
+ check_vect ();
+
+ for (int i = 0; i < 256; ++i)
+ {
+ a[i].x_advance = i;
+ a[i].y_advance = -i + 3;
+ __asm__ volatile ("" : : : "memory");
+ }
+
+ fn1();
+
+ if (c != -32640 || b != 31872)
+ abort ();
+
+ return 0;
+ }
+
+ /* { dg-final { scan-tree-dump "vectorizing stmts using SLP" "vect" } } */
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2015-11-18 8:27 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-11-18 8:27 [PATCH] Fix PR67790 Richard Biener
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).