* [RFC][GCC][Vect] Add support for minmax + index pattern.
@ 2021-03-12 16:20 Joel Hutton
0 siblings, 0 replies; only message in thread
From: Joel Hutton @ 2021-03-12 16:20 UTC (permalink / raw)
To: gcc
[-- Attachment #1: Type: text/plain, Size: 797 bytes --]
Hi all,
Some community members have shown interest in a patch I made a while back but didn't submit.
I'm not looking to commit this at the moment, just to make it available (hence why I haven't sent it to gcc-patches)>
This patch was based on master at the following commit and doesn't currently apply to the head of master:
90f7636bf8df50940e0f749af60a6b374a8f09b4 libstdc++: Make C++17 ignore --disable-libstdcxx-filesystem-ts [PR 94681]
This patch adds support to vectorize the following snippet to find the max and corresponding index from an array:
void foo (int *data, int n) {
int best_i=0, best = 0;
for (int i = 0; i < n; i++) {
if (data[i] > best) {
best = data[i];
best_i = i;
}
}
data[best_i] = data[0];
data[0] = best;
}
[-- Attachment #2: minmax.patch --]
[-- Type: application/octet-stream, Size: 14649 bytes --]
diff --git a/gcc/testsuite/gcc.dg/vect/vect-127.c b/gcc/testsuite/gcc.dg/vect/vect-127.c
new file mode 100644
index 0000000000000000000000000000000000000000..fc1fb6eb7bc44fdcced4e6a904e998cce845b8dc
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-127.c
@@ -0,0 +1,51 @@
+/* { dg-do run } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-additional-options "-O3 -fdump-tree-vect-all -fno-vect-cost-model -fdisable-tree-cunroll" } */
+#include "tree-vect.h"
+#define ARR_LEN(A) (sizeof (A)/sizeof(A[0]))
+
+void max_to_front (int *data, int n) {
+ int best_i=0, best = 0;
+
+ for (int i = 0; i < n; i++) {
+ if (data[i] > best) {
+ best = data[i];
+ best_i = i;
+ }
+ }
+ data[best_i] = data[0];
+ data[0] = best;
+}
+
+void lastmax_to_front (int *data, int n) {
+ int best_i=0, best = 0;
+
+ for (int i = 0; i < n; i++) {
+ if (data[i] >= best) {
+ best = data[i];
+ best_i = i;
+ }
+ }
+ data[best_i] = data[0];
+ data[0] = best;
+}
+int main ()
+{
+ int a[] = {0,1,0,1,3,1,5,16,4,4,1,16,3,1};
+ max_to_front (a, ARR_LEN (a));
+ if (a[0] != 16 || a[7] != 0)
+ abort ();
+
+ int b[] = {15,0,0,0,0,0,0,16,0,15,0,0,0,0};
+ max_to_front (b, ARR_LEN (b));
+ if (b[0] != 16 || b[7] != 15)
+ abort ();
+
+ int c[] = {15,0,0,0,0,0,0,16,0,16,0,0,0,0};
+ lastmax_to_front (c, ARR_LEN (c));
+ if (c[0] != 16 || c[9] != 15)
+ abort ();
+}
+
+/* { dg-final { scan-tree-dump-times "Found minmax index pattern.*" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Reduce minmax index pattern.*" 1 "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-128.c b/gcc/testsuite/gcc.dg/vect/vect-128.c
new file mode 100644
index 0000000000000000000000000000000000000000..a8f403433baec5b835213fa222cd453c8c85be37
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-128.c
@@ -0,0 +1,51 @@
+#include <limits.h>
+/* { dg-do run } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-additional-options "-O3 -fdump-tree-vect-all -fno-vect-cost-model -fdisable-tree-cunroll" } */
+#include "tree-vect.h"
+#define ARR_LEN(A) (sizeof (A)/sizeof(A[0]))
+
+
+/* This is vectorized. */
+void min_to_front (int *data, int n) {
+ int best_i=0, best = INT_MAX;
+
+ for (int i = 0; i < n; i++) {
+ if (data[i] < best) {
+ best = data[i];
+ best_i = i;
+ }
+ }
+ data[best_i] = data[0];
+ data[0] = best;
+}
+
+/* This is not vectorized. */
+void lastmin_to_front (int *data, int n) {
+ int best_i=0, best = INT_MAX;
+
+ for (int i = 0; i < n; i++) {
+ if (data[i] <= best) {
+ best = data[i];
+ best_i = i;
+ }
+ }
+ data[best_i] = data[0];
+ data[0] = best;
+}
+
+int main ()
+{
+ int d[] = {15,-3,-2,-2,-3,-1,0,16,0,16,0,0,0,-3};
+ min_to_front (d, ARR_LEN (d));
+ if (d[0] != -3 || d[1] != 15)
+ abort ();
+
+ int e[] = {15,-3,-2,-2,-3,-1,0,16,0,16,0,0,0,-3};
+ lastmin_to_front (e, ARR_LEN (e));
+ if (e[0] != -3 || e[13] != 15)
+ abort ();
+}
+
+/* { dg-final { scan-tree-dump-times "Found minmax index pattern.*" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Reduce minmax index pattern.*" 1 "vect" } } */
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index dba230f63204911b11fb70095ed50edc9f90db8e..80db136f11503fceaf2c633b5a2c8403db74e5b5 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -156,6 +156,101 @@ static void vect_estimate_min_profitable_iters (loop_vec_info, int *, int *);
static stmt_vec_info vect_is_simple_reduction (loop_vec_info, stmt_vec_info,
bool *, bool *);
+static bool
+vect_reassociating_reduction_simple_p (stmt_vec_info stmt_info,
+ loop_vec_info loop_info,
+ tree_code code,
+ tree *op0_out, tree *op1_out)
+{
+ gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
+ if (!assign || gimple_assign_rhs_code (assign) != code)
+ return false;
+
+ /* We don't allow changing the order of the computation in the inner-loop
+ when doing outer-loop vectorization. */
+ class loop *loop = LOOP_VINFO_LOOP (loop_info);
+ if (loop && nested_in_vect_loop_p (loop, stmt_info))
+ return false;
+
+ *op0_out = gimple_assign_rhs1 (assign);
+ *op1_out = gimple_assign_rhs2 (assign);
+ return true;
+}
+
+static bool
+vect_recog_minmax_index_pattern (stmt_vec_info stmt_vinfo,
+ loop_vec_info loop_vinfo)
+{
+ tree oprnd0, oprnd1;
+ gimple *stmt = stmt_vinfo->stmt;
+ gimple *use_stmt;
+ use_operand_p use_p;
+ imm_use_iterator iter;
+
+ if (!(vect_reassociating_reduction_simple_p (stmt_vinfo, loop_vinfo,
+ MAX_EXPR,
+ &oprnd0, &oprnd1)
+ || vect_reassociating_reduction_simple_p (stmt_vinfo, loop_vinfo,
+ MIN_EXPR,
+ &oprnd0, &oprnd1)))
+ return false;
+
+ if (!((TREE_CODE (oprnd0) == SSA_NAME)
+ && (TREE_CODE (oprnd1) == SSA_NAME)
+ && (is_a <gphi *> (SSA_NAME_DEF_STMT (oprnd1))
+ || is_a <gphi *> (SSA_NAME_DEF_STMT (oprnd0)))))
+ return false;
+
+
+ tree_code tc_stmt = gimple_assign_rhs_code (stmt);
+
+ /* Starting from LAST_STMT, follow the defs of its uses in search
+ of the above pattern. */
+ FOR_EACH_IMM_USE_FAST (use_p, iter, oprnd1)
+ {
+ tree_code tc_us;
+ tree_code tc_us_cond;
+ use_stmt = USE_STMT (use_p);
+ if (stmt->bb != use_stmt->bb)
+ continue;
+ if (!is_gimple_assign (use_stmt))
+ continue;
+ tc_us = gimple_assign_rhs_code (use_stmt);
+ if (tc_us != COND_EXPR)
+ continue;
+ tree us_op0 = TREE_OPERAND (gimple_assign_rhs1 (use_stmt), 0);
+ tree us_op1 = TREE_OPERAND (gimple_assign_rhs1 (use_stmt), 1);
+ if (!((us_op0 == oprnd0 && us_op1 == oprnd1)
+ ||(us_op0 == oprnd1 && us_op1 == oprnd0)))
+ continue;
+ tree us_cond = gimple_assign_rhs1 (use_stmt);
+ tc_us_cond = TREE_CODE (us_cond);
+ tree minmax_type = TREE_TYPE (gimple_assign_lhs (stmt_vinfo->stmt));
+ tree us_type = TREE_TYPE (gimple_assign_lhs (use_stmt));
+ if (minmax_type != us_type)
+ continue;
+ if ((tc_stmt == MAX_EXPR
+ &&(tc_us_cond == GT_EXPR || tc_us_cond == LE_EXPR))
+ || (tc_stmt == MIN_EXPR
+ &&(tc_us_cond == LT_EXPR || tc_us_cond == GE_EXPR)))
+ {
+ stmt_vec_info ind_stmt = loop_vinfo->lookup_stmt (use_stmt);
+ ind_stmt->reduc_multi_use_related_stmt = stmt_vinfo;
+ stmt_vinfo->reduc_multi_use_related_stmt = ind_stmt;
+ ind_stmt->is_minmax_index = true;
+ stmt_vinfo->is_minmax = true;
+ if (dump_enabled_p ())
+ {
+ dump_printf (MSG_NOTE, "Found minmax index pattern:\n %G%G",
+ stmt_vinfo->stmt, ind_stmt->stmt);
+ }
+ return true;
+ }
+ }
+
+ return false;
+}
+
/* Subroutine of vect_determine_vf_for_stmt that handles only one
statement. VECTYPE_MAYBE_SET_P is true if STMT_VINFO_VECTYPE
may already be set for general statements (not just data refs). */
@@ -3068,7 +3163,8 @@ needs_fold_left_reduction_p (tree type, tree_code code)
static bool
check_reduction_path (dump_user_location_t loc, loop_p loop, gphi *phi,
tree loop_arg, enum tree_code *code,
- vec<std::pair<ssa_op_iter, use_operand_p> > &path)
+ vec<std::pair<ssa_op_iter, use_operand_p> > &path,
+ bool supported_multi_use_reduction)
{
auto_bitmap visited;
tree lookfor = PHI_RESULT (phi);
@@ -3176,7 +3272,7 @@ pop:
else
cnt++;
}
- if (cnt != 1)
+ if (cnt != 1 && !supported_multi_use_reduction)
{
fail = true;
break;
@@ -3194,10 +3290,10 @@ pop:
TREE_TYPE (gimple_assign_rhs1 (use_stmt))))
;
else if (*code == ERROR_MARK)
- {
+ {
*code = use_code;
sign = TYPE_SIGN (TREE_TYPE (gimple_assign_lhs (use_stmt)));
- }
+ }
else if (use_code != *code)
{
fail = true;
@@ -3220,7 +3316,7 @@ check_reduction_path (dump_user_location_t loc, loop_p loop, gphi *phi,
{
auto_vec<std::pair<ssa_op_iter, use_operand_p> > path;
enum tree_code code_;
- return (check_reduction_path (loc, loop, phi, loop_arg, &code_, path)
+ return (check_reduction_path (loc, loop, phi, loop_arg, &code_, path, false)
&& code_ == code);
}
@@ -3359,10 +3455,14 @@ vect_is_simple_reduction (loop_vec_info loop_info, stmt_vec_info phi_info,
return def_stmt_info;
}
+ bool supported_multi_use_reduction =
+ vect_recog_minmax_index_pattern (def_stmt_info, loop_info);
+
/* If this isn't a nested cycle or if the nested cycle reduction value
is used ouside of the inner loop we cannot handle uses of the reduction
value. */
- if (nlatch_def_loop_uses > 1 || nphi_def_loop_uses > 1)
+ if (!(supported_multi_use_reduction)
+ && (nlatch_def_loop_uses > 1 || nphi_def_loop_uses > 1))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -3409,7 +3509,7 @@ vect_is_simple_reduction (loop_vec_info loop_info, stmt_vec_info phi_info,
auto_vec<std::pair<ssa_op_iter, use_operand_p> > path;
enum tree_code code;
if (check_reduction_path (vect_location, loop, phi, latch_def, &code,
- path))
+ path, supported_multi_use_reduction))
{
STMT_VINFO_REDUC_CODE (phi_info) = code;
if (code == COND_EXPR && !nested_in_vect_loop)
@@ -5062,7 +5162,120 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo,
else
new_phi_result = PHI_RESULT (new_phis[0]);
- if (STMT_VINFO_REDUC_TYPE (reduc_info) == COND_REDUCTION
+ if (stmt_info->is_minmax_index || stmt_info->is_minmax)
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "Reduce minmax index pattern\n");
+
+ stmt_vec_info minmax;
+ stmt_vec_info minmax_i;
+
+ if (stmt_info->is_minmax_index)
+ {
+ minmax_i = stmt_info;
+ minmax = stmt_info->reduc_multi_use_related_stmt;
+ if (induc_val != NULL_TREE)
+ minmax_i->induc_val = induc_val;
+ if (initial_def != NULL_TREE)
+ minmax_i->initial_def = initial_def;
+ }
+ else
+ {
+ minmax = stmt_info;
+ minmax_i = stmt_info->reduc_multi_use_related_stmt;
+ }
+
+ if (!stmt_info->epilog_finished
+ && minmax->vec_stmts.length() != 0
+ && minmax_i->vec_stmts.length() != 0)
+ {
+ gimple* minmax_accum_stmt = minmax->vec_stmts[0];
+ gimple* minmax_i_accum_stmt = minmax_i->vec_stmts[0];
+
+ tree minmax_accum_lhs = gimple_assign_lhs (minmax_accum_stmt);
+ tree minmax_i_accum_lhs = gimple_assign_lhs (minmax_i_accum_stmt);
+ tree scalar_type = TREE_TYPE (vectype);
+
+ /* Reduce the min/max accumulator. */
+ if (gimple_assign_rhs_code (minmax->stmt) == MAX_EXPR)
+ reduc_fn = IFN_REDUC_MAX;
+ else
+ reduc_fn = IFN_REDUC_MIN;
+ if (minmax->scalar_result == NULL)
+ minmax->scalar_result = make_ssa_name (scalar_type);
+ gimple* maxreducstmt = gimple_build_call_internal (reduc_fn, 1,
+ minmax_accum_lhs);
+ gimple_call_set_lhs (maxreducstmt, minmax->scalar_result);
+ gsi_insert_before (&exit_gsi, maxreducstmt, GSI_NEW_STMT);
+
+ /* Create a vector of {maxval,maxval...}. */
+ tree maxval = minmax->scalar_result;
+ tree maxvec = make_ssa_name (vectype);
+ tree maxvec_rhs = build_vector_from_val (vectype, maxval);
+ gimple* maxvec_decl = gimple_build_assign (maxvec, maxvec_rhs);
+ gsi_insert_after (&exit_gsi, maxvec_decl, GSI_NEW_STMT);
+
+ /* Create a vector of {INT_MAX,INT_MAX,INT_MAX}. */
+ tree intmax = TYPE_MAX_VALUE (TREE_TYPE (vectype));
+ tree intmaxvec = build_vector_from_val (vectype, intmax);
+
+ /* Select the correct index from the index accumulator vector, using
+ the position in the minmax accumulator vector. Mask out the
+ incorrect indexes with INT_MAX, to allow for a MIN reduction. */
+ tree truthvectype = truth_type_for (vectype);
+ tree cond = build2 (EQ_EXPR, truthvectype, minmax_accum_lhs, maxvec);
+ tree cond_lhs = make_ssa_name(truthvectype);
+ gimple* cond_decl = gimple_build_assign (cond_lhs, cond);
+ gsi_insert_after (&exit_gsi, cond_decl, GSI_NEW_STMT);
+ tree mask_rhs = build3 (VEC_COND_EXPR, truthvectype, cond_lhs,
+ minmax_i_accum_lhs, intmaxvec);
+ tree masked_ind = make_ssa_name (vectype);
+ gimple* masked_ind_stmt = gimple_build_assign (masked_ind, mask_rhs);
+ gsi_insert_after (&exit_gsi, masked_ind_stmt, GSI_NEW_STMT);
+
+ /* Reduce the index vector to a scalar result. */
+ tree index_scalar_reduc;
+ if (minmax_i->scalar_result == NULL)
+ minmax_i->scalar_result = make_ssa_name (TREE_TYPE (vectype));
+ if (minmax_i->induc_val == NULL_TREE)
+ index_scalar_reduc = minmax_i->scalar_result;
+ else
+ index_scalar_reduc = make_ssa_name (scalar_type);
+ gimple* index_result_stmt = gimple_build_call_internal (IFN_REDUC_MIN,
+ 1, masked_ind);
+ gimple_call_set_lhs (index_result_stmt, index_scalar_reduc);
+ gsi_insert_after (&exit_gsi, index_result_stmt, GSI_NEW_STMT);
+
+ if (minmax_i->induc_val != NULL_TREE)
+ {
+ /* Earlier we set the initial value to be a vector if induc_val
+ values. Check the result and if it is induc_val then replace
+ with the original initial value, unless induc_val is
+ the same as initial_def already. */
+ tree zcompare = build2 (EQ_EXPR, boolean_type_node,
+ index_scalar_reduc, minmax_i->induc_val);
+
+ epilog_stmt = gimple_build_assign (minmax_i->scalar_result, COND_EXPR,
+ zcompare, minmax_i->initial_def,
+ index_scalar_reduc);
+ gsi_insert_after (&exit_gsi, epilog_stmt, GSI_NEW_STMT);
+ }
+
+ minmax->epilog_finished = true;
+ minmax_i->epilog_finished = true;
+ }
+ else if (stmt_info->scalar_result == NULL)
+ {
+ /* Placeholder. The vectorized forms from both the max and the index are
+ needed for the reduction. */
+ tree scalar_result = make_ssa_name (scalar_type);
+ stmt_info->scalar_result = scalar_result;
+ }
+
+ scalar_results.safe_push (stmt_info->scalar_result);
+ }
+ else if (STMT_VINFO_REDUC_TYPE (reduc_info) == COND_REDUCTION
&& reduc_fn != IFN_LAST)
{
/* For condition reductions, we have a vector (NEW_PHI_RESULT) containing
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 7ab6d8234ac73f34dde09ed028d02fa9b3fa67b1..e5931666fab64e324816ef8545a1bea416a21834 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -1123,6 +1123,17 @@ public:
pattern). */
stmt_vec_info related_stmt;
+ /* For handling multi phi reductions. */
+ tree scalar_result;
+ stmt_vec_info reduc_multi_use_related_stmt;
+ gimple* reduc_multi_use_result;
+ tree induc_val;
+ tree initial_def;
+ bool is_minmax_index;
+ bool is_minmax;
+ bool epilog_finished;
+
+
/* Used to keep a sequence of def stmts of a pattern stmt if such exists.
The sequence is attached to the original statement rather than the
pattern statement. */
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2021-03-12 16:21 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-03-12 16:20 [RFC][GCC][Vect] Add support for minmax + index pattern Joel Hutton
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).