From: Alan Hayward <alan.hayward@arm.com>
To: "gcc-patches@gcc.gnu.org" <gcc-patches@gcc.gnu.org>
Subject: [Patch] Optimize condition reductions where the result is an integer induction variable
Date: Wed, 11 Nov 2015 12:22:00 -0000 [thread overview]
Message-ID: <D268E36F.98B3%alan.hayward@arm.com> (raw)
[-- Attachment #1: Type: text/plain, Size: 1361 bytes --]
Hi,
I hoped to post this in time for Monday’s cut off date, but circumstances
delayed me until today. Hoping if possible this patch will still be able
to go in.
This patch builds upon the change for PR65947, and reduces the amount of
code produced in a vectorized condition reduction where operand 2 of the
COND_EXPR is an assignment of a increasing integer induction variable that
won't wrap.
For example (assuming all types are ints), this is a match:
last = 5;
for (i = 0; i < N; i++)
if (a[i] < min_v)
last = i;
Whereas, this is not because the result is based off a memory access:
last = 5;
for (i = 0; i < N; i++)
if (a[i] < min_v)
last = a[i];
In the integer induction variable case we can just use a MAX reduction and
skip all the code I added in my vectorized condition reduction patch - the
additional induction variables in vectorizable_reduction () and the
additional checks in vect_create_epilog_for_reduction (). From the patch
diff only, it's not immediately obvious that those parts will be skipped
as there is no code changes in those areas.
The initial value of the induction variable is force set to zero, as any
other value could effect the result of the induction. At the end of the
loop, if the result is zero, then we restore the original initial value.
Cheers,
Alan.
[-- Attachment #2: optimizeConditionReductions.patch --]
[-- Type: application/octet-stream, Size: 16071 bytes --]
diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-1.c b/gcc/testsuite/gcc.dg/vect/pr65947-1.c
index 7933f5c861201a7720ca77d84671898586516914..1e7a05dc8cc6f00fee265b9b8ad65beaa520f1e8 100644
--- a/gcc/testsuite/gcc.dg/vect/pr65947-1.c
+++ b/gcc/testsuite/gcc.dg/vect/pr65947-1.c
@@ -9,7 +9,7 @@ extern void abort (void) __attribute__ ((noreturn));
int
condition_reduction (int *a, int min_v)
{
- int last = -1;
+ int last = 66; /* High start value. */
for (int i = 0; i < N; i++)
if (a[i] < min_v)
@@ -28,12 +28,13 @@ main (void)
31, 32
};
- int ret = condition_reduction (a, 16);
+ int ret = condition_reduction (a, 1);
- if (ret != 19)
+ if (ret != 17)
abort ();
return 0;
}
/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" { xfail { ! vect_max_reduc } } } } */
+/* { dg-final { scan-tree-dump-times "condition expression based on integer induction." 4 "vect" { xfail { ! vect_max_reduc } } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-10.c b/gcc/testsuite/gcc.dg/vect/pr65947-10.c
index 9a43a6059fabc1ca527dd6305bf6482f2103fab4..b4c6659b77ccbbafd77c62976e408dda37a2f4c4 100644
--- a/gcc/testsuite/gcc.dg/vect/pr65947-10.c
+++ b/gcc/testsuite/gcc.dg/vect/pr65947-10.c
@@ -37,4 +37,5 @@ main (void)
}
/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" { xfail { ! vect_max_reduc } } } } */
+/* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-12.c b/gcc/testsuite/gcc.dg/vect/pr65947-12.c
new file mode 100644
index 0000000000000000000000000000000000000000..fb5ffd48c7bb87f6e2f9e055e90e4f121a202d5e
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr65947-12.c
@@ -0,0 +1,41 @@
+/* { dg-require-effective-target vect_condition } */
+
+extern void abort (void) __attribute__ ((noreturn));
+
+#define N 32
+
+/* Simple condition reduction where the result is a negative of the induction.
+ Will fail to vectorize to a simple case. */
+
+signed int
+condition_reduction (signed int *a, signed int min_v)
+{
+ signed int last = -1;
+
+ for (signed int i = 0; i < N; i++)
+ if (a[i] < min_v)
+ last = -i;
+
+ return last;
+}
+
+int
+main (void)
+{
+ signed int a[N] = {
+ 11, -12, 13, 14, 15, 16, 17, 18, 19, 20,
+ 1, 2, -3, 4, 5, 6, 7, -8, 9, 10,
+ 21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
+ 31, 32
+ };
+
+ signed int ret = condition_reduction (a, 16);
+
+ if (ret != -19)
+ abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" { xfail { ! vect_max_reduc } } } } */
+/* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-13.c b/gcc/testsuite/gcc.dg/vect/pr65947-13.c
new file mode 100644
index 0000000000000000000000000000000000000000..8c6faddd189dabf9b96b4e665c59cf268cf9a3aa
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr65947-13.c
@@ -0,0 +1,41 @@
+/* { dg-require-effective-target vect_condition } */
+
+extern void abort (void) __attribute__ ((noreturn));
+
+#define N 32
+
+/* Simple condition reduction with a reversed loop.
+ Will fail to vectorize to a simple case. */
+
+int
+condition_reduction (int *a, int min_v)
+{
+ int last = -1;
+
+ for (int i = N-1; i >=0; i--)
+ if (a[i] < min_v)
+ last = i;
+
+ return last;
+}
+
+int
+main (void)
+{
+ int a[N] = {
+ 17, 28, 13, 14, 15, 16, 17, 18, 19, 20,
+ 1, 2, -3, 4, 5, 6, 7, -8, 9, 10,
+ 21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
+ 31, 32
+ };
+
+ int ret = condition_reduction (a, 16);
+
+ if (ret != 2)
+ abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" { xfail { ! vect_max_reduc } } } } */
+/* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-2.c b/gcc/testsuite/gcc.dg/vect/pr65947-2.c
index 9c627d9d7d5c1c42f285a29a1a601b3bc60b8a5a..9e9ff53828609b19e6fe97ae2d640294cb980c14 100644
--- a/gcc/testsuite/gcc.dg/vect/pr65947-2.c
+++ b/gcc/testsuite/gcc.dg/vect/pr65947-2.c
@@ -38,3 +38,4 @@ main (void)
}
/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" { xfail { ! vect_max_reduc } } } } */
+/* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-3.c b/gcc/testsuite/gcc.dg/vect/pr65947-3.c
index e115de2a28259307e4eaa9cbbe2fa93663c22873..4b6aa9216b0c200c43a227bf8b4954ecbe4f0090 100644
--- a/gcc/testsuite/gcc.dg/vect/pr65947-3.c
+++ b/gcc/testsuite/gcc.dg/vect/pr65947-3.c
@@ -48,3 +48,4 @@ main (void)
}
/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" { xfail { ! vect_max_reduc } } } } */
+/* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-4.c b/gcc/testsuite/gcc.dg/vect/pr65947-4.c
index 76a0567aa5450e00de2d700123ffc930b4028486..f4e7fdc97c89866b19838593cb84fc1754f87494 100644
--- a/gcc/testsuite/gcc.dg/vect/pr65947-4.c
+++ b/gcc/testsuite/gcc.dg/vect/pr65947-4.c
@@ -37,4 +37,5 @@ main (void)
}
/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" { xfail { ! vect_max_reduc } } } } */
+/* { dg-final { scan-tree-dump-times "condition expression based on integer induction." 4 "vect" { xfail { ! vect_max_reduc } } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-5.c b/gcc/testsuite/gcc.dg/vect/pr65947-5.c
index 360e3b51ee15ecea3b5d590c93d524e143f0ffca..21be8d0b749a4c1f2f29b6aed17533ec26b7aede 100644
--- a/gcc/testsuite/gcc.dg/vect/pr65947-5.c
+++ b/gcc/testsuite/gcc.dg/vect/pr65947-5.c
@@ -39,3 +39,4 @@ main (void)
/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 1 "vect" { xfail { ! vect_max_reduc } } } } */
/* { dg-final { scan-tree-dump "loop size is greater than data size" "vect" { xfail { ! vect_max_reduc } } } } */
+/* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-6.c b/gcc/testsuite/gcc.dg/vect/pr65947-6.c
index 4997ef79cae83585758436874ac109ca368a1fc1..e1432403b2dd227a7e13db6e228d819780b89ad6 100644
--- a/gcc/testsuite/gcc.dg/vect/pr65947-6.c
+++ b/gcc/testsuite/gcc.dg/vect/pr65947-6.c
@@ -37,3 +37,4 @@ main (void)
}
/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" { xfail { ! vect_max_reduc } } } } */
+/* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index 55e53093caaf663d592dccbf2c913f334f79b3d9..1e251c0cef3f31fd9494edbc0ac23c32c595a3c0 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -4184,7 +4184,7 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs, gimple *stmt,
tree bitsize;
tree adjustment_def = NULL;
tree vec_initial_def = NULL;
- tree reduction_op, expr, def;
+ tree reduction_op, expr, def, initial_def = NULL;
tree orig_name, scalar_result;
imm_use_iterator imm_iter, phi_imm_iter;
use_operand_p use_p, phi_use_p;
@@ -4245,9 +4245,10 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs, gimple *stmt,
/* Get at the scalar def before the loop, that defines the initial value
of the reduction variable. */
gimple *def_stmt = SSA_NAME_DEF_STMT (reduction_op);
- tree op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
+ initial_def = PHI_ARG_DEF_FROM_EDGE (def_stmt,
+ loop_preheader_edge (loop));
vec_initial_defs.create (1);
- vec_initial_def = get_initial_def_for_reduction (stmt, op,
+ vec_initial_def = get_initial_def_for_reduction (stmt, initial_def,
&adjustment_def);
vec_initial_defs.quick_push (vec_initial_def);
}
@@ -4263,9 +4264,28 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs, gimple *stmt,
def = vect_defs[i];
for (j = 0; j < ncopies; j++)
{
- /* Set the loop-entry arg of the reduction-phi. */
- add_phi_arg (as_a <gphi *> (phi), vec_init_def,
+ /* Set the loop-entry arg of the reduction-phi. */
+
+ if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
+ == INTEGER_INDUC_COND_REDUCTION)
+ {
+ /* Initialise the reduction phi to zero. This prevents initial
+ values of non-zero interferring with the reduction op. */
+ gcc_assert (ncopies == 1);
+ gcc_assert (i == 0);
+
+ tree vec_init_def_type = TREE_TYPE (vec_init_def);
+ tree zero = build_int_cst ( TREE_TYPE (vec_init_def_type), 0);
+ tree zero_vec = build_vector_from_val (vec_init_def_type, zero);
+
+ add_phi_arg (as_a <gphi *> (phi), zero_vec,
+ loop_preheader_edge (loop), UNKNOWN_LOCATION);
+ }
+ else
+ {
+ add_phi_arg (as_a <gphi *> (phi), vec_init_def,
loop_preheader_edge (loop), UNKNOWN_LOCATION);
+ }
/* Set the loop-latch arg for the reduction-phi. */
if (j > 0)
@@ -4607,10 +4627,29 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs, gimple *stmt,
}
else
tmp = build1 (reduc_code, scalar_type, new_phi_result);
+
epilog_stmt = gimple_build_assign (new_scalar_dest, tmp);
new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
gimple_assign_set_lhs (epilog_stmt, new_temp);
gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
+
+ if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
+ == INTEGER_INDUC_COND_REDUCTION)
+ {
+ /* Earlier we set the initial value to be zero. Check the result
+ and if it is zero then replace with the original initial
+ value. */
+ tree zero = build_zero_cst (scalar_type);
+ tree comparez = build2 (EQ_EXPR, boolean_type_node, new_temp, zero);
+ tmp = build3 (COND_EXPR, scalar_type, comparez, initial_def,
+ new_temp);
+
+ epilog_stmt = gimple_build_assign (new_scalar_dest, tmp);
+ new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
+ gimple_assign_set_lhs (epilog_stmt, new_temp);
+ gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
+ }
+
scalar_results.safe_push (new_temp);
}
else
@@ -5087,6 +5126,45 @@ vect_finalize_reduction:
}
+/* Function is_integer_induction.
+
+ Check if STMT (which is part of loop LOOP) both increments and
+ does not cause overflow. */
+
+static bool
+is_integer_induction (gimple *stmt, struct loop *loop)
+{
+ stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
+ tree base = PHI_ARG_DEF_FROM_EDGE (stmt, loop_preheader_edge (loop));
+ tree step = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_vinfo);
+ tree lhs_max = TYPE_MAX_VALUE (TREE_TYPE (gimple_phi_result (stmt)));
+ widest_int ni, max_loop_value;
+
+ /* Make sure the loop is integer based. */
+ if (TREE_CODE (base) != INTEGER_CST
+ || TREE_CODE (step) != INTEGER_CST)
+ return false;
+
+ /* Check that the induction increments. */
+ if (tree_int_cst_compare (step, size_zero_node) <= 0)
+ return false;
+
+ /* Check that the max size of the loop will not wrap. */
+
+ if (! max_loop_iterations (loop, &ni))
+ return false;
+ /* Convert backedges to iterations. */
+ ni += 1;
+
+ max_loop_value = wi::add (wi::to_widest (base),
+ wi::mul (wi::to_widest (step), ni));
+
+ if (wi::gtu_p (max_loop_value, wi::to_widest (lhs_max)))
+ return false;
+
+ return true;
+}
+
/* Function vectorizable_reduction.
Check if STMT performs a reduction operation that can be vectorized.
@@ -5185,6 +5263,7 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
tree def0, def1, tem, op0, op1 = NULL_TREE;
bool first_p = true;
tree cr_index_scalar_type = NULL_TREE, cr_index_vector_type = NULL_TREE;
+ bool cond_expr_is_integer_induction = false;
/* In case of reduction chain we switch to the first stmt in the chain, but
we don't update STMT_INFO, since only the last stmt is marked as reduction
@@ -5328,6 +5407,16 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
reduc_def_stmt = def_stmt;
reduc_index = i;
}
+
+ if (i == 1 && code == COND_EXPR && dt == vect_induction_def
+ && is_integer_induction (def_stmt, loop))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "condition expression based on integer "
+ "induction.\n");
+ cond_expr_is_integer_induction = true;
+ }
}
is_simple_use = vect_is_simple_use (ops[i], loop_vinfo, &def_stmt, &dt, &tem);
@@ -5357,6 +5446,11 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
(loop_vinfo, reduc_def_stmt,
!nested_cycle, &dummy, false,
&STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info));
+
+ if (cond_expr_is_integer_induction
+ && STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION)
+ STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) = INTEGER_INDUC_COND_REDUCTION;
+
if (orig_stmt)
gcc_assert (tmp == orig_stmt
|| GROUP_FIRST_ELEMENT (vinfo_for_stmt (tmp)) == orig_stmt);
@@ -5484,6 +5578,8 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
{
/* This is a reduction pattern: get the vectype from the type of the
reduction variable, and get the tree-code from orig_stmt. */
+ gcc_assert (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
+ == TREE_CODE_REDUCTION);
orig_code = gimple_assign_rhs_code (orig_stmt);
gcc_assert (vectype_out);
vec_mode = TYPE_MODE (vectype_out);
@@ -5493,6 +5589,12 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
/* Regular reduction: use the same vectype and tree-code as used for
the vector code inside the loop can be used for the epilog code. */
orig_code = code;
+
+ /* For simple condition reductions, replace with the actual expression
+ we want to base our reduction around. */
+ if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
+ == INTEGER_INDUC_COND_REDUCTION)
+ orig_code = MAX_EXPR;
}
if (nested_cycle)
@@ -5513,7 +5615,9 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
epilog_reduc_code = ERROR_MARK;
- if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == TREE_CODE_REDUCTION)
+ if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == TREE_CODE_REDUCTION
+ || STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
+ == INTEGER_INDUC_COND_REDUCTION)
{
if (reduction_code_for_scalar_code (orig_code, &epilog_reduc_code))
{
@@ -5539,6 +5643,19 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
epilog_reduc_code = ERROR_MARK;
}
}
+
+ /* When epilog_reduc_code is ERROR_MARK then a reduction will be
+ generated in the epilog using multiple expressions. This does not
+ work for condition reductions. */
+ if (epilog_reduc_code == ERROR_MARK
+ && STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
+ == INTEGER_INDUC_COND_REDUCTION)
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "no reduc code for scalar code.\n");
+ return false;
+ }
}
else
{
@@ -5573,7 +5690,9 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
}
if ((double_reduc
- || STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION)
+ || STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION
+ || STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
+ == INTEGER_INDUC_COND_REDUCTION)
&& ncopies > 1)
{
if (dump_enabled_p ())
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 6ad0cc452f1168ce6ee48cc03ae140fc947f2a8d..b03eb964da1b6ed6021734b2b7ecc01beb75a525 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -64,7 +64,8 @@ enum vect_def_type {
/* Define type of reduction. */
enum vect_reduction_type {
TREE_CODE_REDUCTION,
- COND_REDUCTION
+ COND_REDUCTION,
+ INTEGER_INDUC_COND_REDUCTION
};
#define VECTORIZABLE_CYCLE_DEF(D) (((D) == vect_reduction_def) \
next reply other threads:[~2015-11-11 12:22 UTC|newest]
Thread overview: 5+ messages / expand[flat|nested] mbox.gz Atom feed top
2015-11-11 12:22 Alan Hayward [this message]
2015-11-11 13:25 ` Richard Biener
2015-11-11 18:54 ` Alan Hayward
2015-11-12 10:54 ` Richard Biener
2015-11-13 10:52 ` Alan Hayward
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=D268E36F.98B3%alan.hayward@arm.com \
--to=alan.hayward@arm.com \
--cc=gcc-patches@gcc.gnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).