* [PATCH] Change SLP representation of reduction chains
@ 2019-10-24 15:06 Richard Biener
0 siblings, 0 replies; only message in thread
From: Richard Biener @ 2019-10-24 15:06 UTC (permalink / raw)
To: gcc-patches
Instead of representing an SLP reduction chain as
t.c:4:3: note: node 0x3751bf0 (max_nunits=1)
t.c:4:3: note: stmt 0 sum_24 = _5 + sum_30;
t.c:4:3: note: stmt 1 sum_25 = _10 + sum_24;
t.c:4:3: note: stmt 2 sum_26 = _14 + sum_25;
t.c:4:3: note: stmt 3 sum_27 = _18 + sum_26;
t.c:4:3: note: children 0x38eb4d0 0x374acb0
t.c:4:3: note: node 0x38eb4d0 (max_nunits=1)
t.c:4:3: note: stmt 0 _5 = *_4;
t.c:4:3: note: stmt 1 _10 = *_9;
t.c:4:3: note: stmt 2 _14 = *_13;
t.c:4:3: note: stmt 3 _18 = *_17;
t.c:4:3: note: node 0x374acb0 (max_nunits=1)
t.c:4:3: note: stmt 0 sum_30 = PHI <0(5), sum_27(6)>
t.c:4:3: note: stmt 1 sum_24 = _5 + sum_30;
t.c:4:3: note: stmt 2 sum_25 = _10 + sum_24;
t.c:4:3: note: stmt 3 sum_26 = _14 + sum_25;
we want
t.c:4:3: note: node 0x3d9d110 (max_nunits=1)
t.c:4:3: note: stmt 0 sum_24 = _5 + sum_30;
t.c:4:3: note: stmt 1 sum_25 = _10 + sum_24;
t.c:4:3: note: stmt 2 sum_26 = _14 + sum_25;
t.c:4:3: note: stmt 3 sum_27 = _18 + sum_26;
t.c:4:3: note: children 0x3d9d070 0x3d9d0c0
t.c:4:3: note: node 0x3d9d070 (max_nunits=1)
t.c:4:3: note: stmt 0 _5 = *_4;
t.c:4:3: note: stmt 1 _10 = *_9;
t.c:4:3: note: stmt 2 _14 = *_13;
t.c:4:3: note: stmt 3 _18 = *_17;
t.c:4:3: note: node 0x3d9d0c0 (max_nunits=1)
t.c:4:3: note: stmt 0 sum_30 = PHI <0(5), sum_27(6)>
t.c:4:3: note: stmt 1 sum_30 = PHI <0(5), sum_27(6)>
t.c:4:3: note: stmt 2 sum_30 = PHI <0(5), sum_27(6)>
t.c:4:3: note: stmt 3 sum_30 = PHI <0(5), sum_27(6)>
where the reduction operand node repeats the reduction PHI in every lane, so that we correctly represent the reduction chain as if it had been re-associated.
Bootstrapped and tested on x86_64-unknown-linux-gnu, applied.
Richard.
2019-10-24 Richard Biener <rguenther@suse.de>
* tree-vect-slp.c (vect_get_and_check_slp_defs): For reduction
chains try harder with operand swapping and instead of
putting a shifted chain into the reduction operands put
a repetition of the final reduction op there as if we'd
reassociate the expression.
* gcc.dg/vect/slp-reduc-10a.c: New testcase.
* gcc.dg/vect/slp-reduc-10b.c: Likewise.
* gcc.dg/vect/slp-reduc-10c.c: Likewise.
* gcc.dg/vect/slp-reduc-10d.c: Likewise.
* gcc.dg/vect/slp-reduc-10e.c: Likewise.
Index: gcc/tree-vect-slp.c
===================================================================
--- gcc/tree-vect-slp.c (revision 277372)
+++ gcc/tree-vect-slp.c (working copy)
@@ -433,20 +433,35 @@ again:
the def-stmt/s of the first stmt. Allow different definition
types for reduction chains: the first stmt must be a
vect_reduction_def (a phi node), and the rest
- vect_internal_def. */
+ end in the reduction chain. */
tree type = TREE_TYPE (oprnd);
if ((oprnd_info->first_dt != dt
&& !(oprnd_info->first_dt == vect_reduction_def
- && dt == vect_internal_def)
+ && !STMT_VINFO_DATA_REF (stmt_info)
+ && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
+ && def_stmt_info
+ && !STMT_VINFO_DATA_REF (def_stmt_info)
+ && (REDUC_GROUP_FIRST_ELEMENT (def_stmt_info)
+ == REDUC_GROUP_FIRST_ELEMENT (stmt_info)))
&& !((oprnd_info->first_dt == vect_external_def
|| oprnd_info->first_dt == vect_constant_def)
&& (dt == vect_external_def
|| dt == vect_constant_def)))
- || !types_compatible_p (oprnd_info->first_op_type, type))
+ || !types_compatible_p (oprnd_info->first_op_type, type)
+ || (!STMT_VINFO_DATA_REF (stmt_info)
+ && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
+ && ((!def_stmt_info
+ || STMT_VINFO_DATA_REF (def_stmt_info)
+ || (REDUC_GROUP_FIRST_ELEMENT (def_stmt_info)
+ != REDUC_GROUP_FIRST_ELEMENT (stmt_info)))
+ != (oprnd_info->first_dt != vect_reduction_def))))
{
/* Try swapping operands if we got a mismatch. */
if (i == commutative_op && !swapped)
{
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "trying swapped operands\n");
swapped = true;
goto again;
}
@@ -484,9 +499,26 @@ again:
oprnd_info->ops.quick_push (oprnd);
break;
+ case vect_internal_def:
case vect_reduction_def:
+ if (oprnd_info->first_dt == vect_reduction_def
+ && !STMT_VINFO_DATA_REF (stmt_info)
+ && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
+ && !STMT_VINFO_DATA_REF (def_stmt_info)
+ && (REDUC_GROUP_FIRST_ELEMENT (def_stmt_info)
+ == REDUC_GROUP_FIRST_ELEMENT (stmt_info)))
+ {
+ /* For a SLP reduction chain we want to duplicate the
+ reduction to each of the chain members. That gets
+ us a sane SLP graph (still the stmts are not 100%
+ correct wrt the initial values). */
+ gcc_assert (!first);
+ oprnd_info->def_stmts.quick_push (oprnd_info->def_stmts[0]);
+ oprnd_info->ops.quick_push (oprnd_info->ops[0]);
+ break;
+ }
+ /* Fallthru. */
case vect_induction_def:
- case vect_internal_def:
oprnd_info->def_stmts.quick_push (def_stmt_info);
oprnd_info->ops.quick_push (oprnd);
break;
@@ -1182,15 +1214,8 @@ vect_build_slp_tree_2 (vec_info *vinfo,
/* Else def types have to match. */
stmt_vec_info other_info;
FOR_EACH_VEC_ELT (stmts, i, other_info)
- {
- /* But for reduction chains only check on the first stmt. */
- if (!STMT_VINFO_DATA_REF (other_info)
- && REDUC_GROUP_FIRST_ELEMENT (other_info)
- && REDUC_GROUP_FIRST_ELEMENT (other_info) != stmt_info)
- continue;
- if (STMT_VINFO_DEF_TYPE (other_info) != def_type)
- return NULL;
- }
+ if (STMT_VINFO_DEF_TYPE (other_info) != def_type)
+ return NULL;
}
else
return NULL;
Index: gcc/testsuite/gcc.dg/vect/slp-reduc-10a.c
===================================================================
--- gcc/testsuite/gcc.dg/vect/slp-reduc-10a.c (nonexistent)
+++ gcc/testsuite/gcc.dg/vect/slp-reduc-10a.c (working copy)
@@ -0,0 +1,82 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-additional-options "-fgimple" } */
+
+int __GIMPLE (ssa,guessed_local(118111600),startwith("dce3"))
+foo (int * x, int n)
+{
+ int i;
+ int sum;
+ int _1;
+ long unsigned int _2;
+ long unsigned int _3;
+ int * _4;
+ int _5;
+ __SIZETYPE__ _7;
+ __SIZETYPE__ _8;
+ int * _9;
+ int _10;
+ __SIZETYPE__ _11;
+ __SIZETYPE__ _12;
+ int * _13;
+ int _14;
+ __SIZETYPE__ _15;
+ __SIZETYPE__ _16;
+ int * _17;
+ int _18;
+
+ __BB(2,guessed_local(118111600)):
+ if (n_21(D) > 0)
+ goto __BB5(guessed(119453778));
+ else
+ goto __BB7(guessed(14763950));
+
+ __BB(5,guessed_local(105119324)):
+ goto __BB3(precise(134217728));
+
+ __BB(3,loop_header(1),guessed_local(955630224)):
+ sum_30 = __PHI (__BB5: 0, __BB6: sum_27);
+ i_32 = __PHI (__BB5: 0, __BB6: i_28);
+ _1 = i_32 * 4;
+ _2 = (long unsigned int) _1;
+ _3 = _2 * 4ul;
+ _4 = x_23(D) + _3;
+ _5 = __MEM <int> (_4);
+ sum_24 = _5 + sum_30;
+ _7 = _2 + 1ul;
+ _8 = _7 * 4ul;
+ _9 = x_23(D) + _8;
+ _10 = __MEM <int> (_9);
+ sum_25 = _10 + sum_24;
+ _11 = _2 + 2ul;
+ _12 = _11 * 4ul;
+ _13 = x_23(D) + _12;
+ _14 = __MEM <int> (_13);
+ sum_26 = _14 + sum_25;
+ _15 = _2 + 3ul;
+ _16 = _15 * 4ul;
+ _17 = x_23(D) + _16;
+ _18 = __MEM <int> (_17);
+ sum_27 = _18 + sum_26;
+ i_28 = i_32 + 1;
+ if (n_21(D) > i_28)
+ goto __BB6(guessed(119453778));
+ else
+ goto __BB8(guessed(14763950));
+
+ __BB(8,guessed_local(105119324)):
+ goto __BB4(precise(134217728));
+
+ __BB(6,guessed_local(850510900)):
+ goto __BB3(precise(134217728));
+
+ __BB(7,guessed_local(12992276)):
+ goto __BB4(precise(134217728));
+
+ __BB(4,guessed_local(118111601)):
+ sum_31 = __PHI (__BB7: 0, __BB8: sum_27);
+ return sum_31;
+
+}
+
+/* { dg-final { scan-tree-dump "Decided to SLP 1 instances" "vect" } } */
Index: gcc/testsuite/gcc.dg/vect/slp-reduc-10b.c
===================================================================
--- gcc/testsuite/gcc.dg/vect/slp-reduc-10b.c (nonexistent)
+++ gcc/testsuite/gcc.dg/vect/slp-reduc-10b.c (working copy)
@@ -0,0 +1,82 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-additional-options "-fgimple" } */
+
+int __GIMPLE (ssa,guessed_local(118111600),startwith("dce3"))
+foo (int * x, int n)
+{
+ int i;
+ int sum;
+ int _1;
+ long unsigned int _2;
+ long unsigned int _3;
+ int * _4;
+ int _5;
+ __SIZETYPE__ _7;
+ __SIZETYPE__ _8;
+ int * _9;
+ int _100;
+ __SIZETYPE__ _11;
+ __SIZETYPE__ _12;
+ int * _13;
+ int _14;
+ __SIZETYPE__ _15;
+ __SIZETYPE__ _16;
+ int * _17;
+ int _18;
+
+ __BB(2,guessed_local(118111600)):
+ if (n_21(D) > 0)
+ goto __BB5(guessed(119453778));
+ else
+ goto __BB7(guessed(14763950));
+
+ __BB(5,guessed_local(105119324)):
+ goto __BB3(precise(134217728));
+
+ __BB(3,loop_header(1),guessed_local(955630224)):
+ sum_30 = __PHI (__BB5: 0, __BB6: sum_27);
+ i_32 = __PHI (__BB5: 0, __BB6: i_28);
+ _1 = i_32 * 4;
+ _2 = (long unsigned int) _1;
+ _3 = _2 * 4ul;
+ _4 = x_23(D) + _3;
+ _5 = __MEM <int> (_4);
+ sum_24 = _5 + sum_30;
+ _7 = _2 + 1ul;
+ _8 = _7 * 4ul;
+ _9 = x_23(D) + _8;
+ _100 = __MEM <int> (_9);
+ sum_25 = sum_24 + _100;
+ _11 = _2 + 2ul;
+ _12 = _11 * 4ul;
+ _13 = x_23(D) + _12;
+ _14 = __MEM <int> (_13);
+ sum_26 = _14 + sum_25;
+ _15 = _2 + 3ul;
+ _16 = _15 * 4ul;
+ _17 = x_23(D) + _16;
+ _18 = __MEM <int> (_17);
+ sum_27 = _18 + sum_26;
+ i_28 = i_32 + 1;
+ if (n_21(D) > i_28)
+ goto __BB6(guessed(119453778));
+ else
+ goto __BB8(guessed(14763950));
+
+ __BB(8,guessed_local(105119324)):
+ goto __BB4(precise(134217728));
+
+ __BB(6,guessed_local(850510900)):
+ goto __BB3(precise(134217728));
+
+ __BB(7,guessed_local(12992276)):
+ goto __BB4(precise(134217728));
+
+ __BB(4,guessed_local(118111601)):
+ sum_31 = __PHI (__BB7: 0, __BB8: sum_27);
+ return sum_31;
+
+}
+
+/* { dg-final { scan-tree-dump "Decided to SLP 1 instances" "vect" } } */
Index: gcc/testsuite/gcc.dg/vect/slp-reduc-10c.c
===================================================================
--- gcc/testsuite/gcc.dg/vect/slp-reduc-10c.c (nonexistent)
+++ gcc/testsuite/gcc.dg/vect/slp-reduc-10c.c (working copy)
@@ -0,0 +1,82 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-additional-options "-fgimple" } */
+
+int __GIMPLE (ssa,guessed_local(118111600),startwith("dce3"))
+foo (int * x, int n)
+{
+ int i;
+ int sum;
+ int _1;
+ long unsigned int _2;
+ long unsigned int _3;
+ int * _4;
+ int _5;
+ __SIZETYPE__ _7;
+ __SIZETYPE__ _8;
+ int * _9;
+ int _10;
+ __SIZETYPE__ _11;
+ __SIZETYPE__ _12;
+ int * _13;
+ int _100;
+ __SIZETYPE__ _15;
+ __SIZETYPE__ _16;
+ int * _17;
+ int _18;
+
+ __BB(2,guessed_local(118111600)):
+ if (n_21(D) > 0)
+ goto __BB5(guessed(119453778));
+ else
+ goto __BB7(guessed(14763950));
+
+ __BB(5,guessed_local(105119324)):
+ goto __BB3(precise(134217728));
+
+ __BB(3,loop_header(1),guessed_local(955630224)):
+ sum_30 = __PHI (__BB5: 0, __BB6: sum_27);
+ i_32 = __PHI (__BB5: 0, __BB6: i_28);
+ _1 = i_32 * 4;
+ _2 = (long unsigned int) _1;
+ _3 = _2 * 4ul;
+ _4 = x_23(D) + _3;
+ _5 = __MEM <int> (_4);
+ sum_24 = _5 + sum_30;
+ _7 = _2 + 1ul;
+ _8 = _7 * 4ul;
+ _9 = x_23(D) + _8;
+ _10 = __MEM <int> (_9);
+ sum_25 = _10 + sum_24;
+ _11 = _2 + 2ul;
+ _12 = _11 * 4ul;
+ _13 = x_23(D) + _12;
+ _100 = __MEM <int> (_13);
+ sum_26 = sum_25 + _100;
+ _15 = _2 + 3ul;
+ _16 = _15 * 4ul;
+ _17 = x_23(D) + _16;
+ _18 = __MEM <int> (_17);
+ sum_27 = _18 + sum_26;
+ i_28 = i_32 + 1;
+ if (n_21(D) > i_28)
+ goto __BB6(guessed(119453778));
+ else
+ goto __BB8(guessed(14763950));
+
+ __BB(8,guessed_local(105119324)):
+ goto __BB4(precise(134217728));
+
+ __BB(6,guessed_local(850510900)):
+ goto __BB3(precise(134217728));
+
+ __BB(7,guessed_local(12992276)):
+ goto __BB4(precise(134217728));
+
+ __BB(4,guessed_local(118111601)):
+ sum_31 = __PHI (__BB7: 0, __BB8: sum_27);
+ return sum_31;
+
+}
+
+/* { dg-final { scan-tree-dump "Decided to SLP 1 instances" "vect" } } */
Index: gcc/testsuite/gcc.dg/vect/slp-reduc-10d.c
===================================================================
--- gcc/testsuite/gcc.dg/vect/slp-reduc-10d.c (nonexistent)
+++ gcc/testsuite/gcc.dg/vect/slp-reduc-10d.c (working copy)
@@ -0,0 +1,82 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-additional-options "-fgimple" } */
+
+int __GIMPLE (ssa,guessed_local(118111600),startwith("dce3"))
+foo (int * x, int n)
+{
+ int i;
+ int sum;
+ int _1;
+ long unsigned int _2;
+ long unsigned int _3;
+ int * _4;
+ int _5;
+ __SIZETYPE__ _7;
+ __SIZETYPE__ _8;
+ int * _9;
+ int _10;
+ __SIZETYPE__ _11;
+ __SIZETYPE__ _12;
+ int * _13;
+ int _14;
+ __SIZETYPE__ _15;
+ __SIZETYPE__ _16;
+ int * _17;
+ int _100;
+
+ __BB(2,guessed_local(118111600)):
+ if (n_21(D) > 0)
+ goto __BB5(guessed(119453778));
+ else
+ goto __BB7(guessed(14763950));
+
+ __BB(5,guessed_local(105119324)):
+ goto __BB3(precise(134217728));
+
+ __BB(3,loop_header(1),guessed_local(955630224)):
+ sum_30 = __PHI (__BB5: 0, __BB6: sum_27);
+ i_32 = __PHI (__BB5: 0, __BB6: i_28);
+ _1 = i_32 * 4;
+ _2 = (long unsigned int) _1;
+ _3 = _2 * 4ul;
+ _4 = x_23(D) + _3;
+ _5 = __MEM <int> (_4);
+ sum_24 = _5 + sum_30;
+ _7 = _2 + 1ul;
+ _8 = _7 * 4ul;
+ _9 = x_23(D) + _8;
+ _10 = __MEM <int> (_9);
+ sum_25 = _10 + sum_24;
+ _11 = _2 + 2ul;
+ _12 = _11 * 4ul;
+ _13 = x_23(D) + _12;
+ _14 = __MEM <int> (_13);
+ sum_26 = _14 + sum_25;
+ _15 = _2 + 3ul;
+ _16 = _15 * 4ul;
+ _17 = x_23(D) + _16;
+ _100 = __MEM <int> (_17);
+ sum_27 = sum_26 + _100;
+ i_28 = i_32 + 1;
+ if (n_21(D) > i_28)
+ goto __BB6(guessed(119453778));
+ else
+ goto __BB8(guessed(14763950));
+
+ __BB(8,guessed_local(105119324)):
+ goto __BB4(precise(134217728));
+
+ __BB(6,guessed_local(850510900)):
+ goto __BB3(precise(134217728));
+
+ __BB(7,guessed_local(12992276)):
+ goto __BB4(precise(134217728));
+
+ __BB(4,guessed_local(118111601)):
+ sum_31 = __PHI (__BB7: 0, __BB8: sum_27);
+ return sum_31;
+
+}
+
+/* { dg-final { scan-tree-dump "Decided to SLP 1 instances" "vect" } } */
Index: gcc/testsuite/gcc.dg/vect/slp-reduc-10e.c
===================================================================
--- gcc/testsuite/gcc.dg/vect/slp-reduc-10e.c (nonexistent)
+++ gcc/testsuite/gcc.dg/vect/slp-reduc-10e.c (working copy)
@@ -0,0 +1,82 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-additional-options "-fgimple" } */
+
+int __GIMPLE (ssa,guessed_local(118111600),startwith("dce3"))
+foo (int * x, int n)
+{
+ int i;
+ int sum;
+ int _1;
+ long unsigned int _2;
+ long unsigned int _3;
+ int * _4;
+ int _100;
+ __SIZETYPE__ _7;
+ __SIZETYPE__ _8;
+ int * _9;
+ int _10;
+ __SIZETYPE__ _11;
+ __SIZETYPE__ _12;
+ int * _13;
+ int _14;
+ __SIZETYPE__ _15;
+ __SIZETYPE__ _16;
+ int * _17;
+ int _18;
+
+ __BB(2,guessed_local(118111600)):
+ if (n_21(D) > 0)
+ goto __BB5(guessed(119453778));
+ else
+ goto __BB7(guessed(14763950));
+
+ __BB(5,guessed_local(105119324)):
+ goto __BB3(precise(134217728));
+
+ __BB(3,loop_header(1),guessed_local(955630224)):
+ sum_30 = __PHI (__BB5: 0, __BB6: sum_27);
+ i_32 = __PHI (__BB5: 0, __BB6: i_28);
+ _1 = i_32 * 4;
+ _2 = (long unsigned int) _1;
+ _3 = _2 * 4ul;
+ _4 = x_23(D) + _3;
+ _100 = __MEM <int> (_4);
+ sum_24 = sum_30 + _100;
+ _7 = _2 + 1ul;
+ _8 = _7 * 4ul;
+ _9 = x_23(D) + _8;
+ _10 = __MEM <int> (_9);
+ sum_25 = _10 + sum_24;
+ _11 = _2 + 2ul;
+ _12 = _11 * 4ul;
+ _13 = x_23(D) + _12;
+ _14 = __MEM <int> (_13);
+ sum_26 = _14 + sum_25;
+ _15 = _2 + 3ul;
+ _16 = _15 * 4ul;
+ _17 = x_23(D) + _16;
+ _18 = __MEM <int> (_17);
+ sum_27 = _18 + sum_26;
+ i_28 = i_32 + 1;
+ if (n_21(D) > i_28)
+ goto __BB6(guessed(119453778));
+ else
+ goto __BB8(guessed(14763950));
+
+ __BB(8,guessed_local(105119324)):
+ goto __BB4(precise(134217728));
+
+ __BB(6,guessed_local(850510900)):
+ goto __BB3(precise(134217728));
+
+ __BB(7,guessed_local(12992276)):
+ goto __BB4(precise(134217728));
+
+ __BB(4,guessed_local(118111601)):
+ sum_31 = __PHI (__BB7: 0, __BB8: sum_27);
+ return sum_31;
+
+}
+
+/* { dg-final { scan-tree-dump "Decided to SLP 1 instances" "vect" } } */
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2019-10-24 15:01 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-10-24 15:06 [PATCH] Change SLP representation of reduction chains Richard Biener
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).