From: Richard Biener <rguenther@suse.de>
To: gcc-patches@gcc.gnu.org
Subject: [PATCH] tree-optimization/101264 - rework SLP "any" permute forward prop
Date: Wed, 30 Jun 2021 11:29:29 +0200 (CEST) [thread overview]
Message-ID: <6oq37rq-3673-qs26-p0po-59q29s21s1@fhfr.qr> (raw)
This integrates the forward propagation of SLP "any" permutes into
the main propagation stage as a separate single-pass propagation
didn't work out.
It does make the propagation iterate more - propagation in both
directions doesn't tend to behave nicely. I've checked on CPU 2017
fprate and it isn't too bad:
#iters #count
2 30810
3 7386
4 1250
5 140
6 6
7 3
before and
2 30812
3 7303
4 1218
5 122
6 33
7 56
8 26
9 12
10 4
11 1
12 2
13 2
14 4
after. So yes, the peak number of required iterations grows
significantly but the majority is still covered with three iterations.
Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.
2021-06-30 Richard Biener <rguenther@suse.de>
PR tree-optimization/101264
* tree-vect-slp.c (vect_optimize_slp): Propagate the
computed perm_in to all "any" permute successors
we cannot de-duplicate immediately.
* gfortran.dg/pr101264.f90: New testcase.
---
gcc/testsuite/gfortran.dg/pr101264.f90 | 94 ++++++++++++++++++++++++++
gcc/tree-vect-slp.c | 79 ++++++----------------
2 files changed, 115 insertions(+), 58 deletions(-)
create mode 100644 gcc/testsuite/gfortran.dg/pr101264.f90
diff --git a/gcc/testsuite/gfortran.dg/pr101264.f90 b/gcc/testsuite/gfortran.dg/pr101264.f90
new file mode 100644
index 00000000000..5602a709a36
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/pr101264.f90
@@ -0,0 +1,94 @@
+! { dg-do compile }
+! { dg-options "-Ofast" }
+ SUBROUTINE foo (a,b,c,d,trigs,inc1,inc2,inc3,inc4,lot,n,la)
+ IMPLICIT NONE (type, external)
+ INTEGER, PARAMETER :: wp = 8
+ INTEGER, PARAMETER :: iwp = 4
+ INTEGER(iwp) :: inc1
+ INTEGER(iwp) :: inc2
+ INTEGER(iwp) :: inc3
+ INTEGER(iwp) :: inc4
+ INTEGER(iwp) :: la
+ INTEGER(iwp) :: lot
+ INTEGER(iwp) :: n
+
+ REAL(wp) :: a(*)
+ REAL(wp) :: b(*)
+ REAL(wp) :: c(*)
+ REAL(wp) :: d(*)
+ REAL(wp) :: trigs(*)
+
+ REAL(wp) :: c1
+ REAL(wp) :: c2
+ REAL(wp) :: s1
+ REAL(wp) :: s2
+ REAL(wp) :: sin60
+
+ INTEGER(iwp) :: i
+ INTEGER(iwp) :: ia
+ INTEGER(iwp) :: ib
+ INTEGER(iwp) :: ibase
+ INTEGER(iwp) :: ic
+ INTEGER(iwp) :: iink
+ INTEGER(iwp) :: ijk
+ INTEGER(iwp) :: j
+ INTEGER(iwp) :: ja
+ INTEGER(iwp) :: jb
+ INTEGER(iwp) :: jbase
+ INTEGER(iwp) :: jc
+ INTEGER(iwp) :: jink
+ INTEGER(iwp) :: jump
+ INTEGER(iwp) :: k
+ INTEGER(iwp) :: kb
+ INTEGER(iwp) :: kc
+ INTEGER(iwp) :: kstop
+ INTEGER(iwp) :: l
+ INTEGER(iwp) :: m
+
+ sin60=0.866025403784437_wp
+
+ ia = 1
+ ib = ia + (2*m-la)*inc1
+ ic = ib
+ ja = 1
+ jb = ja + jink
+ jc = jb + jink
+
+ DO k = la, kstop, la
+ kb = k + k
+ kc = kb + kb
+ c1 = trigs(kb+1)
+ s1 = trigs(kb+2)
+ c2 = trigs(kc+1)
+ s2 = trigs(kc+2)
+ ibase = 0
+ DO l = 1, la
+ i = ibase
+ j = jbase
+ DO ijk = 1, lot
+ c(ja+j) = a(ia+i) + (a(ib+i)+a(ic+i))
+ d(ja+j) = b(ia+i) + (b(ib+i)-b(ic+i))
+ c(jb+j) = c1*((a(ia+i)-0.5_wp*(a(ib+i)+a(ic+i)))-(sin60*(b(ib+i)+ &
+ & b(ic+i)))) &
+ & - s1*((b(ia+i)-0.5_wp*(b(ib+i)-b(ic+i)))+(sin60*(a(ib+i)- &
+ & a(ic+i))))
+ d(jb+j) = s1*((a(ia+i)-0.5_wp*(a(ib+i)+a(ic+i)))-(sin60*(b(ib+i)+ &
+ & b(ic+i)))) &
+ & + c1*((b(ia+i)-0.5_wp*(b(ib+i)-b(ic+i)))+(sin60*(a(ib+i)- &
+ & a(ic+i))))
+ c(jc+j) = c2*((a(ia+i)-0.5_wp*(a(ib+i)+a(ic+i)))+(sin60*(b(ib+i)+ &
+ & b(ic+i)))) &
+ & - s2*((b(ia+i)-0.5_wp*(b(ib+i)-b(ic+i)))-(sin60*(a(ib+i)- &
+ & a(ic+i))))
+ i = i + inc3
+ j = j + inc4
+ END DO
+ ibase = ibase + inc1
+ jbase = jbase + inc2
+ END DO
+ ia = ia + iink
+ ib = ib + iink
+ ic = ic - iink
+ jbase = jbase + jump
+ END DO
+ END
diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
index 9155af499b3..10195d3629f 100644
--- a/gcc/tree-vect-slp.c
+++ b/gcc/tree-vect-slp.c
@@ -3729,6 +3729,7 @@ vect_optimize_slp (vec_info *vinfo)
perm = vertices[idx].perm_out;
else
{
+ bool any_succ_perm_out_m1 = false;
perm = vertices[idx].get_perm_in ();
for (graph_edge *succ = slpg->vertices[idx].succ;
succ; succ = succ->succ_next)
@@ -3742,7 +3743,15 @@ vect_optimize_slp (vec_info *vinfo)
For example see gcc.dg/vect/bb-slp-14.c for a case
that would break. */
if (succ_perm == -1)
- continue;
+ {
+ /* When we handled a non-leaf optimistically, note
+ that so we can adjust its outgoing permute below. */
+ slp_tree succ_node = vertices[succ_idx].node;
+ if (SLP_TREE_DEF_TYPE (succ_node) != vect_external_def
+ && SLP_TREE_DEF_TYPE (succ_node) != vect_constant_def)
+ any_succ_perm_out_m1 = true;
+ continue;
+ }
if (perm == -1)
perm = succ_perm;
else if (succ_perm == 0
@@ -3753,25 +3762,19 @@ vect_optimize_slp (vec_info *vinfo)
}
}
- /* If this is a node we do not want to eventually unshare
- but it can be permuted at will, verify all users have
- the same permutations registered and otherwise drop to
- zero. */
- if (perm == -1
- && SLP_TREE_DEF_TYPE (node) != vect_external_def
- && SLP_TREE_DEF_TYPE (node) != vect_constant_def)
+ /* Adjust any incoming permutes we treated optimistically. */
+ if (perm != -1 && any_succ_perm_out_m1)
{
- int preds_perm = -1;
- for (graph_edge *pred = slpg->vertices[idx].pred;
- pred; pred = pred->pred_next)
+ for (graph_edge *succ = slpg->vertices[idx].succ;
+ succ; succ = succ->succ_next)
{
- int pred_perm = vertices[pred->src].get_perm_in ();
- if (preds_perm == -1)
- preds_perm = pred_perm;
- else if (!vect_slp_perms_eq (perms,
- pred_perm, preds_perm))
- perm = 0;
+ slp_tree succ_node = vertices[succ->dest].node;
+ if (vertices[succ->dest].perm_out == -1
+ && SLP_TREE_DEF_TYPE (succ_node) != vect_external_def
+ && SLP_TREE_DEF_TYPE (succ_node) != vect_constant_def)
+ vertices[succ->dest].perm_out = perm;
}
+ changed = true;
}
if (!vect_slp_perms_eq (perms, perm,
@@ -3840,47 +3843,7 @@ vect_optimize_slp (vec_info *vinfo)
}
}
while (changed);
- statistics_counter_event (cfun, "SLP optimize perm iterations", iteration);
-
- /* Compute pre-order. */
- auto_vec<int> heads;
- heads.reserve (vinfo->slp_instances.length ());
- for (slp_instance inst : vinfo->slp_instances)
- heads.quick_push (SLP_INSTANCE_TREE (inst)->vertex);
- auto_vec<int> po;
- graphds_dfs (slpg, &heads[0], heads.length (), &po, true, NULL, NULL);
-
- /* Propagate materialized permutes to "any" permute nodes. For heads
- ending up as "any" (reductions with just invariants), set them to
- no permute. */
- for (int idx : heads)
- if (vertices[idx].perm_out == -1)
- vertices[idx].perm_out = 0;
- for (i = po.length (); i > 0; --i)
- {
- int idx = po[i-1];
- int perm_in = vertices[idx].get_perm_in ();
- slp_tree node = vertices[idx].node;
- if (SLP_TREE_DEF_TYPE (node) == vect_external_def
- || SLP_TREE_DEF_TYPE (node) == vect_constant_def)
- continue;
- gcc_assert (perm_in != -1);
- for (graph_edge *succ = slpg->vertices[idx].succ;
- succ; succ = succ->succ_next)
- {
- slp_tree succ_node = vertices[succ->dest].node;
- if (SLP_TREE_DEF_TYPE (succ_node) == vect_external_def
- || SLP_TREE_DEF_TYPE (succ_node) == vect_constant_def)
- continue;
- if (vertices[succ->dest].perm_out == -1)
- vertices[succ->dest].perm_out = perm_in;
- else
- /* Propagation should have ensured that all preds have the same
- permutation. */
- gcc_assert (vect_slp_perms_eq (perms, perm_in,
- vertices[succ->dest].perm_out));
- }
- }
+ statistics_histogram_event (cfun, "SLP optimize perm iterations", iteration);
/* Materialize. */
for (i = 0; i < vertices.length (); ++i)
--
2.26.2
reply other threads:[~2021-06-30 9:29 UTC|newest]
Thread overview: [no followups] expand[flat|nested] mbox.gz Atom feed
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=6oq37rq-3673-qs26-p0po-59q29s21s1@fhfr.qr \
--to=rguenther@suse.de \
--cc=gcc-patches@gcc.gnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).