public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] predcom: Enabled by loop vect at O2 [PR100794]
@ 2021-06-02  9:34 Kewen.Lin
  2021-06-02 17:19 ` Richard Sandiford
  0 siblings, 1 reply; 4+ messages in thread
From: Kewen.Lin @ 2021-06-02  9:34 UTC (permalink / raw)
  To: GCC Patches; +Cc: Richard Biener, Segher Boessenkool, Bill Schmidt

[-- Attachment #1: Type: text/plain, Size: 1751 bytes --]

Hi,

As PR100794 shows, in the current implementation PRE bypasses
some optimization to avoid introducing loop carried dependence
which stops loop vectorizer to vectorize the loop.  At -O2,
there is no downstream pass to re-catch this kind of opportunity
if loop vectorizer fails to vectorize that loop.

This patch follows Richi's suggestion in the PR, if predcom flag
isn't set and loop vectorization will enable predcom without any
unrolling implicitly.  The Power9 SPEC2017 evaluation showed it
can speed up 521.wrf_r 3.30% and 554.roms_r 1.08% at very-cheap
cost model, no remarkable impact at cheap cost model, the build
time and size impact is fine (see the PR for the details).

By the way, I tested another proposal to guard PRE not skip the
optimization for cheap and very-cheap vect cost models, the
evaluation results showed it's fine with very cheap cost model,
but it can degrade some bmks like 521.wrf_r -9.17% and
549.fotonik3d_r -2.07% etc.

Bootstrapped/regtested on powerpc64le-linux-gnu P9,
x86_64-redhat-linux and aarch64-linux-gnu.

Is it ok for trunk?

BR,
Kewen
-----
gcc/ChangeLog:

	PR tree-optimization/100794
	* tree-predcom.c (tree_predictive_commoning_loop): Add parameter
	allow_unroll_p and only allow unrolling when it's true.
	(tree_predictive_commoning): Add parameter allow_unroll_p and
	adjust for it.
	(run_tree_predictive_commoning): Likewise.
	(class pass_predcom): Add private member allow_unroll_p.
	(pass_predcom::pass_predcom): Init allow_unroll_p.
	(pass_predcom::gate): Check flag_tree_loop_vectorize and 
	global_options_set.x_flag_predictive_commoning.
	(pass_predcom::execute): Adjust for allow_unroll_p.

gcc/testsuite/ChangeLog:

	PR tree-optimization/100794
	* gcc.dg/tree-ssa/pr100794.c: New test.

[-- Attachment #2: predcom_enabled_by_loop_vect.diff --]
[-- Type: text/plain, Size: 4540 bytes --]

 gcc/testsuite/gcc.dg/tree-ssa/pr100794.c | 20 +++++++++
 gcc/tree-predcom.c                       | 57 +++++++++++++++++-------
 2 files changed, 60 insertions(+), 17 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr100794.c

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr100794.c b/gcc/testsuite/gcc.dg/tree-ssa/pr100794.c
new file mode 100644
index 00000000000..6f707ae7fba
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr100794.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-loop-vectorize -fdump-tree-pcom-details -fdisable-tree-vect" } */
+
+extern double arr[100];
+extern double foo (double, double);
+extern double sum;
+
+void
+test (int i_0, int i_n)
+{
+  int i;
+  for (i = i_0; i < i_n - 1; i++)
+    {
+      double a = arr[i];
+      double b = arr[i + 1];
+      sum += a * b;
+    }
+}
+
+/* { dg-final { scan-tree-dump "Executing predictive commoning without unrolling" "pcom" } } */
diff --git a/gcc/tree-predcom.c b/gcc/tree-predcom.c
index 02f911a08bb..65a93c8e505 100644
--- a/gcc/tree-predcom.c
+++ b/gcc/tree-predcom.c
@@ -3178,13 +3178,13 @@ insert_init_seqs (class loop *loop, vec<chain_p> chains)
    applied to this loop.  */
 
 static unsigned
-tree_predictive_commoning_loop (class loop *loop)
+tree_predictive_commoning_loop (class loop *loop, bool allow_unroll_p)
 {
   vec<data_reference_p> datarefs;
   vec<ddr_p> dependences;
   struct component *components;
   vec<chain_p> chains = vNULL;
-  unsigned unroll_factor;
+  unsigned unroll_factor = 0;
   class tree_niter_desc desc;
   bool unroll = false, loop_closed_ssa = false;
 
@@ -3272,11 +3272,13 @@ tree_predictive_commoning_loop (class loop *loop)
       dump_chains (dump_file, chains);
     }
 
-  /* Determine the unroll factor, and if the loop should be unrolled, ensure
-     that its number of iterations is divisible by the factor.  */
-  unroll_factor = determine_unroll_factor (chains);
-  unroll = (unroll_factor > 1
-	    && can_unroll_loop_p (loop, unroll_factor, &desc));
+  if (allow_unroll_p)
+    /* Determine the unroll factor, and if the loop should be unrolled, ensure
+       that its number of iterations is divisible by the factor.  */
+    unroll_factor = determine_unroll_factor (chains);
+
+  if (unroll_factor > 1)
+    unroll = can_unroll_loop_p (loop, unroll_factor, &desc);
 
   /* Execute the predictive commoning transformations, and possibly unroll the
      loop.  */
@@ -3319,7 +3321,7 @@ tree_predictive_commoning_loop (class loop *loop)
 /* Runs predictive commoning.  */
 
 unsigned
-tree_predictive_commoning (void)
+tree_predictive_commoning (bool allow_unroll_p)
 {
   class loop *loop;
   unsigned ret = 0, changed = 0;
@@ -3328,7 +3330,7 @@ tree_predictive_commoning (void)
   FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
     if (optimize_loop_for_speed_p (loop))
       {
-	changed |= tree_predictive_commoning_loop (loop);
+	changed |= tree_predictive_commoning_loop (loop, allow_unroll_p);
       }
   free_original_copy_tables ();
 
@@ -3355,12 +3357,12 @@ tree_predictive_commoning (void)
 /* Predictive commoning Pass.  */
 
 static unsigned
-run_tree_predictive_commoning (struct function *fun)
+run_tree_predictive_commoning (struct function *fun, bool allow_unroll_p)
 {
   if (number_of_loops (fun) <= 1)
     return 0;
 
-  return tree_predictive_commoning ();
+  return tree_predictive_commoning (allow_unroll_p);
 }
 
 namespace {
@@ -3382,15 +3384,36 @@ class pass_predcom : public gimple_opt_pass
 {
 public:
   pass_predcom (gcc::context *ctxt)
-    : gimple_opt_pass (pass_data_predcom, ctxt)
+    : gimple_opt_pass (pass_data_predcom, ctxt),
+      allow_unroll_p (true)
   {}
 
   /* opt_pass methods: */
-  virtual bool gate (function *) { return flag_predictive_commoning != 0; }
-  virtual unsigned int execute (function *fun)
-    {
-      return run_tree_predictive_commoning (fun);
-    }
+  virtual bool
+  gate (function *)
+  {
+    if (flag_predictive_commoning != 0)
+      return true;
+    /* Loop vectorization enables predictive commoning implicitly
+       only if predictive commoning isn't set explicitly, and it
+       doesn't allow unrolling.  */
+    if (flag_tree_loop_vectorize
+	&& !global_options_set.x_flag_predictive_commoning)
+      {
+	allow_unroll_p = false;
+	return true;
+      }
+    return false;
+  }
+
+  virtual unsigned int
+  execute (function *fun)
+  {
+    return run_tree_predictive_commoning (fun, allow_unroll_p);
+  }
+
+private:
+  bool allow_unroll_p;
 
 }; // class pass_predcom
 
-- 
2.17.1


^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2021-06-07 14:51 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-06-02  9:34 [PATCH] predcom: Enabled by loop vect at O2 [PR100794] Kewen.Lin
2021-06-02 17:19 ` Richard Sandiford
2021-06-03  3:33   ` [PATCH v2] " Kewen.Lin
2021-06-07 14:51     ` Richard Biener

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).