public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] Yet another simple fix to enhance outer-loop vectorization.
@ 2015-06-08 10:43 Yuri Rumyantsev
  2015-06-09 13:28 ` Richard Biener
  0 siblings, 1 reply; 8+ messages in thread
From: Yuri Rumyantsev @ 2015-06-08 10:43 UTC (permalink / raw)
  To: gcc-patches, Richard Biener, Igor Zamyatin

[-- Attachment #1: Type: text/plain, Size: 1036 bytes --]

Hi All,

Here is a simple fix which allows outer loops to be duplicated, so that
peeling for the number of iterations can be performed when the outer loop
is marked with pragma omp simd.

Bootstrap and regression testing did not show any new failures.
Is it OK for trunk?

ChangeLog:

2015-06-08  Yuri Rumyantsev  <ysrumyan@gmail.com>

* tree-vect-loop-manip.c (rename_variables_in_bb): Add argument
to allow renaming of PHI arguments on edges incoming from outer
loop header, add corresponding check before starting PHI iterator.
(slpeel_tree_duplicate_loop_to_edge_cfg): Introduce new bool
variable DUPLICATE_OUTER_LOOP and set it to true for outer loops
with true force_vectorize.  Set-up dominator for outer loop too.
Pass DUPLICATE_OUTER_LOOP as argument to rename_variables_in_bb.
(slpeel_can_duplicate_loop_p): Allow duplication of outer loop if it
was marked with force_vectorize and has restricted cfg.
* tree-vect-loop.c (vect_analyze_loop_2): Prohibit alignment peeling
for outer loops.

gcc/testsuite/ChangeLog:
* gcc.dg/vect/vect-outer-simd-2.c: New test.

[-- Attachment #2: patch.1 --]
[-- Type: application/octet-stream, Size: 5942 bytes --]

Index: testsuite/gcc.dg/vect/vect-outer-simd-2.c
===================================================================
--- testsuite/gcc.dg/vect/vect-outer-simd-2.c	(revision 0)
+++ testsuite/gcc.dg/vect/vect-outer-simd-2.c	(working copy)
@@ -0,0 +1,75 @@
+/* { dg-require-effective-target vect_simd_clones } */
+/* { dg-additional-options "-fopenmp-simd -ffast-math" } */
+#include <stdlib.h>
+#include "tree-vect.h"
+#define N 64
+
+float *px, *py;
+float *tx, *ty;
+float *x1, *z1, *t1, *t2;
+
+static void inline bar (const float cx, float cy,
+                         float *vx, float *vy)
+{
+  int j;
+    for (j = 0; j < N; ++j)
+    {
+        const float dx  = cx - px[j];
+        const float dy  = cy - py[j];
+        *vx               -= dx * tx[j];
+        *vy               -= dy * ty[j];
+    }
+}
+
+__attribute__((noinline, noclone)) void foo1 (int n)
+{
+  int i;
+#pragma omp simd
+  for (i=0; i<n; i++)
+    bar (px[i], py[i], x1+i, z1+i);
+}
+
+__attribute__((noinline, noclone)) void foo2 (int n)
+{
+  volatile int i;
+  for (i=0; i<n; i++)
+    bar (px[i], py[i], x1+i, z1+i);
+}
+
+
+int main ()
+{
+  float *X = (float*)malloc (N * 8 * sizeof (float));
+  int i;
+  int n = N - 1;
+  check_vect ();
+  px = &X[0];
+  py = &X[N * 1];
+  tx = &X[N * 2];
+  ty = &X[N * 3];
+  x1 = &X[N * 4];
+  z1 = &X[N * 5];
+  t1 = &X[N * 6];
+  t2 = &X[N * 7];
+
+  for (i=0; i<N; i++)
+    {
+      px[i] = (float) (i+2);
+      tx[i] = (float) (i+1);
+      py[i] = (float) (i+4);
+      ty[i] = (float) (i+3);
+      x1[i] = z1[i] = 1.0f;
+    }
+  foo1 (n);  /* vector variant.  */
+  for (i=0; i<N;i++)
+    {
+      t1[i] = x1[i]; x1[i] = 1.0f;
+      t2[i] = z1[i]; z1[i] = 1.0f;
+    }
+  foo2 (n);  /* scalar variant.  */
+  for (i=0; i<N; i++)
+    if (x1[i] != t1[i] || z1[i] != t2[i])
+      abort ();
+  return 0;
+}
+/* { dg-final { scan-tree-dump "OUTER LOOP VECTORIZED" "vect" } } */
Index: tree-vect-loop-manip.c
===================================================================
--- tree-vect-loop-manip.c	(revision 224100)
+++ tree-vect-loop-manip.c	(working copy)
@@ -97,10 +97,12 @@
 }
 
 
-/* Renames the variables in basic block BB.  */
+/* Renames the variables in basic block BB.  Allow renaming of PHI arguments
+   on edges incoming from outer-loop header if RENAME_FROM_OUTER_LOOP is
+   true.  */
 
 static void
-rename_variables_in_bb (basic_block bb)
+rename_variables_in_bb (basic_block bb, bool rename_from_outer_loop)
 {
   gimple stmt;
   use_operand_p use_p;
@@ -108,7 +110,14 @@
   edge e;
   edge_iterator ei;
   struct loop *loop = bb->loop_father;
+  struct loop *outer_loop = NULL;
 
+  if (rename_from_outer_loop)
+    {
+      gcc_assert (loop);
+      outer_loop = loop_outer (loop);
+    }
+
   for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
        gsi_next (&gsi))
     {
@@ -119,7 +128,8 @@
 
   FOR_EACH_EDGE (e, ei, bb->preds)
     {
-      if (!flow_bb_inside_loop_p (loop, e->src))
+      if (!flow_bb_inside_loop_p (loop, e->src)
+	  && (!rename_from_outer_loop || e->src != outer_loop->header))
 	continue;
       for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
 	   gsi_next (&gsi))
@@ -775,6 +785,7 @@
   bool was_imm_dom;
   basic_block exit_dest;
   edge exit, new_exit;
+  bool duplicate_outer_loop = false;
 
   exit = single_exit (loop);
   at_exit = (e == exit);
@@ -786,7 +797,10 @@
 
   bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1);
   get_loop_body_with_size (scalar_loop, bbs, scalar_loop->num_nodes);
-
+  /* Allow duplication of outer loops if they are marked with pragma
+     omp simd.  */
+  if (scalar_loop->force_vectorize && scalar_loop->inner)
+    duplicate_outer_loop = true;
   /* Check whether duplication is possible.  */
   if (!can_copy_bbs_p (bbs, scalar_loop->num_nodes))
     {
@@ -855,7 +869,7 @@
       redirect_edge_and_branch_force (e, new_preheader);
       flush_pending_stmts (e);
       set_immediate_dominator (CDI_DOMINATORS, new_preheader, e->src);
-      if (was_imm_dom)
+      if (was_imm_dom || duplicate_outer_loop)
 	set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_exit->src);
 
       /* And remove the non-necessary forwarder again.  Keep the other
@@ -898,7 +912,7 @@
     }
 
   for (unsigned i = 0; i < scalar_loop->num_nodes + 1; i++)
-    rename_variables_in_bb (new_bbs[i]);
+    rename_variables_in_bb (new_bbs[i], duplicate_outer_loop);
 
   if (scalar_loop != loop)
     {
@@ -985,7 +999,10 @@
    (3) it is single entry, single exit
    (4) its exit condition is the last stmt in the header
    (5) E is the entry/exit edge of LOOP.
- */
+   Allow duplication of outer loops if:
+   (1') it is marked with force_vectorize flag.
+   (2') it consists of exactly 5 basic blocks.
+   The remaining conditions listed above still apply.  */
 
 bool
 slpeel_can_duplicate_loop_p (const struct loop *loop, const_edge e)
@@ -995,6 +1012,11 @@
   gcond *orig_cond = get_loop_exit_condition (loop);
   gimple_stmt_iterator loop_exit_gsi = gsi_last_bb (exit_e->src);
 
+  if (loop->inner && loop->force_vectorize && loop->num_nodes == 5
+      && single_exit (loop) && (e == exit_e || e == entry_e)
+      && orig_cond && orig_cond == gsi_stmt (loop_exit_gsi))
+    return true;
+
   if (loop->inner
       /* All loops have an outer scope; the only case loop->outer is NULL is for
          the function itself.  */
Index: tree-vect-loop.c
===================================================================
--- tree-vect-loop.c	(revision 224100)
+++ tree-vect-loop.c	(working copy)
@@ -1879,6 +1879,10 @@
       return false;
     }
 
+  /* Peeling for alignment is not supported for outer-loop vectorization.  */
+  if (LOOP_VINFO_LOOP (loop_vinfo)->inner)
+    LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) = 0;
+
   /* Decide whether we need to create an epilogue loop to handle
      remaining scalar iterations.  */
   th = ((LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo) + 1)

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2015-07-14 11:00 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-06-08 10:43 [PATCH] Yet another simple fix to enhance outer-loop vectorization Yuri Rumyantsev
2015-06-09 13:28 ` Richard Biener
2015-06-16 14:39   ` Yuri Rumyantsev
2015-06-17 12:28     ` Richard Biener
2015-06-17 17:13       ` Yuri Rumyantsev
2015-06-29 16:53         ` Yuri Rumyantsev
2015-07-14 11:00           ` Richard Biener
2015-07-14 11:00         ` Richard Biener

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).