public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* RFA: vectorize reductions on minus_expr
@ 2010-05-06 14:52 Michael Matz
  2010-05-06 15:26 ` Steven Bosscher
  0 siblings, 1 reply; 4+ messages in thread
From: Michael Matz @ 2010-05-06 14:52 UTC (permalink / raw)
  To: gcc-patches

Hello,

Currently we can't handle reductions using subtraction in the vectorizer,
as in:

  res = 0
  for (i)
    res -= x[i];

The most trivial way to handle them is by mindlessly rewriting this
pattern into a negation and a plus_expr, which we can already vectorize
just fine, and then let reassoc clean up the negation+plus into a minus
again.  Directly supporting reductions for non-associative codes in the
vectorizer turned out to be much harder (for one, because of the deeply
hard-coded assumption that the _second_ argument is the one to reduce into
:-/, and because of the peculiar split between detecting reduction
patterns, detecting other patterns, and actually emitting the code to
handle reductions).

This requires reassoc to also handle vector types, which is only a trivial 
change.

This improves 482.sphinx3 (full of these reductions) by about 9% on 
amdfam10.

Regstrapping on x86_64-linux in progress.  Okay for trunk?


Ciao,
Michael.
-- 
	* tree-ssa-reassoc.c (undistribute_ops_list): Use create_tmp_reg.
	(can_reassociate_p): Use FLOAT_TYPE_P.
	* tree-vect-loop.c (vect_is_simple_reduction): Rewrite "a-b" into
	"a+(-b)".

testsuite/
	* gcc.dg/vect/fast-math-vect-reduc-8.c: New test.

Index: tree-ssa-reassoc.c
===================================================================
--- tree-ssa-reassoc.c	(revision 159105)
+++ tree-ssa-reassoc.c	(working copy)
@@ -1165,7 +1165,7 @@ undistribute_ops_list (enum tree_code op
 	      fprintf (dump_file, "Building (");
 	      print_generic_expr (dump_file, oe1->op, 0);
 	    }
-	  tmpvar = create_tmp_var (TREE_TYPE (oe1->op), NULL);
+	  tmpvar = create_tmp_reg (TREE_TYPE (oe1->op), NULL);
 	  add_referenced_var (tmpvar);
 	  zero_one_operation (&oe1->op, c->oecode, c->op);
 	  EXECUTE_IF_SET_IN_SBITMAP (candidates2, first+1, i, sbi0)
@@ -1840,7 +1840,7 @@ can_reassociate_p (tree op)
   tree type = TREE_TYPE (op);
   if (INTEGRAL_TYPE_P (type)
       || NON_SAT_FIXED_POINT_TYPE_P (type)
-      || (flag_associative_math && SCALAR_FLOAT_TYPE_P (type)))
+      || (flag_associative_math && FLOAT_TYPE_P (type)))
     return true;
   return false;
 }
Index: tree-vect-loop.c
===================================================================
--- tree-vect-loop.c	(revision 159105)
+++ tree-vect-loop.c	(working copy)
@@ -1744,6 +1744,21 @@ vect_is_simple_reduction (loop_vec_info
     }
 
   code = gimple_assign_rhs_code (def_stmt);
+  if (code == MINUS_EXPR)
+    {
+      tree rhs = gimple_assign_rhs2 (def_stmt);
+      tree negrhs = make_ssa_name (SSA_NAME_VAR (rhs), NULL);
+      gimple negate_stmt = gimple_build_assign_with_ops (NEGATE_EXPR, negrhs,
+							 rhs, NULL);
+      gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
+      set_vinfo_for_stmt (negate_stmt, new_stmt_vec_info (negate_stmt, 
+							  loop_info, NULL));
+      gsi_insert_before (&gsi, negate_stmt, GSI_NEW_STMT);
+      gimple_assign_set_rhs2 (def_stmt, negrhs);
+      gimple_assign_set_rhs_code (def_stmt, PLUS_EXPR);
+      update_stmt (def_stmt);
+      code = PLUS_EXPR;
+    }
 
   if (check_reduction
       && (!commutative_tree_code (code) || !associative_tree_code (code)))
Index: testsuite/gcc.dg/vect/fast-math-vect-reduc-8.c
===================================================================
--- testsuite/gcc.dg/vect/fast-math-vect-reduc-8.c	(revision 0)
+++ testsuite/gcc.dg/vect/fast-math-vect-reduc-8.c	(revision 0)
@@ -0,0 +1,23 @@
+/* { dg-require-effective-target vect_float } */
+/* { dg-do compile } */
+
+#include "tree-vect.h"
+
+extern float x[128] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
+extern float y[128] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
+extern float z[128] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
+
+float f (unsigned n)
+{
+  float ret = 0.0;
+  unsigned i;
+  for (i = 0; i < n; i++)
+    {
+      float diff = x[i] - y[i];
+      ret -= diff * diff * z[i];
+    }
+  return ret;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: RFA: vectorize reductions on minus_expr
  2010-05-06 14:52 RFA: vectorize reductions on minus_expr Michael Matz
@ 2010-05-06 15:26 ` Steven Bosscher
  2010-05-07 15:26   ` RFA: vectorize reductions on minus_expr [v2] Michael Matz
  0 siblings, 1 reply; 4+ messages in thread
From: Steven Bosscher @ 2010-05-06 15:26 UTC (permalink / raw)
  To: Michael Matz; +Cc: gcc-patches

On Thu, May 6, 2010 at 4:52 PM, Michael Matz <matz@suse.de> wrote:
> Index: tree-vect-loop.c
> ===================================================================
> --- tree-vect-loop.c    (revision 159105)
> +++ tree-vect-loop.c    (working copy)
> @@ -1744,6 +1744,21 @@ vect_is_simple_reduction (loop_vec_info
>     }
>
>   code = gimple_assign_rhs_code (def_stmt);
> +  if (code == MINUS_EXPR)
> +    {
> +      tree rhs = gimple_assign_rhs2 (def_stmt);
> +      tree negrhs = make_ssa_name (SSA_NAME_VAR (rhs), NULL);
> +      gimple negate_stmt = gimple_build_assign_with_ops (NEGATE_EXPR, negrhs,
> +                                                        rhs, NULL);
> +      gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
> +      set_vinfo_for_stmt (negate_stmt, new_stmt_vec_info (negate_stmt,
> +                                                         loop_info, NULL));
> +      gsi_insert_before (&gsi, negate_stmt, GSI_NEW_STMT);
> +      gimple_assign_set_rhs2 (def_stmt, negrhs);
> +      gimple_assign_set_rhs_code (def_stmt, PLUS_EXPR);
> +      update_stmt (def_stmt);
> +      code = PLUS_EXPR;
> +    }
>
>   if (check_reduction
>       && (!commutative_tree_code (code) || !associative_tree_code (code)))

Deserves a big fat comment on why we do this, IMVHO.

Ciao!
Steven

^ permalink raw reply	[flat|nested] 4+ messages in thread

* RFA: vectorize reductions on minus_expr [v2]
  2010-05-06 15:26 ` Steven Bosscher
@ 2010-05-07 15:26   ` Michael Matz
  2010-05-07 15:50     ` Richard Guenther
  0 siblings, 1 reply; 4+ messages in thread
From: Michael Matz @ 2010-05-07 15:26 UTC (permalink / raw)
  To: gcc-patches; +Cc: Steven Bosscher

[-- Attachment #1: Type: TEXT/PLAIN, Size: 9164 bytes --]

Hello,

On Thu, 6 May 2010, Steven Bosscher wrote:

> > +  if (code == MINUS_EXPR)
> > +    {
...

> Deserves a big fat comment on why we do this, IMVHO.

Agreed.  I've also made the in-place modification conditional on an 
argument, so that the uses in gcc_assert certainly won't change code,
renamed the global function to vect_force_simple_reduction and added some 
comments.

Regstrapping on x86_64-linux in progress.  Okay for trunk if it passes?


Ciao,
Michael.
-- 
	* tree-ssa-reassoc.c (undistribute_ops_list): Use create_tmp_reg.
	(can_reassociate_p): Use FLOAT_TYPE_P.
	* tree-vectorizer.h (vect_is_simple_reduction): Rename to ...
	(vect_force_simple_reduction): ... this.
	* tree-parloops.c (gather_scalar_reductions): Use 
	vect_force_simple_reduction.
	* tree-vect-loop.c (vect_is_simple_reduction_1): Rename from
	vect_is_simple_reduction, add modify argument, if true rewrite
	"a-b" into "a+(-b)".
	(vect_is_simple_reduction, vect_force_simple_reduction): New
	functions.
	(vect_analyze_scalar_cycles_1): Use vect_force_simple_reduction.

testsuite/
	* gcc.dg/vect/fast-math-vect-reduc-8.c: New test.

Index: tree-ssa-reassoc.c
===================================================================
--- tree-ssa-reassoc.c	(revision 159105)
+++ tree-ssa-reassoc.c	(working copy)
@@ -1165,7 +1165,7 @@ undistribute_ops_list (enum tree_code op
 	      fprintf (dump_file, "Building (");
 	      print_generic_expr (dump_file, oe1->op, 0);
 	    }
-	  tmpvar = create_tmp_var (TREE_TYPE (oe1->op), NULL);
+	  tmpvar = create_tmp_reg (TREE_TYPE (oe1->op), NULL);
 	  add_referenced_var (tmpvar);
 	  zero_one_operation (&oe1->op, c->oecode, c->op);
 	  EXECUTE_IF_SET_IN_SBITMAP (candidates2, first+1, i, sbi0)
@@ -1840,7 +1840,7 @@ can_reassociate_p (tree op)
   tree type = TREE_TYPE (op);
   if (INTEGRAL_TYPE_P (type)
       || NON_SAT_FIXED_POINT_TYPE_P (type)
-      || (flag_associative_math && SCALAR_FLOAT_TYPE_P (type)))
+      || (flag_associative_math && FLOAT_TYPE_P (type)))
     return true;
   return false;
 }
Index: tree-vectorizer.h
===================================================================
--- tree-vectorizer.h	(revision 159105)
+++ tree-vectorizer.h	(working copy)
@@ -846,7 +846,7 @@ extern tree vect_create_addr_base_for_ve
 /* In tree-vect-loop.c.  */
 /* FORNOW: Used in tree-parloops.c.  */
 extern void destroy_loop_vec_info (loop_vec_info, bool);
-extern gimple vect_is_simple_reduction (loop_vec_info, gimple, bool, bool *);
+extern gimple vect_force_simple_reduction (loop_vec_info, gimple, bool, bool *);
 /* Drive for loop analysis stage.  */
 extern loop_vec_info vect_analyze_loop (struct loop *);
 /* Drive for loop transformation stage.  */
Index: tree-parloops.c
===================================================================
--- tree-parloops.c	(revision 159105)
+++ tree-parloops.c	(working copy)
@@ -64,7 +64,7 @@ along with GCC; see the file COPYING3.
 
 /*
   Reduction handling:
-  currently we use vect_is_simple_reduction() to detect reduction patterns.
+  currently we use vect_force_simple_reduction() to detect reduction patterns.
   The code transformation will be introduced by an example.
 
 
@@ -1745,7 +1745,9 @@ gather_scalar_reductions (loop_p loop, h
       if (!simple_iv (loop, loop, res, &iv, true)
 	&& simple_loop_info)
 	{
-           gimple reduc_stmt = vect_is_simple_reduction (simple_loop_info, phi, true, &double_reduc);
+           gimple reduc_stmt = vect_force_simple_reduction (simple_loop_info,
+							    phi, true,
+							    &double_reduc);
 	   if (reduc_stmt && !double_reduc)
               build_new_reduction (reduction_list, reduc_stmt, phi);
         }
Index: tree-vect-loop.c
===================================================================
--- tree-vect-loop.c	(revision 159105)
+++ tree-vect-loop.c	(working copy)
@@ -513,8 +513,8 @@ vect_analyze_scalar_cycles_1 (loop_vec_i
       gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_unknown_def_type);
 
       nested_cycle = (loop != LOOP_VINFO_LOOP (loop_vinfo));
-      reduc_stmt = vect_is_simple_reduction (loop_vinfo, phi, !nested_cycle,
-                                             &double_reduc);
+      reduc_stmt = vect_force_simple_reduction (loop_vinfo, phi, !nested_cycle,
+						&double_reduc);
       if (reduc_stmt)
         {
           if (double_reduc)
@@ -1584,7 +1584,7 @@ report_vect_op (gimple stmt, const char
 }
 
 
-/* Function vect_is_simple_reduction
+/* Function vect_is_simple_reduction_1
 
    (1) Detect a cross-iteration def-use cycle that represents a simple
    reduction computation. We look for the following pattern:
@@ -1612,18 +1612,23 @@ report_vect_op (gimple stmt, const char
      a1 = phi < a0, a2 >
      inner loop (def of a3)
      a2 = phi < a3 >
+
+   If MODIFY is true it also tries to rework the code in-place to enable
+   detection of more reduction patterns.  For the time being we rewrite
+   "res -= RHS" into "res += -RHS" when it seems worthwhile.
 */
 
-gimple
-vect_is_simple_reduction (loop_vec_info loop_info, gimple phi,
-                          bool check_reduction, bool *double_reduc)
+static gimple
+vect_is_simple_reduction_1 (loop_vec_info loop_info, gimple phi,
+			    bool check_reduction, bool *double_reduc,
+			    bool modify)
 {
   struct loop *loop = (gimple_bb (phi))->loop_father;
   struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
   edge latch_e = loop_latch_edge (loop);
   tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
   gimple def_stmt, def1 = NULL, def2 = NULL;
-  enum tree_code code;
+  enum tree_code orig_code, code;
   tree op1, op2, op3 = NULL_TREE, op4 = NULL_TREE;
   tree type;
   int nloop_uses;
@@ -1743,7 +1748,14 @@ vect_is_simple_reduction (loop_vec_info
       return NULL;
     }
 
-  code = gimple_assign_rhs_code (def_stmt);
+  code = orig_code = gimple_assign_rhs_code (def_stmt);
+
+  /* We can handle "res -= x[i]", which is non-associative, by
+     simply rewriting this into "res += -x[i]".  Avoid changing the
+     gimple statement during the first simple tests, and only do this
+     if we're allowed to change code at all.  */
+  if (code == MINUS_EXPR && modify)
+    code = PLUS_EXPR;
 
   if (check_reduction
       && (!commutative_tree_code (code) || !associative_tree_code (code)))
@@ -1863,6 +1875,24 @@ vect_is_simple_reduction (loop_vec_info
       return NULL;
     }
 
+  /* If we detected "res -= x[i]" earlier, rewrite it into
+     "res += -x[i]" now.  If this turns out to be useless reassoc
+     will clean it up again.  */
+  if (orig_code == MINUS_EXPR)
+    {
+      tree rhs = gimple_assign_rhs2 (def_stmt);
+      tree negrhs = make_ssa_name (SSA_NAME_VAR (rhs), NULL);
+      gimple negate_stmt = gimple_build_assign_with_ops (NEGATE_EXPR, negrhs,
+							 rhs, NULL);
+      gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
+      set_vinfo_for_stmt (negate_stmt, new_stmt_vec_info (negate_stmt, 
+							  loop_info, NULL));
+      gsi_insert_before (&gsi, negate_stmt, GSI_NEW_STMT);
+      gimple_assign_set_rhs2 (def_stmt, negrhs);
+      gimple_assign_set_rhs_code (def_stmt, PLUS_EXPR);
+      update_stmt (def_stmt);
+    }
+
   /* Reduction is safe. We're dealing with one of the following:
      1) integer arithmetic and no trapv
      2) floating point arithmetic, and special flags permit this optimization
@@ -1940,6 +1970,28 @@ vect_is_simple_reduction (loop_vec_info
     }
 }
 
+/* Wrapper around vect_is_simple_reduction_1, that won't modify code
+   in-place.  Arguments as there.  */
+
+static gimple
+vect_is_simple_reduction (loop_vec_info loop_info, gimple phi,
+                          bool check_reduction, bool *double_reduc)
+{
+  return vect_is_simple_reduction_1 (loop_info, phi, check_reduction,
+				     double_reduc, false);
+}
+
+/* Wrapper around vect_is_simple_reduction_1, which will modify code
+   in-place if it enables detection of more reductions.  Arguments
+   as there.  */
+
+gimple
+vect_force_simple_reduction (loop_vec_info loop_info, gimple phi,
+                          bool check_reduction, bool *double_reduc)
+{
+  return vect_is_simple_reduction_1 (loop_info, phi, check_reduction,
+				     double_reduc, true);
+}
 
 /* Function vect_estimate_min_profitable_iters
 
Index: testsuite/gcc.dg/vect/fast-math-vect-reduc-8.c
===================================================================
--- testsuite/gcc.dg/vect/fast-math-vect-reduc-8.c	(revision 0)
+++ testsuite/gcc.dg/vect/fast-math-vect-reduc-8.c	(revision 0)
@@ -0,0 +1,23 @@
+/* { dg-require-effective-target vect_float } */
+/* { dg-do compile } */
+
+#include "tree-vect.h"
+
+extern float x[128] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
+extern float y[128] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
+extern float z[128] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
+
+float f (unsigned n)
+{
+  float ret = 0.0;
+  unsigned i;
+  for (i = 0; i < n; i++)
+    {
+      float diff = x[i] - y[i];
+      ret -= diff * diff * z[i];
+    }
+  return ret;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: RFA: vectorize reductions on minus_expr [v2]
  2010-05-07 15:26   ` RFA: vectorize reductions on minus_expr [v2] Michael Matz
@ 2010-05-07 15:50     ` Richard Guenther
  0 siblings, 0 replies; 4+ messages in thread
From: Richard Guenther @ 2010-05-07 15:50 UTC (permalink / raw)
  To: Michael Matz; +Cc: gcc-patches, Steven Bosscher

On Fri, May 7, 2010 at 5:26 PM, Michael Matz <matz@suse.de> wrote:
> Hello,
>
> On Thu, 6 May 2010, Steven Bosscher wrote:
>
>> > +  if (code == MINUS_EXPR)
>> > +    {
> ...
>
>> Deserves a big fat comment on why we do this, IMVHO.
>
> Agreed.  I've also made the in-place modification conditional on an
> argument, so that the uses in gcc_assert certainly won't change code,
> renamed the global function to vect_force_simple_reduction and added some
> comments.
>
> Regstrapping on x86_64-linux in progress.  Okay for trunk if it passes?

Ok.

Thanks,
Richard.

>
> Ciao,
> Michael.
> --
>        * tree-ssa-reassoc.c (undistribute_ops_list): Use create_tmp_reg.
>        (can_reassociate_p): Use FLOAT_TYPE_P.
>        * tree-vectorizer.h (vect_is_simple_reduction): Rename to ...
>        (vect_force_simple_reduction): ... this.
>        * tree-parloops.c (gather_scalar_reductions): Use
>        vect_force_simple_reduction.
>        * tree-vect-loop.c (vect_is_simple_reduction_1): Rename from
>        vect_is_simple_reduction, add modify argument, if true rewrite
>        "a-b" into "a+(-b)".
>        (vect_is_simple_reduction, vect_force_simple_reduction): New
>        functions.
>        (vect_analyze_scalar_cycles_1): Use vect_force_simple_reduction.
>
> testsuite/
>        * gcc.dg/vect/fast-math-vect-reduc-8.c: New test.
>
> Index: tree-ssa-reassoc.c
> ===================================================================
> --- tree-ssa-reassoc.c  (revision 159105)
> +++ tree-ssa-reassoc.c  (working copy)
> @@ -1165,7 +1165,7 @@ undistribute_ops_list (enum tree_code op
>              fprintf (dump_file, "Building (");
>              print_generic_expr (dump_file, oe1->op, 0);
>            }
> -         tmpvar = create_tmp_var (TREE_TYPE (oe1->op), NULL);
> +         tmpvar = create_tmp_reg (TREE_TYPE (oe1->op), NULL);
>          add_referenced_var (tmpvar);
>          zero_one_operation (&oe1->op, c->oecode, c->op);
>          EXECUTE_IF_SET_IN_SBITMAP (candidates2, first+1, i, sbi0)
> @@ -1840,7 +1840,7 @@ can_reassociate_p (tree op)
>   tree type = TREE_TYPE (op);
>   if (INTEGRAL_TYPE_P (type)
>       || NON_SAT_FIXED_POINT_TYPE_P (type)
> -      || (flag_associative_math && SCALAR_FLOAT_TYPE_P (type)))
> +      || (flag_associative_math && FLOAT_TYPE_P (type)))
>     return true;
>   return false;
>  }
> Index: tree-vectorizer.h
> ===================================================================
> --- tree-vectorizer.h   (revision 159105)
> +++ tree-vectorizer.h   (working copy)
> @@ -846,7 +846,7 @@ extern tree vect_create_addr_base_for_ve
>  /* In tree-vect-loop.c.  */
>  /* FORNOW: Used in tree-parloops.c.  */
>  extern void destroy_loop_vec_info (loop_vec_info, bool);
> -extern gimple vect_is_simple_reduction (loop_vec_info, gimple, bool, bool *);
> +extern gimple vect_force_simple_reduction (loop_vec_info, gimple, bool, bool *);
>  /* Drive for loop analysis stage.  */
>  extern loop_vec_info vect_analyze_loop (struct loop *);
>  /* Drive for loop transformation stage.  */
> Index: tree-parloops.c
> ===================================================================
> --- tree-parloops.c     (revision 159105)
> +++ tree-parloops.c     (working copy)
> @@ -64,7 +64,7 @@ along with GCC; see the file COPYING3.
>
>  /*
>   Reduction handling:
> -  currently we use vect_is_simple_reduction() to detect reduction patterns.
> +  currently we use vect_force_simple_reduction() to detect reduction patterns.
>   The code transformation will be introduced by an example.
>
>
> @@ -1745,7 +1745,9 @@ gather_scalar_reductions (loop_p loop, h
>       if (!simple_iv (loop, loop, res, &iv, true)
>        && simple_loop_info)
>        {
> -           gimple reduc_stmt = vect_is_simple_reduction (simple_loop_info, phi, true, &double_reduc);
> +           gimple reduc_stmt = vect_force_simple_reduction (simple_loop_info,
> +                                                           phi, true,
> +                                                           &double_reduc);
>           if (reduc_stmt && !double_reduc)
>               build_new_reduction (reduction_list, reduc_stmt, phi);
>         }
> Index: tree-vect-loop.c
> ===================================================================
> --- tree-vect-loop.c    (revision 159105)
> +++ tree-vect-loop.c    (working copy)
> @@ -513,8 +513,8 @@ vect_analyze_scalar_cycles_1 (loop_vec_i
>       gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_unknown_def_type);
>
>       nested_cycle = (loop != LOOP_VINFO_LOOP (loop_vinfo));
> -      reduc_stmt = vect_is_simple_reduction (loop_vinfo, phi, !nested_cycle,
> -                                             &double_reduc);
> +      reduc_stmt = vect_force_simple_reduction (loop_vinfo, phi, !nested_cycle,
> +                                               &double_reduc);
>       if (reduc_stmt)
>         {
>           if (double_reduc)
> @@ -1584,7 +1584,7 @@ report_vect_op (gimple stmt, const char
>  }
>
>
> -/* Function vect_is_simple_reduction
> +/* Function vect_is_simple_reduction_1
>
>    (1) Detect a cross-iteration def-use cycle that represents a simple
>    reduction computation. We look for the following pattern:
> @@ -1612,18 +1612,23 @@ report_vect_op (gimple stmt, const char
>      a1 = phi < a0, a2 >
>      inner loop (def of a3)
>      a2 = phi < a3 >
> +
> +   If MODIFY is true it tries also to rework the code in-place to enable
> +   detection of more reduction patterns.  For the time being we rewrite
> +   "res -= RHS" into "rhs += -RHS" when it seems worthwhile.
>  */
>
> -gimple
> -vect_is_simple_reduction (loop_vec_info loop_info, gimple phi,
> -                          bool check_reduction, bool *double_reduc)
> +static gimple
> +vect_is_simple_reduction_1 (loop_vec_info loop_info, gimple phi,
> +                           bool check_reduction, bool *double_reduc,
> +                           bool modify)
>  {
>   struct loop *loop = (gimple_bb (phi))->loop_father;
>   struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
>   edge latch_e = loop_latch_edge (loop);
>   tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
>   gimple def_stmt, def1 = NULL, def2 = NULL;
> -  enum tree_code code;
> +  enum tree_code orig_code, code;
>   tree op1, op2, op3 = NULL_TREE, op4 = NULL_TREE;
>   tree type;
>   int nloop_uses;
> @@ -1743,7 +1748,14 @@ vect_is_simple_reduction (loop_vec_info
>       return NULL;
>     }
>
> -  code = gimple_assign_rhs_code (def_stmt);
> +  code = orig_code = gimple_assign_rhs_code (def_stmt);
> +
> +  /* We can handle "res -= x[i]", which is non-associative by
> +     simply rewriting this into "res += -x[i]".  Avoid changing
> +     gimple instruction for the first simple tests and only do this
> +     if we're allowed to change code at all.  */
> +  if (code == MINUS_EXPR && modify)
> +    code = PLUS_EXPR;
>
>   if (check_reduction
>       && (!commutative_tree_code (code) || !associative_tree_code (code)))
> @@ -1863,6 +1875,24 @@ vect_is_simple_reduction (loop_vec_info
>       return NULL;
>     }
>
> +  /* If we detected "res -= x[i]" earlier, rewrite it into
> +     "res += -x[i]" now.  If this turns out to be useless reassoc
> +     will clean it up again.  */
> +  if (orig_code == MINUS_EXPR)
> +    {
> +      tree rhs = gimple_assign_rhs2 (def_stmt);
> +      tree negrhs = make_ssa_name (SSA_NAME_VAR (rhs), NULL);
> +      gimple negate_stmt = gimple_build_assign_with_ops (NEGATE_EXPR, negrhs,
> +                                                        rhs, NULL);
> +      gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
> +      set_vinfo_for_stmt (negate_stmt, new_stmt_vec_info (negate_stmt,
> +                                                         loop_info, NULL));
> +      gsi_insert_before (&gsi, negate_stmt, GSI_NEW_STMT);
> +      gimple_assign_set_rhs2 (def_stmt, negrhs);
> +      gimple_assign_set_rhs_code (def_stmt, PLUS_EXPR);
> +      update_stmt (def_stmt);
> +    }
> +
>   /* Reduction is safe. We're dealing with one of the following:
>      1) integer arithmetic and no trapv
>      2) floating point arithmetic, and special flags permit this optimization
> @@ -1940,6 +1970,28 @@ vect_is_simple_reduction (loop_vec_info
>     }
>  }
>
> +/* Wrapper around vect_is_simple_reduction_1, that won't modify code
> +   in-place.  Arguments as there.  */
> +
> +static gimple
> +vect_is_simple_reduction (loop_vec_info loop_info, gimple phi,
> +                          bool check_reduction, bool *double_reduc)
> +{
> +  return vect_is_simple_reduction_1 (loop_info, phi, check_reduction,
> +                                    double_reduc, false);
> +}
> +
> +/* Wrapper around vect_is_simple_reduction_1, which will modify code
> +   in-place if it enables detection of more reductions.  Arguments
> +   as there.  */
> +
> +gimple
> +vect_force_simple_reduction (loop_vec_info loop_info, gimple phi,
> +                          bool check_reduction, bool *double_reduc)
> +{
> +  return vect_is_simple_reduction_1 (loop_info, phi, check_reduction,
> +                                    double_reduc, true);
> +}
>
>  /* Function vect_estimate_min_profitable_iters
>
> Index: testsuite/gcc.dg/vect/fast-math-vect-reduc-8.c
> ===================================================================
> --- testsuite/gcc.dg/vect/fast-math-vect-reduc-8.c      (revision 0)
> +++ testsuite/gcc.dg/vect/fast-math-vect-reduc-8.c      (revision 0)
> @@ -0,0 +1,23 @@
> +/* { dg-require-effective-target vect_float } */
> +/* { dg-do compile } */
> +
> +#include "tree-vect.h"
> +
> +extern float x[128] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
> +extern float y[128] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
> +extern float z[128] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
> +
> +float f (unsigned n)
> +{
> +  float ret = 0.0;
> +  unsigned i;
> +  for (i = 0; i < n; i++)
> +    {
> +      float diff = x[i] - y[i];
> +      ret -= diff * diff * z[i];
> +    }
> +  return ret;
> +}
> +
> +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
> +/* { dg-final { cleanup-tree-dump "vect" } } */

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2010-05-07 15:50 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-05-06 14:52 RFA: vectorize reductions on minus_expr Michael Matz
2010-05-06 15:26 ` Steven Bosscher
2010-05-07 15:26   ` RFA: vectorize reductions on minus_expr [v2] Michael Matz
2010-05-07 15:50     ` Richard Guenther

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).