public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [patch, vectorizer] Fix PR tree-optimization/48765
@ 2011-04-28 13:39 Ira Rosen
  2011-04-28 20:24 ` Ira Rosen
  0 siblings, 1 reply; 2+ messages in thread
From: Ira Rosen @ 2011-04-28 13:39 UTC (permalink / raw)
  To: gcc-patches; +Cc: Patch Tracking


Hi,

Sometimes loop vectorization factor changes during the analysis, while
statement analysis depends on it. This patch moves the update of the
vectorization before statements, avoiding current difference between the
analysis and the transformations phases that caused the problem described
in the PR.

Bootstrapped and now testing on powerpc64-suse-linux.
I'll commit the patch once the testing completes.

Ira

ChangeLog:

	PR tree-optimization/48765
	* tree-vect-loop.c (vect_analyze_loop_operations): Scan the
statements
	and update the vectorization factor according to the type of
	vectorization before statement analysis.
	(vectorizable_reduction): Set number of copies to 1 in case of pure
SLP
	statement.
	* tree-vect-stmts.c (vectorizable_conversion,
vectorizable_assignment,
	vectorizable_shift, vectorizable_operation,
vectorizable_type_demotion,
	vectorizable_type_promotion, vectorizable_store, vectorizable_load):
	Likewise.
	(vectorizable_condition): Move the check that it is not SLP
	vectorization before the number of copies check.

testsuite/ChangeLog:

	PR tree-optimization/48765
	* gcc.dg/vect/pr48765.c: New.


Index: tree-vect-loop.c
===================================================================
--- tree-vect-loop.c    (revision 173018)
+++ tree-vect-loop.c    (working copy)
@@ -1167,7 +1167,38 @@ vect_analyze_loop_operations (loop_vec_info loop_v

   gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
   vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+  /* If all the stmts in the loop can be SLPed, we perform only SLP, and
+     vectorization factor of the loop is the unrolling factor required by
the
+     SLP instances.  If that unrolling factor is 1, we say, that we
perform
+     pure SLP on loop - cross iteration parallelism is not exploited.  */
+  for (i = 0; i < nbbs; i++)
+    {
+      basic_block bb = bbs[i];
+      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
+        {
+          gimple stmt = gsi_stmt (si);
+          stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+          gcc_assert (stmt_info);
+          if ((STMT_VINFO_RELEVANT_P (stmt_info)
+               || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE
(stmt_info)))
+              && !PURE_SLP_STMT (stmt_info))
+            /* STMT needs both SLP and loop-based vectorization.  */
+            only_slp_in_loop = false;
+        }
+    }

+  if (only_slp_in_loop)
+    vectorization_factor = LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo);
+  else
+    vectorization_factor = least_common_multiple (vectorization_factor,
+                                LOOP_VINFO_SLP_UNROLLING_FACTOR
(loop_vinfo));
+
+  LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
+
+  if (vect_print_dump_info (REPORT_DETAILS))
+    fprintf (vect_dump, "Updating vectorization factor to %d ",
+             vectorization_factor);
+
   for (i = 0; i < nbbs; i++)
     {
       basic_block bb = bbs[i];
@@ -1272,18 +1303,8 @@ vect_analyze_loop_operations (loop_vec_info loop_v
       for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
         {
           gimple stmt = gsi_stmt (si);
-          stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
-
-          gcc_assert (stmt_info);
-
          if (!vect_analyze_stmt (stmt, &need_to_vectorize, NULL))
            return false;
-
-          if ((STMT_VINFO_RELEVANT_P (stmt_info)
-               || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE
(stmt_info)))
-              && !PURE_SLP_STMT (stmt_info))
-            /* STMT needs both SLP and loop-based vectorization.  */
-            only_slp_in_loop = false;
         }
     } /* bbs */

@@ -1303,18 +1324,6 @@ vect_analyze_loop_operations (loop_vec_info loop_v
       return false;
     }

-  /* If all the stmts in the loop can be SLPed, we perform only SLP, and
-     vectorization factor of the loop is the unrolling factor required by
the
-     SLP instances.  If that unrolling factor is 1, we say, that we
perform
-     pure SLP on loop - cross iteration parallelism is not exploited.  */
-  if (only_slp_in_loop)
-    vectorization_factor = LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo);
-  else
-    vectorization_factor = least_common_multiple (vectorization_factor,
-                                LOOP_VINFO_SLP_UNROLLING_FACTOR
(loop_vinfo));
-
-  LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
-
   if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
       && vect_print_dump_info (REPORT_DETAILS))
     fprintf (vect_dump,
@@ -4136,7 +4145,7 @@ vectorizable_reduction (gimple stmt, gimple_stmt_i
   if (STMT_VINFO_LIVE_P (vinfo_for_stmt (reduc_def_stmt)))
     return false;

-  if (slp_node)
+  if (slp_node || PURE_SLP_STMT (stmt_info))
     ncopies = 1;
   else
     ncopies = (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
Index: tree-vect-stmts.c
===================================================================
--- tree-vect-stmts.c   (revision 173018)
+++ tree-vect-stmts.c   (working copy)
@@ -1747,7 +1747,7 @@ vectorizable_conversion (gimple stmt, gimple_stmt_
   /* Multiple types in SLP are handled by creating the appropriate number
of
      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
      case of SLP.  */
-  if (slp_node)
+  if (slp_node || PURE_SLP_STMT (stmt_info))
     ncopies = 1;

   /* Sanity check: make sure that at least one copy of the vectorized stmt
@@ -1940,7 +1940,7 @@ vectorizable_assignment (gimple stmt, gimple_stmt_
   /* Multiple types in SLP are handled by creating the appropriate number
of
      vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
      case of SLP.  */
-  if (slp_node)
+  if (slp_node || PURE_SLP_STMT (stmt_info))
     ncopies = 1;
   else
     ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
@@ -2149,7 +2149,7 @@ vectorizable_shift (gimple stmt, gimple_stmt_itera
   /* Multiple types in SLP are handled by creating the appropriate number
of
      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
      case of SLP.  */
-  if (slp_node)
+  if (slp_node || PURE_SLP_STMT (stmt_info))
     ncopies = 1;
   else
     ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
@@ -2497,7 +2497,7 @@ vectorizable_operation (gimple stmt, gimple_stmt_i
   /* Multiple types in SLP are handled by creating the appropriate number
of
      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
      case of SLP.  */
-  if (slp_node)
+  if (slp_node || PURE_SLP_STMT (stmt_info))
     ncopies = 1;
   else
     ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
@@ -2895,7 +2895,7 @@ vectorizable_type_demotion (gimple stmt, gimple_st
   /* Multiple types in SLP are handled by creating the appropriate number
of
      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
      case of SLP.  */
-  if (slp_node)
+  if (slp_node || PURE_SLP_STMT (stmt_info))
     ncopies = 1;
   else
     ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
@@ -3175,7 +3175,7 @@ vectorizable_type_promotion (gimple stmt, gimple_s
   /* Multiple types in SLP are handled by creating the appropriate number
of
      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
      case of SLP.  */
-  if (slp_node)
+  if (slp_node || PURE_SLP_STMT (stmt_info))
     ncopies = 1;
   else
     ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
@@ -3358,7 +3358,7 @@ vectorizable_store (gimple stmt, gimple_stmt_itera
   /* Multiple types in SLP are handled by creating the appropriate number
of
      vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
      case of SLP.  */
-  if (slp)
+  if (slp || PURE_SLP_STMT (stmt_info))
     ncopies = 1;
   else
     ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
@@ -3851,7 +3851,7 @@ vectorizable_load (gimple stmt, gimple_stmt_iterat
   /* Multiple types in SLP are handled by creating the appropriate number
of
      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
      case of SLP.  */
-  if (slp)
+  if (slp || PURE_SLP_STMT (stmt_info))
     ncopies = 1;
   else
     ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
@@ -4457,6 +4457,10 @@ vectorizable_condition (gimple stmt, gimple_stmt_i
   /* FORNOW: unsupported in basic block SLP.  */
   gcc_assert (loop_vinfo);
+  /* FORNOW: SLP not supported.  */
+  if (STMT_SLP_TYPE (stmt_info))
+    return false;
+
   gcc_assert (ncopies >= 1);
   if (reduc_index && ncopies > 1)
     return false; /* FORNOW */
@@ -4469,10 +4473,6 @@ vectorizable_condition (gimple stmt, gimple_stmt_i
            && reduc_def))
     return false;

-  /* FORNOW: SLP not supported.  */
-  if (STMT_SLP_TYPE (stmt_info))
-    return false;
-
   /* FORNOW: not yet supported.  */
   if (STMT_VINFO_LIVE_P (stmt_info))
     {
Index: testsuite/gcc.dg/vect/pr48765.c
===================================================================
--- testsuite/gcc.dg/vect/pr48765.c     (revision 0)
+++ testsuite/gcc.dg/vect/pr48765.c     (revision 0)
@@ -0,0 +1,82 @@
+/* { dg-do compile { target powerpc*-*-* } } */
+/* { dg-options "-m64 -O3 -mcpu=power6" } */
+
+enum reg_class
+{
+  NO_REGS, AP_REG, XRF_REGS, GENERAL_REGS, AGRF_REGS, XGRF_REGS, ALL_REGS,
+    LIM_REG_CLASSES
+};
+enum machine_mode
+{
+  VOIDmode, QImode, HImode, PSImode, SImode, PDImode, DImode, TImode,
OImode,
+    QFmode, HFmode, TQFmode, SFmode, DFmode, XFmode, TFmode, SCmode,
DCmode,
+    XCmode, TCmode, CQImode, CHImode, CSImode, CDImode, CTImode, COImode,
+    BLKmode, CCmode, CCEVENmode, MAX_MACHINE_MODE
+};
+typedef struct rtx_def
+{
+  int mode:8;
+}
+ *rtx;
+extern rtx *regno_reg_rtx;
+typedef unsigned int HARD_REG_ELT_TYPE;
+typedef HARD_REG_ELT_TYPE HARD_REG_SET[((64 + 32 - 1) / 32)];
+extern int reg_alloc_order[64];
+extern int max_regno;
+extern int *reg_n_calls_crossed;
+extern short *reg_renumber;
+static int *reg_where_dead;
+static int *reg_where_born;
+static int *reg_order;
+static char *regs_change_size;
+static HARD_REG_SET *after_insn_hard_regs;
+static int stupid_find_reg (int, enum reg_class, enum machine_mode, int,
int,
+                           int);
+void
+stupid_life_analysis (f, nregs, file)
+     rtx f;
+{
+  register int i;
+  for (i = (((64)) + 3) + 1; i < max_regno; i++)
+    {
+      register int r = reg_order[i];
+      if ((int) LIM_REG_CLASSES > 1)
+       reg_renumber[r] =
+         stupid_find_reg (reg_n_calls_crossed[r], reg_preferred_class (r),
+                          ((regno_reg_rtx[r])->mode), reg_where_born[r],
+                          reg_where_dead[r], regs_change_size[r]);
+    }
+}
+
+static int
+stupid_find_reg (call_preserved, class, mode, born_insn, dead_insn,
+                changes_size)
+     int call_preserved;
+     enum reg_class class;
+     enum machine_mode mode;
+{
+  register int i, ins;
+  HARD_REG_SET used, this_reg;
+  for (ins = born_insn; ins < dead_insn; ins++)
+    do
+      {
+       register HARD_REG_ELT_TYPE *scan_tp_ = (used), *scan_fp_ =
+         (after_insn_hard_regs[ins]);
+       for (i = 0; i < ((64 + 32 - 1) / 32); i++)
+         *scan_tp_++ |= *scan_fp_++;
+      }
+    while (0);
+  for (i = 0; i < 64; i++)
+    {
+      int regno = reg_alloc_order[i];
+      if (((used)[(regno) / ((unsigned) 32)] &
+          (((HARD_REG_ELT_TYPE) (1)) << ((regno) % ((unsigned) 32)))))
+       {
+         register int j;
+         if (j == regno)
+           return regno;
+       }
+    }
+}
+
+/* { dg-final { cleanup-tree-dump "vect" } } */

^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: [patch, vectorizer] Fix PR tree-optimization/48765
  2011-04-28 13:39 [patch, vectorizer] Fix PR tree-optimization/48765 Ira Rosen
@ 2011-04-28 20:24 ` Ira Rosen
  0 siblings, 0 replies; 2+ messages in thread
From: Ira Rosen @ 2011-04-28 20:24 UTC (permalink / raw)
  To: Ira Rosen; +Cc: gcc-patches, Patch Tracking

[-- Attachment #1: Type: text/plain, Size: 1950 bytes --]



gcc-patches-owner@gcc.gnu.org wrote on 28/04/2011 03:42:01 PM:

>
>
> Hi,
>
> Sometimes loop vectorization factor changes during the analysis, while
> statement analysis depends on it. This patch moves the update of the
> vectorization before statements, avoiding current difference between the
> analysis and the transformations phases that caused the problem described
> in the PR.
>
> Bootstrapped and now testing on powerpc64-suse-linux.
> I'll commit the patch once the testing completes.
>

I ended up committing a slightly different version of the patch that scans
the loop statements only if we decided to use SLP.

Bootstrapped and tested on powerpc64-suse-linux.
Committed revision 173132.

Ira

ChangeLog:

        PR tree-optimization/48765
        * tree-vectorizer.h (vect_make_slp_decision): Return bool.
        * tree-vect-loop.c (vect_analyze_loop_operations): Add new argument
        to indicate if loop aware SLP is being used.  Scan the statements
        and update the vectorization factor according to the type of
        vectorization before statement analysis.
        (vect_analyze_loop_2): Get a return value from
vect_make_slp_decision,
        pass it to vect_analyze_loop_operations.
        (vectorizable_reduction): Set number of copies to 1 in case of pure
        SLP statement.
        * tree-vect-stmts.c (vectorizable_conversion,
        vectorizable_assignment, vectorizable_shift,
        vectorizable_operation, vectorizable_type_demotion,
        vectorizable_type_promotion, vectorizable_store,
vectorizable_load):
        Likewise.
        (vectorizable_condition): Move the check that it is not SLP
        vectorization before the number of copies check.
        * tree-vect-slp.c (vect_make_slp_decision): Return TRUE if decided
        to vectorize the loop using SLP.


testsuite/ChangeLog:

        PR tree-optimization/48765
        * gcc.dg/vect/pr48765.c: New.

(See attached file: pr48765.txt)

[-- Attachment #2: pr48765.txt --]
[-- Type: text/plain, Size: 14526 bytes --]

Index: ChangeLog
===================================================================
--- ChangeLog	(revision 173127)
+++ ChangeLog	(working copy)
@@ -1,3 +1,25 @@
+2011-04-28  Ira Rosen  <ira.rosen@linaro.org>
+
+	PR tree-optimization/48765
+	* tree-vectorizer.h (vect_make_slp_decision): Return bool.
+	* tree-vect-loop.c (vect_analyze_loop_operations): Add new argument
+	to indicate if loop aware SLP is being used.  Scan the statements
+	and update the vectorization factor according to the type of
+	vectorization before statement analysis.
+	(vect_analyze_loop_2): Get a return value from vect_make_slp_decision,
+	pass it to vect_analyze_loop_operations.
+	(vectorizable_reduction): Set number of copies to 1 in case of pure
+	SLP statement.
+	* tree-vect-stmts.c (vectorizable_conversion,
+	vectorizable_assignment, vectorizable_shift,
+	vectorizable_operation, vectorizable_type_demotion,
+	vectorizable_type_promotion, vectorizable_store, vectorizable_load):
+	Likewise.
+	(vectorizable_condition): Move the check that it is not SLP
+	vectorization before the number of copies check.
+	* tree-vect-slp.c (vect_make_slp_decision): Return TRUE if decided
+	to vectorize the loop using SLP.
+
 2011-04-28  Jakub Jelinek  <jakub@redhat.com>
 
 	PR middle-end/48597
Index: testsuite/gcc.dg/vect/pr48765.c
===================================================================
--- testsuite/gcc.dg/vect/pr48765.c	(revision 0)
+++ testsuite/gcc.dg/vect/pr48765.c	(revision 0)
@@ -0,0 +1,82 @@
+/* { dg-do compile { target powerpc*-*-* } } */
+/* { dg-options "-m64 -O3 -mcpu=power6" } */
+
+enum reg_class
+{
+  NO_REGS, AP_REG, XRF_REGS, GENERAL_REGS, AGRF_REGS, XGRF_REGS, ALL_REGS,
+    LIM_REG_CLASSES
+};
+enum machine_mode
+{
+  VOIDmode, QImode, HImode, PSImode, SImode, PDImode, DImode, TImode, OImode,
+    QFmode, HFmode, TQFmode, SFmode, DFmode, XFmode, TFmode, SCmode, DCmode,
+    XCmode, TCmode, CQImode, CHImode, CSImode, CDImode, CTImode, COImode,
+    BLKmode, CCmode, CCEVENmode, MAX_MACHINE_MODE
+};
+typedef struct rtx_def
+{
+  int mode:8;
+}
+ *rtx;
+extern rtx *regno_reg_rtx;
+typedef unsigned int HARD_REG_ELT_TYPE;
+typedef HARD_REG_ELT_TYPE HARD_REG_SET[((64 + 32 - 1) / 32)];
+extern int reg_alloc_order[64];
+extern int max_regno;
+extern int *reg_n_calls_crossed;
+extern short *reg_renumber;
+static int *reg_where_dead;
+static int *reg_where_born;
+static int *reg_order;
+static char *regs_change_size;
+static HARD_REG_SET *after_insn_hard_regs;
+static int stupid_find_reg (int, enum reg_class, enum machine_mode, int, int,
+			    int);
+void
+stupid_life_analysis (f, nregs, file)
+     rtx f;
+{
+  register int i;
+  for (i = (((64)) + 3) + 1; i < max_regno; i++)
+    {
+      register int r = reg_order[i];
+      if ((int) LIM_REG_CLASSES > 1)
+	reg_renumber[r] =
+	  stupid_find_reg (reg_n_calls_crossed[r], reg_preferred_class (r),
+			   ((regno_reg_rtx[r])->mode), reg_where_born[r],
+			   reg_where_dead[r], regs_change_size[r]);
+    }
+}
+
+static int
+stupid_find_reg (call_preserved, class, mode, born_insn, dead_insn,
+		 changes_size)
+     int call_preserved;
+     enum reg_class class;
+     enum machine_mode mode;
+{
+  register int i, ins;
+  HARD_REG_SET used, this_reg;
+  for (ins = born_insn; ins < dead_insn; ins++)
+    do
+      {
+	register HARD_REG_ELT_TYPE *scan_tp_ = (used), *scan_fp_ =
+	  (after_insn_hard_regs[ins]);
+	for (i = 0; i < ((64 + 32 - 1) / 32); i++)
+	  *scan_tp_++ |= *scan_fp_++;
+      }
+    while (0);
+  for (i = 0; i < 64; i++)
+    {
+      int regno = reg_alloc_order[i];
+      if (((used)[(regno) / ((unsigned) 32)] &
+	   (((HARD_REG_ELT_TYPE) (1)) << ((regno) % ((unsigned) 32)))))
+	{
+	  register int j;
+	  if (j == regno)
+	    return regno;
+	}
+    }
+}
+
+/* { dg-final { cleanup-tree-dump "vect" } } */
Index: testsuite/ChangeLog
===================================================================
--- testsuite/ChangeLog	(revision 173127)
+++ testsuite/ChangeLog	(working copy)
@@ -1,3 +1,8 @@
+2011-04-28  Ira Rosen  <ira.rosen@linaro.org>
+
+	PR tree-optimization/48765
+	* gcc.dg/vect/pr48765.c: New.
+
 2011-04-28  Rainer Orth  <ro@CeBiTec.Uni-Bielefeld.DE>
 
 	PR tree-optimization/48775
Index: tree-vectorizer.h
===================================================================
--- tree-vectorizer.h	(revision 173127)
+++ tree-vectorizer.h	(working copy)
@@ -870,7 +870,7 @@ extern bool vect_transform_slp_perm_load (gimple,
 extern bool vect_schedule_slp (loop_vec_info, bb_vec_info);
 extern void vect_update_slp_costs_according_to_vf (loop_vec_info);
 extern bool vect_analyze_slp (loop_vec_info, bb_vec_info);
-extern void vect_make_slp_decision (loop_vec_info);
+extern bool vect_make_slp_decision (loop_vec_info);
 extern void vect_detect_hybrid_slp (loop_vec_info);
 extern void vect_get_slp_defs (tree, tree, slp_tree, VEC (tree,heap) **,
                                VEC (tree,heap) **, int);
Index: tree-vect-loop.c
===================================================================
--- tree-vect-loop.c	(revision 173127)
+++ tree-vect-loop.c	(working copy)
@@ -1146,7 +1146,7 @@ vect_get_cost (enum vect_cost_for_stmt type_of_cos
    Scan the loop stmts and make sure they are all vectorizable.  */
 
 static bool
-vect_analyze_loop_operations (loop_vec_info loop_vinfo)
+vect_analyze_loop_operations (loop_vec_info loop_vinfo, bool slp)
 {
   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
@@ -1167,7 +1167,41 @@ static bool
 
   gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
   vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+  if (slp)
+    {
+      /* If all the stmts in the loop can be SLPed, we perform only SLP, and
+	 vectorization factor of the loop is the unrolling factor required by
+	 the SLP instances.  If that unrolling factor is 1, we say, that we
+	 perform pure SLP on loop - cross iteration parallelism is not
+	 exploited.  */
+      for (i = 0; i < nbbs; i++)
+	{
+	  basic_block bb = bbs[i];
+	  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
+	    {
+	      gimple stmt = gsi_stmt (si);
+	      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+	      gcc_assert (stmt_info);
+	      if ((STMT_VINFO_RELEVANT_P (stmt_info)
+		   || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))
+		  && !PURE_SLP_STMT (stmt_info))
+		/* STMT needs both SLP and loop-based vectorization.  */
+		only_slp_in_loop = false;
+	    }
+	}
 
+      if (only_slp_in_loop)
+	vectorization_factor = LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo);
+      else
+	vectorization_factor = least_common_multiple (vectorization_factor,
+				LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo));
+
+      LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
+      if (vect_print_dump_info (REPORT_DETAILS))
+	fprintf (vect_dump, "Updating vectorization factor to %d ",
+	 		    vectorization_factor);
+    }
+
   for (i = 0; i < nbbs; i++)
     {
       basic_block bb = bbs[i];
@@ -1272,18 +1306,8 @@ static bool
       for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
         {
           gimple stmt = gsi_stmt (si);
-          stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
-
-          gcc_assert (stmt_info);
-
 	  if (!vect_analyze_stmt (stmt, &need_to_vectorize, NULL))
 	    return false;
-
-          if ((STMT_VINFO_RELEVANT_P (stmt_info)
-               || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))
-              && !PURE_SLP_STMT (stmt_info))
-            /* STMT needs both SLP and loop-based vectorization.  */
-            only_slp_in_loop = false;
         }
     } /* bbs */
 
@@ -1303,18 +1327,6 @@ static bool
       return false;
     }
 
-  /* If all the stmts in the loop can be SLPed, we perform only SLP, and
-     vectorization factor of the loop is the unrolling factor required by the
-     SLP instances.  If that unrolling factor is 1, we say, that we perform
-     pure SLP on loop - cross iteration parallelism is not exploited.  */
-  if (only_slp_in_loop)
-    vectorization_factor = LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo);
-  else
-    vectorization_factor = least_common_multiple (vectorization_factor,
-                                LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo));
-
-  LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
-
   if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
       && vect_print_dump_info (REPORT_DETAILS))
     fprintf (vect_dump,
@@ -1410,7 +1422,7 @@ static bool
 static bool
 vect_analyze_loop_2 (loop_vec_info loop_vinfo)
 {
-  bool ok, dummy;
+  bool ok, dummy, slp = false;
   int max_vf = MAX_VECTORIZATION_FACTOR;
   int min_vf = 2;
 
@@ -1524,7 +1536,7 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo)
   if (ok)
     {
       /* Decide which possible SLP instances to SLP.  */
-      vect_make_slp_decision (loop_vinfo);
+      slp = vect_make_slp_decision (loop_vinfo);
 
       /* Find stmts that need to be both vectorized and SLPed.  */
       vect_detect_hybrid_slp (loop_vinfo);
@@ -1533,7 +1545,7 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo)
   /* Scan all the operations in the loop and make sure they are
      vectorizable.  */
 
-  ok = vect_analyze_loop_operations (loop_vinfo);
+  ok = vect_analyze_loop_operations (loop_vinfo, slp);
   if (!ok)
     {
       if (vect_print_dump_info (REPORT_DETAILS))
@@ -4136,7 +4148,7 @@ vectorizable_reduction (gimple stmt, gimple_stmt_i
   if (STMT_VINFO_LIVE_P (vinfo_for_stmt (reduc_def_stmt)))
     return false;
 
-  if (slp_node)
+  if (slp_node || PURE_SLP_STMT (stmt_info))
     ncopies = 1;
   else
     ncopies = (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
Index: tree-vect-stmts.c
===================================================================
--- tree-vect-stmts.c	(revision 173127)
+++ tree-vect-stmts.c	(working copy)
@@ -1747,7 +1747,7 @@ vectorizable_conversion (gimple stmt, gimple_stmt_
   /* Multiple types in SLP are handled by creating the appropriate number of
      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
      case of SLP.  */
-  if (slp_node)
+  if (slp_node || PURE_SLP_STMT (stmt_info))
     ncopies = 1;
 
   /* Sanity check: make sure that at least one copy of the vectorized stmt
@@ -1940,7 +1940,7 @@ vectorizable_assignment (gimple stmt, gimple_stmt_
   /* Multiple types in SLP are handled by creating the appropriate number of
      vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
      case of SLP.  */
-  if (slp_node)
+  if (slp_node || PURE_SLP_STMT (stmt_info))
     ncopies = 1;
   else
     ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
@@ -2149,7 +2149,7 @@ vectorizable_shift (gimple stmt, gimple_stmt_itera
   /* Multiple types in SLP are handled by creating the appropriate number of
      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
      case of SLP.  */
-  if (slp_node)
+  if (slp_node || PURE_SLP_STMT (stmt_info))
     ncopies = 1;
   else
     ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
@@ -2497,7 +2497,7 @@ vectorizable_operation (gimple stmt, gimple_stmt_i
   /* Multiple types in SLP are handled by creating the appropriate number of
      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
      case of SLP.  */
-  if (slp_node)
+  if (slp_node || PURE_SLP_STMT (stmt_info))
     ncopies = 1;
   else
     ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
@@ -2895,7 +2895,7 @@ vectorizable_type_demotion (gimple stmt, gimple_st
   /* Multiple types in SLP are handled by creating the appropriate number of
      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
      case of SLP.  */
-  if (slp_node)
+  if (slp_node || PURE_SLP_STMT (stmt_info))
     ncopies = 1;
   else
     ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
@@ -3175,7 +3175,7 @@ vectorizable_type_promotion (gimple stmt, gimple_s
   /* Multiple types in SLP are handled by creating the appropriate number of
      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
      case of SLP.  */
-  if (slp_node)
+  if (slp_node || PURE_SLP_STMT (stmt_info))
     ncopies = 1;
   else
     ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
@@ -3358,7 +3358,7 @@ vectorizable_store (gimple stmt, gimple_stmt_itera
   /* Multiple types in SLP are handled by creating the appropriate number of
      vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
      case of SLP.  */
-  if (slp)
+  if (slp || PURE_SLP_STMT (stmt_info))
     ncopies = 1;
   else
     ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
@@ -3851,7 +3851,7 @@ vectorizable_load (gimple stmt, gimple_stmt_iterat
   /* Multiple types in SLP are handled by creating the appropriate number of
      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
      case of SLP.  */
-  if (slp)
+  if (slp || PURE_SLP_STMT (stmt_info))
     ncopies = 1;
   else
     ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
@@ -4457,6 +4457,10 @@ vectorizable_condition (gimple stmt, gimple_stmt_i
   /* FORNOW: unsupported in basic block SLP.  */
   gcc_assert (loop_vinfo);
 
+  /* FORNOW: SLP not supported.  */
+  if (STMT_SLP_TYPE (stmt_info))
+    return false;
+
   gcc_assert (ncopies >= 1);
   if (reduc_index && ncopies > 1)
     return false; /* FORNOW */
@@ -4469,10 +4473,6 @@ vectorizable_condition (gimple stmt, gimple_stmt_i
            && reduc_def))
     return false;
 
-  /* FORNOW: SLP not supported.  */
-  if (STMT_SLP_TYPE (stmt_info))
-    return false;
-
   /* FORNOW: not yet supported.  */
   if (STMT_VINFO_LIVE_P (stmt_info))
     {
Index: tree-vect-slp.c
===================================================================
--- tree-vect-slp.c	(revision 173127)
+++ tree-vect-slp.c	(working copy)
@@ -1351,9 +1351,10 @@ vect_analyze_slp (loop_vec_info loop_vinfo, bb_vec
 
 
 /* For each possible SLP instance decide whether to SLP it and calculate overall
-   unrolling factor needed to SLP the loop.  */
+   unrolling factor needed to SLP the loop.  Return TRUE if decided to SLP at
+   least one instance.  */
 
-void
+bool
 vect_make_slp_decision (loop_vec_info loop_vinfo)
 {
   unsigned int i, unrolling_factor = 1;
@@ -1382,6 +1383,8 @@ vect_make_slp_decision (loop_vec_info loop_vinfo)
   if (decided_to_slp && vect_print_dump_info (REPORT_SLP))
     fprintf (vect_dump, "Decided to SLP %d instances. Unrolling factor %d",
 	     decided_to_slp, unrolling_factor);
+
+  return (decided_to_slp > 0);
 }
 
 

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2011-04-28 19:51 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-04-28 13:39 [patch, vectorizer] Fix PR tree-optimization/48765 Ira Rosen
2011-04-28 20:24 ` Ira Rosen

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).