public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
From: Ira Rosen <IRAR@il.ibm.com>
To: Ira Rosen <IRAR@il.ibm.com>
Cc: gcc-patches@gcc.gnu.org, Patch Tracking <patches@linaro.org>
Subject: Re: [patch, vectorizer] Fix PR tree-optimization/48765
Date: Thu, 28 Apr 2011 20:24:00 -0000	[thread overview]
Message-ID: <OFD28EBEA5.2BE93CC1-ONC2257880.00610D57-C2257880.006D111A@il.ibm.com> (raw)
In-Reply-To: <OF93A25F4C.68B007ED-ONC2257880.004195B4-C2257880.0045C3D6@il.ibm.com>

[-- Attachment #1: Type: text/plain, Size: 1950 bytes --]



gcc-patches-owner@gcc.gnu.org wrote on 28/04/2011 03:42:01 PM:

>
>
> Hi,
>
> Sometimes the loop vectorization factor changes during the analysis, while
> statement analysis depends on it. This patch moves the update of the
> vectorization factor before the statement analysis, avoiding the current
> difference between the analysis and the transformation phases that caused
> the problem described in the PR.
>
> Bootstrapped and now testing on powerpc64-suse-linux.
> I'll commit the patch once the testing completes.
>

I ended up committing a slightly different version of the patch that scans
the loop statements only if we decided to use SLP.

Bootstrapped and tested on powerpc64-suse-linux.
Committed revision 173132.

Ira

ChangeLog:

        PR tree-optimization/48765
        * tree-vectorizer.h (vect_make_slp_decision): Return bool.
        * tree-vect-loop.c (vect_analyze_loop_operations): Add new argument
        to indicate if loop aware SLP is being used.  Scan the statements
        and update the vectorization factor according to the type of
        vectorization before statement analysis.
        (vect_analyze_loop_2): Get a return value from vect_make_slp_decision,
        pass it to vect_analyze_loop_operations.
        (vectorizable_reduction): Set number of copies to 1 in case of pure
        SLP statement.
        * tree-vect-stmts.c (vectorizable_conversion,
        vectorizable_assignment, vectorizable_shift,
        vectorizable_operation, vectorizable_type_demotion,
        vectorizable_type_promotion, vectorizable_store, vectorizable_load):
        Likewise.
        (vectorizable_condition): Move the check that it is not SLP
        vectorization before the number of copies check.
        * tree-vect-slp.c (vect_make_slp_decision): Return TRUE if decided
        to vectorize the loop using SLP.


testsuite/ChangeLog:

        PR tree-optimization/48765
        * gcc.dg/vect/pr48765.c: New.

(See attached file: pr48765.txt)

[-- Attachment #2: pr48765.txt --]
[-- Type: text/plain, Size: 14526 bytes --]

Index: ChangeLog
===================================================================
--- ChangeLog	(revision 173127)
+++ ChangeLog	(working copy)
@@ -1,3 +1,25 @@
+2011-04-28  Ira Rosen  <ira.rosen@linaro.org>
+
+	PR tree-optimization/48765
+	* tree-vectorizer.h (vect_make_slp_decision): Return bool.
+	* tree-vect-loop.c (vect_analyze_loop_operations): Add new argument
+	to indicate if loop aware SLP is being used.  Scan the statements
+	and update the vectorization factor according to the type of
+	vectorization before statement analysis.
+	(vect_analyze_loop_2): Get a return value from vect_make_slp_decision,
+	pass it to vect_analyze_loop_operations.
+	(vectorizable_reduction): Set number of copies to 1 in case of pure
+	SLP statement.
+	* tree-vect-stmts.c (vectorizable_conversion,
+	vectorizable_assignment, vectorizable_shift,
+	vectorizable_operation, vectorizable_type_demotion,
+	vectorizable_type_promotion, vectorizable_store, vectorizable_load):
+	Likewise.
+	(vectorizable_condition): Move the check that it is not SLP
+	vectorization before the number of copies check.
+	* tree-vect-slp.c (vect_make_slp_decision): Return TRUE if decided
+	to vectorize the loop using SLP.
+
 2011-04-28  Jakub Jelinek  <jakub@redhat.com>
 
 	PR middle-end/48597
Index: testsuite/gcc.dg/vect/pr48765.c
===================================================================
--- testsuite/gcc.dg/vect/pr48765.c	(revision 0)
+++ testsuite/gcc.dg/vect/pr48765.c	(revision 0)
@@ -0,0 +1,82 @@
+/* { dg-do compile { target powerpc*-*-* } } */
+/* { dg-options "-m64 -O3 -mcpu=power6" } */
+
+enum reg_class
+{
+  NO_REGS, AP_REG, XRF_REGS, GENERAL_REGS, AGRF_REGS, XGRF_REGS, ALL_REGS,
+    LIM_REG_CLASSES
+};
+enum machine_mode
+{
+  VOIDmode, QImode, HImode, PSImode, SImode, PDImode, DImode, TImode, OImode,
+    QFmode, HFmode, TQFmode, SFmode, DFmode, XFmode, TFmode, SCmode, DCmode,
+    XCmode, TCmode, CQImode, CHImode, CSImode, CDImode, CTImode, COImode,
+    BLKmode, CCmode, CCEVENmode, MAX_MACHINE_MODE
+};
+typedef struct rtx_def
+{
+  int mode:8;
+}
+ *rtx;
+extern rtx *regno_reg_rtx;
+typedef unsigned int HARD_REG_ELT_TYPE;
+typedef HARD_REG_ELT_TYPE HARD_REG_SET[((64 + 32 - 1) / 32)];
+extern int reg_alloc_order[64];
+extern int max_regno;
+extern int *reg_n_calls_crossed;
+extern short *reg_renumber;
+static int *reg_where_dead;
+static int *reg_where_born;
+static int *reg_order;
+static char *regs_change_size;
+static HARD_REG_SET *after_insn_hard_regs;
+static int stupid_find_reg (int, enum reg_class, enum machine_mode, int, int,
+			    int);
+void
+stupid_life_analysis (f, nregs, file)
+     rtx f;
+{
+  register int i;
+  for (i = (((64)) + 3) + 1; i < max_regno; i++)
+    {
+      register int r = reg_order[i];
+      if ((int) LIM_REG_CLASSES > 1)
+	reg_renumber[r] =
+	  stupid_find_reg (reg_n_calls_crossed[r], reg_preferred_class (r),
+			   ((regno_reg_rtx[r])->mode), reg_where_born[r],
+			   reg_where_dead[r], regs_change_size[r]);
+    }
+}
+
+static int
+stupid_find_reg (call_preserved, class, mode, born_insn, dead_insn,
+		 changes_size)
+     int call_preserved;
+     enum reg_class class;
+     enum machine_mode mode;
+{
+  register int i, ins;
+  HARD_REG_SET used, this_reg;
+  for (ins = born_insn; ins < dead_insn; ins++)
+    do
+      {
+	register HARD_REG_ELT_TYPE *scan_tp_ = (used), *scan_fp_ =
+	  (after_insn_hard_regs[ins]);
+	for (i = 0; i < ((64 + 32 - 1) / 32); i++)
+	  *scan_tp_++ |= *scan_fp_++;
+      }
+    while (0);
+  for (i = 0; i < 64; i++)
+    {
+      int regno = reg_alloc_order[i];
+      if (((used)[(regno) / ((unsigned) 32)] &
+	   (((HARD_REG_ELT_TYPE) (1)) << ((regno) % ((unsigned) 32)))))
+	{
+	  register int j;
+	  if (j == regno)
+	    return regno;
+	}
+    }
+}
+
+/* { dg-final { cleanup-tree-dump "vect" } } */
Index: testsuite/ChangeLog
===================================================================
--- testsuite/ChangeLog	(revision 173127)
+++ testsuite/ChangeLog	(working copy)
@@ -1,3 +1,8 @@
+2011-04-28  Ira Rosen  <ira.rosen@linaro.org>
+
+	PR tree-optimization/48765
+	* gcc.dg/vect/pr48765.c: New.
+
 2011-04-28  Rainer Orth  <ro@CeBiTec.Uni-Bielefeld.DE>
 
 	PR tree-optimization/48775
Index: tree-vectorizer.h
===================================================================
--- tree-vectorizer.h	(revision 173127)
+++ tree-vectorizer.h	(working copy)
@@ -870,7 +870,7 @@ extern bool vect_transform_slp_perm_load (gimple,
 extern bool vect_schedule_slp (loop_vec_info, bb_vec_info);
 extern void vect_update_slp_costs_according_to_vf (loop_vec_info);
 extern bool vect_analyze_slp (loop_vec_info, bb_vec_info);
-extern void vect_make_slp_decision (loop_vec_info);
+extern bool vect_make_slp_decision (loop_vec_info);
 extern void vect_detect_hybrid_slp (loop_vec_info);
 extern void vect_get_slp_defs (tree, tree, slp_tree, VEC (tree,heap) **,
                                VEC (tree,heap) **, int);
Index: tree-vect-loop.c
===================================================================
--- tree-vect-loop.c	(revision 173127)
+++ tree-vect-loop.c	(working copy)
@@ -1146,7 +1146,7 @@ vect_get_cost (enum vect_cost_for_stmt type_of_cos
    Scan the loop stmts and make sure they are all vectorizable.  */
 
 static bool
-vect_analyze_loop_operations (loop_vec_info loop_vinfo)
+vect_analyze_loop_operations (loop_vec_info loop_vinfo, bool slp)
 {
   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
@@ -1167,7 +1167,41 @@ static bool
 
   gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
   vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+  if (slp)
+    {
+      /* If all the stmts in the loop can be SLPed, we perform only SLP, and
+	 vectorization factor of the loop is the unrolling factor required by
+	 the SLP instances.  If that unrolling factor is 1, we say, that we
+	 perform pure SLP on loop - cross iteration parallelism is not
+	 exploited.  */
+      for (i = 0; i < nbbs; i++)
+	{
+	  basic_block bb = bbs[i];
+	  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
+	    {
+	      gimple stmt = gsi_stmt (si);
+	      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+	      gcc_assert (stmt_info);
+	      if ((STMT_VINFO_RELEVANT_P (stmt_info)
+		   || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))
+		  && !PURE_SLP_STMT (stmt_info))
+		/* STMT needs both SLP and loop-based vectorization.  */
+		only_slp_in_loop = false;
+	    }
+	}
 
+      if (only_slp_in_loop)
+	vectorization_factor = LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo);
+      else
+	vectorization_factor = least_common_multiple (vectorization_factor,
+				LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo));
+
+      LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
+      if (vect_print_dump_info (REPORT_DETAILS))
+	fprintf (vect_dump, "Updating vectorization factor to %d ",
+	 		    vectorization_factor);
+    }
+
   for (i = 0; i < nbbs; i++)
     {
       basic_block bb = bbs[i];
@@ -1272,18 +1306,8 @@ static bool
       for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
         {
           gimple stmt = gsi_stmt (si);
-          stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
-
-          gcc_assert (stmt_info);
-
 	  if (!vect_analyze_stmt (stmt, &need_to_vectorize, NULL))
 	    return false;
-
-          if ((STMT_VINFO_RELEVANT_P (stmt_info)
-               || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))
-              && !PURE_SLP_STMT (stmt_info))
-            /* STMT needs both SLP and loop-based vectorization.  */
-            only_slp_in_loop = false;
         }
     } /* bbs */
 
@@ -1303,18 +1327,6 @@ static bool
       return false;
     }
 
-  /* If all the stmts in the loop can be SLPed, we perform only SLP, and
-     vectorization factor of the loop is the unrolling factor required by the
-     SLP instances.  If that unrolling factor is 1, we say, that we perform
-     pure SLP on loop - cross iteration parallelism is not exploited.  */
-  if (only_slp_in_loop)
-    vectorization_factor = LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo);
-  else
-    vectorization_factor = least_common_multiple (vectorization_factor,
-                                LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo));
-
-  LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
-
   if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
       && vect_print_dump_info (REPORT_DETAILS))
     fprintf (vect_dump,
@@ -1410,7 +1422,7 @@ static bool
 static bool
 vect_analyze_loop_2 (loop_vec_info loop_vinfo)
 {
-  bool ok, dummy;
+  bool ok, dummy, slp = false;
   int max_vf = MAX_VECTORIZATION_FACTOR;
   int min_vf = 2;
 
@@ -1524,7 +1536,7 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo)
   if (ok)
     {
       /* Decide which possible SLP instances to SLP.  */
-      vect_make_slp_decision (loop_vinfo);
+      slp = vect_make_slp_decision (loop_vinfo);
 
       /* Find stmts that need to be both vectorized and SLPed.  */
       vect_detect_hybrid_slp (loop_vinfo);
@@ -1533,7 +1545,7 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo)
   /* Scan all the operations in the loop and make sure they are
      vectorizable.  */
 
-  ok = vect_analyze_loop_operations (loop_vinfo);
+  ok = vect_analyze_loop_operations (loop_vinfo, slp);
   if (!ok)
     {
       if (vect_print_dump_info (REPORT_DETAILS))
@@ -4136,7 +4148,7 @@ vectorizable_reduction (gimple stmt, gimple_stmt_i
   if (STMT_VINFO_LIVE_P (vinfo_for_stmt (reduc_def_stmt)))
     return false;
 
-  if (slp_node)
+  if (slp_node || PURE_SLP_STMT (stmt_info))
     ncopies = 1;
   else
     ncopies = (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
Index: tree-vect-stmts.c
===================================================================
--- tree-vect-stmts.c	(revision 173127)
+++ tree-vect-stmts.c	(working copy)
@@ -1747,7 +1747,7 @@ vectorizable_conversion (gimple stmt, gimple_stmt_
   /* Multiple types in SLP are handled by creating the appropriate number of
      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
      case of SLP.  */
-  if (slp_node)
+  if (slp_node || PURE_SLP_STMT (stmt_info))
     ncopies = 1;
 
   /* Sanity check: make sure that at least one copy of the vectorized stmt
@@ -1940,7 +1940,7 @@ vectorizable_assignment (gimple stmt, gimple_stmt_
   /* Multiple types in SLP are handled by creating the appropriate number of
      vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
      case of SLP.  */
-  if (slp_node)
+  if (slp_node || PURE_SLP_STMT (stmt_info))
     ncopies = 1;
   else
     ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
@@ -2149,7 +2149,7 @@ vectorizable_shift (gimple stmt, gimple_stmt_itera
   /* Multiple types in SLP are handled by creating the appropriate number of
      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
      case of SLP.  */
-  if (slp_node)
+  if (slp_node || PURE_SLP_STMT (stmt_info))
     ncopies = 1;
   else
     ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
@@ -2497,7 +2497,7 @@ vectorizable_operation (gimple stmt, gimple_stmt_i
   /* Multiple types in SLP are handled by creating the appropriate number of
      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
      case of SLP.  */
-  if (slp_node)
+  if (slp_node || PURE_SLP_STMT (stmt_info))
     ncopies = 1;
   else
     ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
@@ -2895,7 +2895,7 @@ vectorizable_type_demotion (gimple stmt, gimple_st
   /* Multiple types in SLP are handled by creating the appropriate number of
      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
      case of SLP.  */
-  if (slp_node)
+  if (slp_node || PURE_SLP_STMT (stmt_info))
     ncopies = 1;
   else
     ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
@@ -3175,7 +3175,7 @@ vectorizable_type_promotion (gimple stmt, gimple_s
   /* Multiple types in SLP are handled by creating the appropriate number of
      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
      case of SLP.  */
-  if (slp_node)
+  if (slp_node || PURE_SLP_STMT (stmt_info))
     ncopies = 1;
   else
     ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
@@ -3358,7 +3358,7 @@ vectorizable_store (gimple stmt, gimple_stmt_itera
   /* Multiple types in SLP are handled by creating the appropriate number of
      vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
      case of SLP.  */
-  if (slp)
+  if (slp || PURE_SLP_STMT (stmt_info))
     ncopies = 1;
   else
     ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
@@ -3851,7 +3851,7 @@ vectorizable_load (gimple stmt, gimple_stmt_iterat
   /* Multiple types in SLP are handled by creating the appropriate number of
      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
      case of SLP.  */
-  if (slp)
+  if (slp || PURE_SLP_STMT (stmt_info))
     ncopies = 1;
   else
     ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
@@ -4457,6 +4457,10 @@ vectorizable_condition (gimple stmt, gimple_stmt_i
   /* FORNOW: unsupported in basic block SLP.  */
   gcc_assert (loop_vinfo);
 
+  /* FORNOW: SLP not supported.  */
+  if (STMT_SLP_TYPE (stmt_info))
+    return false;
+
   gcc_assert (ncopies >= 1);
   if (reduc_index && ncopies > 1)
     return false; /* FORNOW */
@@ -4469,10 +4473,6 @@ vectorizable_condition (gimple stmt, gimple_stmt_i
            && reduc_def))
     return false;
 
-  /* FORNOW: SLP not supported.  */
-  if (STMT_SLP_TYPE (stmt_info))
-    return false;
-
   /* FORNOW: not yet supported.  */
   if (STMT_VINFO_LIVE_P (stmt_info))
     {
Index: tree-vect-slp.c
===================================================================
--- tree-vect-slp.c	(revision 173127)
+++ tree-vect-slp.c	(working copy)
@@ -1351,9 +1351,10 @@ vect_analyze_slp (loop_vec_info loop_vinfo, bb_vec
 
 
 /* For each possible SLP instance decide whether to SLP it and calculate overall
-   unrolling factor needed to SLP the loop.  */
+   unrolling factor needed to SLP the loop.  Return TRUE if decided to SLP at
+   least one instance.  */
 
-void
+bool
 vect_make_slp_decision (loop_vec_info loop_vinfo)
 {
   unsigned int i, unrolling_factor = 1;
@@ -1382,6 +1383,8 @@ vect_make_slp_decision (loop_vec_info loop_vinfo)
   if (decided_to_slp && vect_print_dump_info (REPORT_SLP))
     fprintf (vect_dump, "Decided to SLP %d instances. Unrolling factor %d",
 	     decided_to_slp, unrolling_factor);
+
+  return (decided_to_slp > 0);
 }
 
 

      reply	other threads:[~2011-04-28 19:51 UTC|newest]

Thread overview: 2+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2011-04-28 13:39 Ira Rosen
2011-04-28 20:24 ` Ira Rosen [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=OFD28EBEA5.2BE93CC1-ONC2257880.00610D57-C2257880.006D111A@il.ibm.com \
    --to=irar@il.ibm.com \
    --cc=gcc-patches@gcc.gnu.org \
    --cc=patches@linaro.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).