* [patch, vectorizer] Fix PR tree-optimization/48765
@ 2011-04-28 13:39 Ira Rosen
2011-04-28 20:24 ` Ira Rosen
0 siblings, 1 reply; 2+ messages in thread
From: Ira Rosen @ 2011-04-28 13:39 UTC (permalink / raw)
To: gcc-patches; +Cc: Patch Tracking
Hi,
Sometimes the loop vectorization factor changes during the analysis, while
statement analysis depends on it. This patch moves the update of the
vectorization factor before the statement analysis, removing the current
mismatch between the analysis and transformation phases that caused the
problem described in the PR.
Bootstrapped and now testing on powerpc64-suse-linux.
I'll commit the patch once the testing completes.
Ira
ChangeLog:
PR tree-optimization/48765
* tree-vect-loop.c (vect_analyze_loop_operations): Scan the statements
and update the vectorization factor according to the type of
vectorization before statement analysis.
(vectorizable_reduction): Set number of copies to 1 in case of pure
SLP statement.
* tree-vect-stmts.c (vectorizable_conversion,
vectorizable_assignment, vectorizable_shift,
vectorizable_operation, vectorizable_type_demotion,
vectorizable_type_promotion, vectorizable_store, vectorizable_load):
Likewise.
(vectorizable_condition): Move the check that it is not SLP
vectorization before the number of copies check.
testsuite/ChangeLog:
PR tree-optimization/48765
* gcc.dg/vect/pr48765.c: New.
Index: tree-vect-loop.c
===================================================================
--- tree-vect-loop.c (revision 173018)
+++ tree-vect-loop.c (working copy)
@@ -1167,7 +1167,38 @@ vect_analyze_loop_operations (loop_vec_info loop_v
gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+ /* If all the stmts in the loop can be SLPed, we perform only SLP, and
+ vectorization factor of the loop is the unrolling factor required by
the
+ SLP instances. If that unrolling factor is 1, we say, that we
perform
+ pure SLP on loop - cross iteration parallelism is not exploited. */
+ for (i = 0; i < nbbs; i++)
+ {
+ basic_block bb = bbs[i];
+ for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
+ {
+ gimple stmt = gsi_stmt (si);
+ stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+ gcc_assert (stmt_info);
+ if ((STMT_VINFO_RELEVANT_P (stmt_info)
+ || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE
(stmt_info)))
+ && !PURE_SLP_STMT (stmt_info))
+ /* STMT needs both SLP and loop-based vectorization. */
+ only_slp_in_loop = false;
+ }
+ }
+ if (only_slp_in_loop)
+ vectorization_factor = LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo);
+ else
+ vectorization_factor = least_common_multiple (vectorization_factor,
+ LOOP_VINFO_SLP_UNROLLING_FACTOR
(loop_vinfo));
+
+ LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
+
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "Updating vectorization factor to %d ",
+ vectorization_factor);
+
for (i = 0; i < nbbs; i++)
{
basic_block bb = bbs[i];
@@ -1272,18 +1303,8 @@ vect_analyze_loop_operations (loop_vec_info loop_v
for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
{
gimple stmt = gsi_stmt (si);
- stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
-
- gcc_assert (stmt_info);
-
if (!vect_analyze_stmt (stmt, &need_to_vectorize, NULL))
return false;
-
- if ((STMT_VINFO_RELEVANT_P (stmt_info)
- || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE
(stmt_info)))
- && !PURE_SLP_STMT (stmt_info))
- /* STMT needs both SLP and loop-based vectorization. */
- only_slp_in_loop = false;
}
} /* bbs */
@@ -1303,18 +1324,6 @@ vect_analyze_loop_operations (loop_vec_info loop_v
return false;
}
- /* If all the stmts in the loop can be SLPed, we perform only SLP, and
- vectorization factor of the loop is the unrolling factor required by
the
- SLP instances. If that unrolling factor is 1, we say, that we
perform
- pure SLP on loop - cross iteration parallelism is not exploited. */
- if (only_slp_in_loop)
- vectorization_factor = LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo);
- else
- vectorization_factor = least_common_multiple (vectorization_factor,
- LOOP_VINFO_SLP_UNROLLING_FACTOR
(loop_vinfo));
-
- LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
-
if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
&& vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump,
@@ -4136,7 +4145,7 @@ vectorizable_reduction (gimple stmt, gimple_stmt_i
if (STMT_VINFO_LIVE_P (vinfo_for_stmt (reduc_def_stmt)))
return false;
- if (slp_node)
+ if (slp_node || PURE_SLP_STMT (stmt_info))
ncopies = 1;
else
ncopies = (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
Index: tree-vect-stmts.c
===================================================================
--- tree-vect-stmts.c (revision 173018)
+++ tree-vect-stmts.c (working copy)
@@ -1747,7 +1747,7 @@ vectorizable_conversion (gimple stmt, gimple_stmt_
/* Multiple types in SLP are handled by creating the appropriate number
of
vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
case of SLP. */
- if (slp_node)
+ if (slp_node || PURE_SLP_STMT (stmt_info))
ncopies = 1;
/* Sanity check: make sure that at least one copy of the vectorized stmt
@@ -1940,7 +1940,7 @@ vectorizable_assignment (gimple stmt, gimple_stmt_
/* Multiple types in SLP are handled by creating the appropriate number
of
vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
case of SLP. */
- if (slp_node)
+ if (slp_node || PURE_SLP_STMT (stmt_info))
ncopies = 1;
else
ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
@@ -2149,7 +2149,7 @@ vectorizable_shift (gimple stmt, gimple_stmt_itera
/* Multiple types in SLP are handled by creating the appropriate number
of
vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
case of SLP. */
- if (slp_node)
+ if (slp_node || PURE_SLP_STMT (stmt_info))
ncopies = 1;
else
ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
@@ -2497,7 +2497,7 @@ vectorizable_operation (gimple stmt, gimple_stmt_i
/* Multiple types in SLP are handled by creating the appropriate number
of
vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
case of SLP. */
- if (slp_node)
+ if (slp_node || PURE_SLP_STMT (stmt_info))
ncopies = 1;
else
ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
@@ -2895,7 +2895,7 @@ vectorizable_type_demotion (gimple stmt, gimple_st
/* Multiple types in SLP are handled by creating the appropriate number
of
vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
case of SLP. */
- if (slp_node)
+ if (slp_node || PURE_SLP_STMT (stmt_info))
ncopies = 1;
else
ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
@@ -3175,7 +3175,7 @@ vectorizable_type_promotion (gimple stmt, gimple_s
/* Multiple types in SLP are handled by creating the appropriate number
of
vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
case of SLP. */
- if (slp_node)
+ if (slp_node || PURE_SLP_STMT (stmt_info))
ncopies = 1;
else
ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
@@ -3358,7 +3358,7 @@ vectorizable_store (gimple stmt, gimple_stmt_itera
/* Multiple types in SLP are handled by creating the appropriate number
of
vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
case of SLP. */
- if (slp)
+ if (slp || PURE_SLP_STMT (stmt_info))
ncopies = 1;
else
ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
@@ -3851,7 +3851,7 @@ vectorizable_load (gimple stmt, gimple_stmt_iterat
/* Multiple types in SLP are handled by creating the appropriate number
of
vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
case of SLP. */
- if (slp)
+ if (slp || PURE_SLP_STMT (stmt_info))
ncopies = 1;
else
ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
@@ -4457,6 +4457,10 @@ vectorizable_condition (gimple stmt, gimple_stmt_i
/* FORNOW: unsupported in basic block SLP. */
gcc_assert (loop_vinfo);
+ /* FORNOW: SLP not supported. */
+ if (STMT_SLP_TYPE (stmt_info))
+ return false;
+
gcc_assert (ncopies >= 1);
if (reduc_index && ncopies > 1)
return false; /* FORNOW */
@@ -4469,10 +4473,6 @@ vectorizable_condition (gimple stmt, gimple_stmt_i
&& reduc_def))
return false;
- /* FORNOW: SLP not supported. */
- if (STMT_SLP_TYPE (stmt_info))
- return false;
-
/* FORNOW: not yet supported. */
if (STMT_VINFO_LIVE_P (stmt_info))
{
Index: testsuite/gcc.dg/vect/pr48765.c
===================================================================
--- testsuite/gcc.dg/vect/pr48765.c (revision 0)
+++ testsuite/gcc.dg/vect/pr48765.c (revision 0)
@@ -0,0 +1,82 @@
+/* { dg-do compile { target powerpc*-*-* } } */
+/* { dg-options "-m64 -O3 -mcpu=power6" } */
+
+enum reg_class
+{
+ NO_REGS, AP_REG, XRF_REGS, GENERAL_REGS, AGRF_REGS, XGRF_REGS, ALL_REGS,
+ LIM_REG_CLASSES
+};
+enum machine_mode
+{
+ VOIDmode, QImode, HImode, PSImode, SImode, PDImode, DImode, TImode,
OImode,
+ QFmode, HFmode, TQFmode, SFmode, DFmode, XFmode, TFmode, SCmode,
DCmode,
+ XCmode, TCmode, CQImode, CHImode, CSImode, CDImode, CTImode, COImode,
+ BLKmode, CCmode, CCEVENmode, MAX_MACHINE_MODE
+};
+typedef struct rtx_def
+{
+ int mode:8;
+}
+ *rtx;
+extern rtx *regno_reg_rtx;
+typedef unsigned int HARD_REG_ELT_TYPE;
+typedef HARD_REG_ELT_TYPE HARD_REG_SET[((64 + 32 - 1) / 32)];
+extern int reg_alloc_order[64];
+extern int max_regno;
+extern int *reg_n_calls_crossed;
+extern short *reg_renumber;
+static int *reg_where_dead;
+static int *reg_where_born;
+static int *reg_order;
+static char *regs_change_size;
+static HARD_REG_SET *after_insn_hard_regs;
+static int stupid_find_reg (int, enum reg_class, enum machine_mode, int,
int,
+ int);
+void
+stupid_life_analysis (f, nregs, file)
+ rtx f;
+{
+ register int i;
+ for (i = (((64)) + 3) + 1; i < max_regno; i++)
+ {
+ register int r = reg_order[i];
+ if ((int) LIM_REG_CLASSES > 1)
+ reg_renumber[r] =
+ stupid_find_reg (reg_n_calls_crossed[r], reg_preferred_class (r),
+ ((regno_reg_rtx[r])->mode), reg_where_born[r],
+ reg_where_dead[r], regs_change_size[r]);
+ }
+}
+
+static int
+stupid_find_reg (call_preserved, class, mode, born_insn, dead_insn,
+ changes_size)
+ int call_preserved;
+ enum reg_class class;
+ enum machine_mode mode;
+{
+ register int i, ins;
+ HARD_REG_SET used, this_reg;
+ for (ins = born_insn; ins < dead_insn; ins++)
+ do
+ {
+ register HARD_REG_ELT_TYPE *scan_tp_ = (used), *scan_fp_ =
+ (after_insn_hard_regs[ins]);
+ for (i = 0; i < ((64 + 32 - 1) / 32); i++)
+ *scan_tp_++ |= *scan_fp_++;
+ }
+ while (0);
+ for (i = 0; i < 64; i++)
+ {
+ int regno = reg_alloc_order[i];
+ if (((used)[(regno) / ((unsigned) 32)] &
+ (((HARD_REG_ELT_TYPE) (1)) << ((regno) % ((unsigned) 32)))))
+ {
+ register int j;
+ if (j == regno)
+ return regno;
+ }
+ }
+}
+
+/* { dg-final { cleanup-tree-dump "vect" } } */
^ permalink raw reply [flat|nested] 2+ messages in thread
* Re: [patch, vectorizer] Fix PR tree-optimization/48765
2011-04-28 13:39 [patch, vectorizer] Fix PR tree-optimization/48765 Ira Rosen
@ 2011-04-28 20:24 ` Ira Rosen
0 siblings, 0 replies; 2+ messages in thread
From: Ira Rosen @ 2011-04-28 20:24 UTC (permalink / raw)
To: Ira Rosen; +Cc: gcc-patches, Patch Tracking
[-- Attachment #1: Type: text/plain, Size: 1950 bytes --]
gcc-patches-owner@gcc.gnu.org wrote on 28/04/2011 03:42:01 PM:
>
>
> Hi,
>
> Sometimes loop vectorization factor changes during the analysis, while
> statement analysis depends on it. This patch moves the update of the
> vectorization before statements, avoiding current difference between the
> analysis and the transformations phases that caused the problem described
> in the PR.
>
> Bootstrapped and now testing on powerpc64-suse-linux.
> I'll commit the patch once the testing completes.
>
I ended up committing a slightly different version of the patch that scans
the loop statements only if we decided to use SLP.
Bootstrapped and tested on powerpc64-suse-linux.
Committed revision 173132.
Ira
ChangeLog:
PR tree-optimization/48765
* tree-vectorizer.h (vect_make_slp_decision): Return bool.
* tree-vect-loop.c (vect_analyze_loop_operations): Add new argument
to indicate if loop aware SLP is being used. Scan the statements
and update the vectorization factor according to the type of
vectorization before statement analysis.
(vect_analyze_loop_2): Get a return value from vect_make_slp_decision,
pass it to vect_analyze_loop_operations.
(vectorizable_reduction): Set number of copies to 1 in case of pure
SLP statement.
* tree-vect-stmts.c (vectorizable_conversion,
vectorizable_assignment, vectorizable_shift,
vectorizable_operation, vectorizable_type_demotion,
vectorizable_type_promotion, vectorizable_store, vectorizable_load):
Likewise.
(vectorizable_condition): Move the check that it is not SLP
vectorization before the number of copies check.
* tree-vect-slp.c (vect_make_slp_decision): Return TRUE if decided
to vectorize the loop using SLP.
testsuite/ChangeLog:
PR tree-optimization/48765
* gcc.dg/vect/pr48765.c: New.
(See attached file: pr48765.txt)
[-- Attachment #2: pr48765.txt --]
[-- Type: text/plain, Size: 14526 bytes --]
Index: ChangeLog
===================================================================
--- ChangeLog (revision 173127)
+++ ChangeLog (working copy)
@@ -1,3 +1,25 @@
+2011-04-28 Ira Rosen <ira.rosen@linaro.org>
+
+ PR tree-optimization/48765
+ * tree-vectorizer.h (vect_make_slp_decision): Return bool.
+ * tree-vect-loop.c (vect_analyze_loop_operations): Add new argument
+ to indicate if loop aware SLP is being used. Scan the statements
+ and update the vectorization factor according to the type of
+ vectorization before statement analysis.
+ (vect_analyze_loop_2): Get a return value from vect_make_slp_decision,
+ pass it to vect_analyze_loop_operations.
+ (vectorizable_reduction): Set number of copies to 1 in case of pure
+ SLP statement.
+ * tree-vect-stmts.c (vectorizable_conversion,
+ vectorizable_assignment, vectorizable_shift,
+ vectorizable_operation, vectorizable_type_demotion,
+ vectorizable_type_promotion, vectorizable_store, vectorizable_load):
+ Likewise.
+ (vectorizable_condition): Move the check that it is not SLP
+ vectorization before the number of copies check.
+ * tree-vect-slp.c (vect_make_slp_decision): Return TRUE if decided
+ to vectorize the loop using SLP.
+
2011-04-28 Jakub Jelinek <jakub@redhat.com>
PR middle-end/48597
Index: testsuite/gcc.dg/vect/pr48765.c
===================================================================
--- testsuite/gcc.dg/vect/pr48765.c (revision 0)
+++ testsuite/gcc.dg/vect/pr48765.c (revision 0)
@@ -0,0 +1,82 @@
+/* { dg-do compile { target powerpc*-*-* } } */
+/* { dg-options "-m64 -O3 -mcpu=power6" } */
+
+enum reg_class
+{
+ NO_REGS, AP_REG, XRF_REGS, GENERAL_REGS, AGRF_REGS, XGRF_REGS, ALL_REGS,
+ LIM_REG_CLASSES
+};
+enum machine_mode
+{
+ VOIDmode, QImode, HImode, PSImode, SImode, PDImode, DImode, TImode, OImode,
+ QFmode, HFmode, TQFmode, SFmode, DFmode, XFmode, TFmode, SCmode, DCmode,
+ XCmode, TCmode, CQImode, CHImode, CSImode, CDImode, CTImode, COImode,
+ BLKmode, CCmode, CCEVENmode, MAX_MACHINE_MODE
+};
+typedef struct rtx_def
+{
+ int mode:8;
+}
+ *rtx;
+extern rtx *regno_reg_rtx;
+typedef unsigned int HARD_REG_ELT_TYPE;
+typedef HARD_REG_ELT_TYPE HARD_REG_SET[((64 + 32 - 1) / 32)];
+extern int reg_alloc_order[64];
+extern int max_regno;
+extern int *reg_n_calls_crossed;
+extern short *reg_renumber;
+static int *reg_where_dead;
+static int *reg_where_born;
+static int *reg_order;
+static char *regs_change_size;
+static HARD_REG_SET *after_insn_hard_regs;
+static int stupid_find_reg (int, enum reg_class, enum machine_mode, int, int,
+ int);
+void
+stupid_life_analysis (f, nregs, file)
+ rtx f;
+{
+ register int i;
+ for (i = (((64)) + 3) + 1; i < max_regno; i++)
+ {
+ register int r = reg_order[i];
+ if ((int) LIM_REG_CLASSES > 1)
+ reg_renumber[r] =
+ stupid_find_reg (reg_n_calls_crossed[r], reg_preferred_class (r),
+ ((regno_reg_rtx[r])->mode), reg_where_born[r],
+ reg_where_dead[r], regs_change_size[r]);
+ }
+}
+
+static int
+stupid_find_reg (call_preserved, class, mode, born_insn, dead_insn,
+ changes_size)
+ int call_preserved;
+ enum reg_class class;
+ enum machine_mode mode;
+{
+ register int i, ins;
+ HARD_REG_SET used, this_reg;
+ for (ins = born_insn; ins < dead_insn; ins++)
+ do
+ {
+ register HARD_REG_ELT_TYPE *scan_tp_ = (used), *scan_fp_ =
+ (after_insn_hard_regs[ins]);
+ for (i = 0; i < ((64 + 32 - 1) / 32); i++)
+ *scan_tp_++ |= *scan_fp_++;
+ }
+ while (0);
+ for (i = 0; i < 64; i++)
+ {
+ int regno = reg_alloc_order[i];
+ if (((used)[(regno) / ((unsigned) 32)] &
+ (((HARD_REG_ELT_TYPE) (1)) << ((regno) % ((unsigned) 32)))))
+ {
+ register int j;
+ if (j == regno)
+ return regno;
+ }
+ }
+}
+
+/* { dg-final { cleanup-tree-dump "vect" } } */
Index: testsuite/ChangeLog
===================================================================
--- testsuite/ChangeLog (revision 173127)
+++ testsuite/ChangeLog (working copy)
@@ -1,3 +1,8 @@
+2011-04-28 Ira Rosen <ira.rosen@linaro.org>
+
+ PR tree-optimization/48765
+ * gcc.dg/vect/pr48765.c: New.
+
2011-04-28 Rainer Orth <ro@CeBiTec.Uni-Bielefeld.DE>
PR tree-optimization/48775
Index: tree-vectorizer.h
===================================================================
--- tree-vectorizer.h (revision 173127)
+++ tree-vectorizer.h (working copy)
@@ -870,7 +870,7 @@ extern bool vect_transform_slp_perm_load (gimple,
extern bool vect_schedule_slp (loop_vec_info, bb_vec_info);
extern void vect_update_slp_costs_according_to_vf (loop_vec_info);
extern bool vect_analyze_slp (loop_vec_info, bb_vec_info);
-extern void vect_make_slp_decision (loop_vec_info);
+extern bool vect_make_slp_decision (loop_vec_info);
extern void vect_detect_hybrid_slp (loop_vec_info);
extern void vect_get_slp_defs (tree, tree, slp_tree, VEC (tree,heap) **,
VEC (tree,heap) **, int);
Index: tree-vect-loop.c
===================================================================
--- tree-vect-loop.c (revision 173127)
+++ tree-vect-loop.c (working copy)
@@ -1146,7 +1146,7 @@ vect_get_cost (enum vect_cost_for_stmt type_of_cos
Scan the loop stmts and make sure they are all vectorizable. */
static bool
-vect_analyze_loop_operations (loop_vec_info loop_vinfo)
+vect_analyze_loop_operations (loop_vec_info loop_vinfo, bool slp)
{
struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
@@ -1167,7 +1167,41 @@ static bool
gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+ if (slp)
+ {
+ /* If all the stmts in the loop can be SLPed, we perform only SLP, and
+ vectorization factor of the loop is the unrolling factor required by
+ the SLP instances. If that unrolling factor is 1, we say, that we
+ perform pure SLP on loop - cross iteration parallelism is not
+ exploited. */
+ for (i = 0; i < nbbs; i++)
+ {
+ basic_block bb = bbs[i];
+ for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
+ {
+ gimple stmt = gsi_stmt (si);
+ stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+ gcc_assert (stmt_info);
+ if ((STMT_VINFO_RELEVANT_P (stmt_info)
+ || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))
+ && !PURE_SLP_STMT (stmt_info))
+ /* STMT needs both SLP and loop-based vectorization. */
+ only_slp_in_loop = false;
+ }
+ }
+ if (only_slp_in_loop)
+ vectorization_factor = LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo);
+ else
+ vectorization_factor = least_common_multiple (vectorization_factor,
+ LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo));
+
+ LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "Updating vectorization factor to %d ",
+ vectorization_factor);
+ }
+
for (i = 0; i < nbbs; i++)
{
basic_block bb = bbs[i];
@@ -1272,18 +1306,8 @@ static bool
for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
{
gimple stmt = gsi_stmt (si);
- stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
-
- gcc_assert (stmt_info);
-
if (!vect_analyze_stmt (stmt, &need_to_vectorize, NULL))
return false;
-
- if ((STMT_VINFO_RELEVANT_P (stmt_info)
- || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))
- && !PURE_SLP_STMT (stmt_info))
- /* STMT needs both SLP and loop-based vectorization. */
- only_slp_in_loop = false;
}
} /* bbs */
@@ -1303,18 +1327,6 @@ static bool
return false;
}
- /* If all the stmts in the loop can be SLPed, we perform only SLP, and
- vectorization factor of the loop is the unrolling factor required by the
- SLP instances. If that unrolling factor is 1, we say, that we perform
- pure SLP on loop - cross iteration parallelism is not exploited. */
- if (only_slp_in_loop)
- vectorization_factor = LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo);
- else
- vectorization_factor = least_common_multiple (vectorization_factor,
- LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo));
-
- LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
-
if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
&& vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump,
@@ -1410,7 +1422,7 @@ static bool
static bool
vect_analyze_loop_2 (loop_vec_info loop_vinfo)
{
- bool ok, dummy;
+ bool ok, dummy, slp = false;
int max_vf = MAX_VECTORIZATION_FACTOR;
int min_vf = 2;
@@ -1524,7 +1536,7 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo)
if (ok)
{
/* Decide which possible SLP instances to SLP. */
- vect_make_slp_decision (loop_vinfo);
+ slp = vect_make_slp_decision (loop_vinfo);
/* Find stmts that need to be both vectorized and SLPed. */
vect_detect_hybrid_slp (loop_vinfo);
@@ -1533,7 +1545,7 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo)
/* Scan all the operations in the loop and make sure they are
vectorizable. */
- ok = vect_analyze_loop_operations (loop_vinfo);
+ ok = vect_analyze_loop_operations (loop_vinfo, slp);
if (!ok)
{
if (vect_print_dump_info (REPORT_DETAILS))
@@ -4136,7 +4148,7 @@ vectorizable_reduction (gimple stmt, gimple_stmt_i
if (STMT_VINFO_LIVE_P (vinfo_for_stmt (reduc_def_stmt)))
return false;
- if (slp_node)
+ if (slp_node || PURE_SLP_STMT (stmt_info))
ncopies = 1;
else
ncopies = (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
Index: tree-vect-stmts.c
===================================================================
--- tree-vect-stmts.c (revision 173127)
+++ tree-vect-stmts.c (working copy)
@@ -1747,7 +1747,7 @@ vectorizable_conversion (gimple stmt, gimple_stmt_
/* Multiple types in SLP are handled by creating the appropriate number of
vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
case of SLP. */
- if (slp_node)
+ if (slp_node || PURE_SLP_STMT (stmt_info))
ncopies = 1;
/* Sanity check: make sure that at least one copy of the vectorized stmt
@@ -1940,7 +1940,7 @@ vectorizable_assignment (gimple stmt, gimple_stmt_
/* Multiple types in SLP are handled by creating the appropriate number of
vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
case of SLP. */
- if (slp_node)
+ if (slp_node || PURE_SLP_STMT (stmt_info))
ncopies = 1;
else
ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
@@ -2149,7 +2149,7 @@ vectorizable_shift (gimple stmt, gimple_stmt_itera
/* Multiple types in SLP are handled by creating the appropriate number of
vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
case of SLP. */
- if (slp_node)
+ if (slp_node || PURE_SLP_STMT (stmt_info))
ncopies = 1;
else
ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
@@ -2497,7 +2497,7 @@ vectorizable_operation (gimple stmt, gimple_stmt_i
/* Multiple types in SLP are handled by creating the appropriate number of
vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
case of SLP. */
- if (slp_node)
+ if (slp_node || PURE_SLP_STMT (stmt_info))
ncopies = 1;
else
ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
@@ -2895,7 +2895,7 @@ vectorizable_type_demotion (gimple stmt, gimple_st
/* Multiple types in SLP are handled by creating the appropriate number of
vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
case of SLP. */
- if (slp_node)
+ if (slp_node || PURE_SLP_STMT (stmt_info))
ncopies = 1;
else
ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
@@ -3175,7 +3175,7 @@ vectorizable_type_promotion (gimple stmt, gimple_s
/* Multiple types in SLP are handled by creating the appropriate number of
vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
case of SLP. */
- if (slp_node)
+ if (slp_node || PURE_SLP_STMT (stmt_info))
ncopies = 1;
else
ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
@@ -3358,7 +3358,7 @@ vectorizable_store (gimple stmt, gimple_stmt_itera
/* Multiple types in SLP are handled by creating the appropriate number of
vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
case of SLP. */
- if (slp)
+ if (slp || PURE_SLP_STMT (stmt_info))
ncopies = 1;
else
ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
@@ -3851,7 +3851,7 @@ vectorizable_load (gimple stmt, gimple_stmt_iterat
/* Multiple types in SLP are handled by creating the appropriate number of
vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
case of SLP. */
- if (slp)
+ if (slp || PURE_SLP_STMT (stmt_info))
ncopies = 1;
else
ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
@@ -4457,6 +4457,10 @@ vectorizable_condition (gimple stmt, gimple_stmt_i
/* FORNOW: unsupported in basic block SLP. */
gcc_assert (loop_vinfo);
+ /* FORNOW: SLP not supported. */
+ if (STMT_SLP_TYPE (stmt_info))
+ return false;
+
gcc_assert (ncopies >= 1);
if (reduc_index && ncopies > 1)
return false; /* FORNOW */
@@ -4469,10 +4473,6 @@ vectorizable_condition (gimple stmt, gimple_stmt_i
&& reduc_def))
return false;
- /* FORNOW: SLP not supported. */
- if (STMT_SLP_TYPE (stmt_info))
- return false;
-
/* FORNOW: not yet supported. */
if (STMT_VINFO_LIVE_P (stmt_info))
{
Index: tree-vect-slp.c
===================================================================
--- tree-vect-slp.c (revision 173127)
+++ tree-vect-slp.c (working copy)
@@ -1351,9 +1351,10 @@ vect_analyze_slp (loop_vec_info loop_vinfo, bb_vec
/* For each possible SLP instance decide whether to SLP it and calculate overall
- unrolling factor needed to SLP the loop. */
+ unrolling factor needed to SLP the loop. Return TRUE if decided to SLP at
+ least one instance. */
-void
+bool
vect_make_slp_decision (loop_vec_info loop_vinfo)
{
unsigned int i, unrolling_factor = 1;
@@ -1382,6 +1383,8 @@ vect_make_slp_decision (loop_vec_info loop_vinfo)
if (decided_to_slp && vect_print_dump_info (REPORT_SLP))
fprintf (vect_dump, "Decided to SLP %d instances. Unrolling factor %d",
decided_to_slp, unrolling_factor);
+
+ return (decided_to_slp > 0);
}
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2011-04-28 19:51 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-04-28 13:39 [patch, vectorizer] Fix PR tree-optimization/48765 Ira Rosen
2011-04-28 20:24 ` Ira Rosen
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).