From: "Kewen.Lin" <linkw@linux.ibm.com>
To: GCC Patches <gcc-patches@gcc.gnu.org>
Cc: Segher Boessenkool <segher@kernel.crashing.org>,
Bill Schmidt <wschmidt@linux.ibm.com>,
"bin.cheng" <bin.cheng@linux.alibaba.com>,
Richard Guenther <rguenther@suse.de>
Subject: [PATCH 1/4 GCC11] Add middle-end unroll factor estimation
Date: Thu, 16 Jan 2020 09:43:00 -0000 [thread overview]
Message-ID: <131a3294-1951-3678-453b-085744366af6@linux.ibm.com> (raw)
In-Reply-To: <ddd8c186-fc88-96df-b1c0-f99edec654f2@linux.ibm.com>
[-- Attachment #1: Type: text/plain, Size: 731 bytes --]
gcc/ChangeLog
2020-01-16 Kewen Lin <linkw@gcc.gnu.org>
* cfgloop.h (struct loop): New field estimated_uf.
* config/rs6000/rs6000.c (TARGET_LOOP_UNROLL_ADJUST_TREE): New macro.
(rs6000_loop_unroll_adjust_tree): New function.
* doc/tm.texi: Regenerate.
* doc/tm.texi.in (TARGET_LOOP_UNROLL_ADJUST_TREE): New hook.
* target.def (loop_unroll_adjust_tree): New hook.
* tree-ssa-loop-manip.c (decide_uf_const_iter): New function.
(decide_uf_runtime_iter): Likewise.
(decide_uf_stupid): Likewise.
(estimate_unroll_factor): Likewise.
* tree-ssa-loop-manip.h (estimate_unroll_factor): New declare.
* tree-ssa-loop.c (tree_average_num_loop_insns): New function.
* tree-ssa-loop.h (tree_average_num_loop_insns): New declare.
[-- Attachment #2: 0001.patch --]
[-- Type: text/plain, Size: 15184 bytes --]
gcc/cfgloop.h | 3 +
gcc/config/rs6000/rs6000.c | 16 ++-
gcc/doc/tm.texi | 6 ++
gcc/doc/tm.texi.in | 2 +
gcc/target.def | 8 ++
gcc/tree-ssa-loop-manip.c | 254 +++++++++++++++++++++++++++++++++++++++++++++
gcc/tree-ssa-loop-manip.h | 3 +-
gcc/tree-ssa-loop.c | 33 ++++++
gcc/tree-ssa-loop.h | 2 +
9 files changed, 324 insertions(+), 3 deletions(-)
diff --git a/gcc/cfgloop.h b/gcc/cfgloop.h
index e3590d7..feceed6 100644
--- a/gcc/cfgloop.h
+++ b/gcc/cfgloop.h
@@ -232,6 +232,9 @@ public:
Other values means unroll with the given unrolling factor. */
unsigned short unroll;
+ /* Like unroll field above, but it's estimated in middle-end. */
+ unsigned short estimated_uf;
+
/* If this loop was inlined the main clique of the callee which does
not need remapping when copying the loop body. */
unsigned short owned_clique;
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 2995348..0dabaa6 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -1431,6 +1431,9 @@ static const struct attribute_spec rs6000_attribute_table[] =
#undef TARGET_LOOP_UNROLL_ADJUST
#define TARGET_LOOP_UNROLL_ADJUST rs6000_loop_unroll_adjust
+#undef TARGET_LOOP_UNROLL_ADJUST_TREE
+#define TARGET_LOOP_UNROLL_ADJUST_TREE rs6000_loop_unroll_adjust_tree
+
#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS rs6000_init_builtins
#undef TARGET_BUILTIN_DECL
@@ -5090,7 +5093,8 @@ rs6000_destroy_cost_data (void *data)
free (data);
}
-/* Implement targetm.loop_unroll_adjust. */
+/* Implement targetm.loop_unroll_adjust. Don't forget to update
+ loop_unroll_adjust_tree for any changes. */
static unsigned
rs6000_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
@@ -5109,6 +5113,16 @@ rs6000_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
return nunroll;
}
+/* Implement targetm.loop_unroll_adjust_tree, strictly refers to
+ targetm.loop_unroll_adjust. */
+
+static unsigned
+rs6000_loop_unroll_adjust_tree (unsigned nunroll, struct loop *loop)
+{
+ /* For now loop_unroll_adjust is simple, just invoke directly. */
+ return rs6000_loop_unroll_adjust (nunroll, loop);
+}
+
/* Handler for the Mathematical Acceleration Subsystem (mass) interface to a
library with vectorized intrinsics. */
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index 2244df4..86ad278 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -11875,6 +11875,12 @@ is required only when the target has special constraints like maximum
number of memory accesses.
@end deftypefn
+@deftypefn {Target Hook} unsigned TARGET_LOOP_UNROLL_ADJUST_TREE (unsigned @var{nunroll}, class loop *@var{loop})
+This target hook is the same as @code{loop_unroll_adjust}, but it's for
+middle-end unroll factor estimation computation. See
+@code{loop_unroll_adjust} for the function description.
+@end deftypefn
+
@defmac POWI_MAX_MULTS
If defined, this macro is interpreted as a signed integer C expression
that specifies the maximum number of floating point multiplications
diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
index 52cd603..fd9769e 100644
--- a/gcc/doc/tm.texi.in
+++ b/gcc/doc/tm.texi.in
@@ -8008,6 +8008,8 @@ lists.
@hook TARGET_LOOP_UNROLL_ADJUST
+@hook TARGET_LOOP_UNROLL_ADJUST_TREE
+
@defmac POWI_MAX_MULTS
If defined, this macro is interpreted as a signed integer C expression
that specifies the maximum number of floating point multiplications
diff --git a/gcc/target.def b/gcc/target.def
index e705c5d..f61c831 100644
--- a/gcc/target.def
+++ b/gcc/target.def
@@ -2725,6 +2725,14 @@ number of memory accesses.",
unsigned, (unsigned nunroll, class loop *loop),
NULL)
+DEFHOOK
+(loop_unroll_adjust_tree,
+ "This target hook is the same as @code{loop_unroll_adjust}, but it's for\n\
+middle-end unroll factor estimation computation. See\n\
+@code{loop_unroll_adjust} for the function description.",
+ unsigned, (unsigned nunroll, class loop *loop),
+ NULL)
+
/* True if X is a legitimate MODE-mode immediate operand. */
DEFHOOK
(legitimate_constant_p,
diff --git a/gcc/tree-ssa-loop-manip.c b/gcc/tree-ssa-loop-manip.c
index a79912a..db7f6e6 100644
--- a/gcc/tree-ssa-loop-manip.c
+++ b/gcc/tree-ssa-loop-manip.c
@@ -21,6 +21,7 @@ along with GCC; see the file COPYING3. If not see
#include "system.h"
#include "coretypes.h"
#include "backend.h"
+#include "target.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
@@ -42,6 +43,7 @@ along with GCC; see the file COPYING3. If not see
#include "cfgloop.h"
#include "tree-scalar-evolution.h"
#include "tree-inline.h"
+#include "wide-int.h"
/* All bitmaps for rewriting into loop-closed SSA go on this obstack,
so that we can free them all at once. */
@@ -1592,3 +1594,255 @@ canonicalize_loop_ivs (class loop *loop, tree *nit, bool bump_in_latch)
return var_before;
}
+
+/* Try to determine estimated unroll factor for given LOOP with constant number
+ of iterations, mainly refer to decide_unroll_constant_iterations.
+ - NITER_DESC holds number of iteration description if it isn't NULL.
+ - NUNROLL holds a unroll factor value computed with instruction numbers.
+ - ITER holds estimated or likely max loop iterations.
+ Return true if it succeeds, also update estimated_uf. */
+
+static bool
+decide_uf_const_iter (class loop *loop, const tree_niter_desc *niter_desc,
+ unsigned nunroll, const widest_int *iter)
+{
+ /* Skip big loops. */
+ if (nunroll <= 1)
+ return false;
+
+ gcc_assert (niter_desc && niter_desc->assumptions);
+
+ /* Check number of iterations is constant. */
+ if ((niter_desc->may_be_zero && !integer_zerop (niter_desc->may_be_zero))
+ || !tree_fits_uhwi_p (niter_desc->niter))
+ return false;
+
+ unsigned HOST_WIDE_INT const_niter = tree_to_uhwi (niter_desc->niter);
+
+ /* Check for an explicit unrolling factor. */
+ if (loop->unroll > 0 && loop->unroll < USHRT_MAX)
+ {
+ /* It should have been peeled instead. */
+ if (const_niter == 0 || (unsigned) loop->unroll > const_niter - 1)
+ loop->estimated_uf = 1;
+ else
+ loop->estimated_uf = loop->unroll;
+ return true;
+ }
+
+ /* Check whether the loop rolls enough to consider. */
+ if (const_niter < 2 * nunroll || wi::ltu_p (*iter, 2 * nunroll))
+ return false;
+
+ /* Success; now compute number of iterations to unroll. */
+ unsigned best_unroll = 0, n_copies = 0;
+ unsigned best_copies = 2 * nunroll + 10;
+ unsigned i = 2 * nunroll + 2;
+
+ if (i > const_niter - 2)
+ i = const_niter - 2;
+
+ for (; i >= nunroll - 1; i--)
+ {
+ unsigned exit_mod = const_niter % (i + 1);
+
+ if (!empty_block_p (loop->latch))
+ n_copies = exit_mod + i + 1;
+ else if (exit_mod != i)
+ n_copies = exit_mod + i + 2;
+ else
+ n_copies = i + 1;
+
+ if (n_copies < best_copies)
+ {
+ best_copies = n_copies;
+ best_unroll = i;
+ }
+ }
+
+ loop->estimated_uf = best_unroll + 1;
+ return true;
+}
+
+/* Try to determine estimated unroll factor for given LOOP with countable but
+ non-constant number of iterations, mainly refer to
+ decide_unroll_runtime_iterations.
+ - NITER_DESC holds number of iteration description if it isn't NULL.
+ - NUNROLL_IN holds a unroll factor value computed with instruction numbers.
+ - ITER holds estimated or likely max loop iterations.
+ Return true if it succeeds, also update estimated_uf. */
+
+static bool
+decide_uf_runtime_iter (class loop *loop, const tree_niter_desc *niter_desc,
+ unsigned nunroll_in, const widest_int *iter)
+{
+ unsigned nunroll = nunroll_in;
+ if (loop->unroll > 0 && loop->unroll < USHRT_MAX)
+ nunroll = loop->unroll;
+
+ /* Skip big loops. */
+ if (nunroll <= 1)
+ return false;
+
+ gcc_assert (niter_desc && niter_desc->assumptions);
+
+ /* Skip constant number of iterations. */
+ if ((!niter_desc->may_be_zero || !integer_zerop (niter_desc->may_be_zero))
+ && tree_fits_uhwi_p (niter_desc->niter))
+ return false;
+
+ /* Check whether the loop rolls. */
+ if (wi::ltu_p (*iter, 2 * nunroll))
+ return false;
+
+ /* Success; now force nunroll to be power of 2. */
+ unsigned i;
+ for (i = 1; 2 * i <= nunroll; i *= 2)
+ continue;
+
+ loop->estimated_uf = i;
+ return true;
+}
+
+/* Try to determine estimated unroll factor for given LOOP with uncountable
+ number of iterations, mainly refer to decide_unroll_stupid.
+ - NITER_DESC holds number of iteration description if it isn't NULL.
+ - NUNROLL_IN holds a unroll factor value computed with instruction numbers.
+ - ITER holds estimated or likely max loop iterations.
+ Return true if it succeeds, also update estimated_uf. */
+
+static bool
+decide_uf_stupid (class loop *loop, const tree_niter_desc *niter_desc,
+ unsigned nunroll_in, const widest_int *iter)
+{
+ if (!flag_unroll_all_loops && !loop->unroll)
+ return false;
+
+ unsigned nunroll = nunroll_in;
+ if (loop->unroll > 0 && loop->unroll < USHRT_MAX)
+ nunroll = loop->unroll;
+
+ /* Skip big loops. */
+ if (nunroll <= 1)
+ return false;
+
+ gcc_assert (!niter_desc || !niter_desc->assumptions);
+
+ /* Skip loop with multiple branches for now. */
+ if (num_loop_branches (loop) > 1)
+ return false;
+
+ /* Check whether the loop rolls. */
+ if (wi::ltu_p (*iter, 2 * nunroll))
+ return false;
+
+ /* Success; now force nunroll to be power of 2. */
+ unsigned i;
+ for (i = 1; 2 * i <= nunroll; i *= 2)
+ continue;
+
+ loop->estimated_uf = i;
+ return true;
+}
+
+/* Try to estimate whether this given LOOP can be unrolled or not, and compute
+ its estimated unroll factor if it can. To avoid duplicated computation, you
+ can pass number of iterations information by DESC. The heuristics mainly
+ refer to decide_unrolling in loop-unroll.c. */
+
+void
+estimate_unroll_factor (class loop *loop, tree_niter_desc *desc)
+{
+ /* Return the existing estimated unroll factor. */
+ if (loop->estimated_uf)
+ return;
+
+ /* Don't unroll explicitly. */
+ if (loop->unroll == 1)
+ {
+ loop->estimated_uf = loop->unroll;
+ return;
+ }
+
+ /* Like decide_unrolling, don't unroll if:
+ 1) the loop is cold.
+ 2) the loop can't be manipulated.
+ 3) the loop isn't innermost. */
+ if (optimize_loop_for_size_p (loop)
+ || !can_duplicate_loop_p (loop)
+ || loop->inner != NULL)
+ {
+ loop->estimated_uf = 1;
+ return;
+ }
+
+ /* Don't unroll without explicit information. */
+ if (!loop->unroll && !flag_unroll_loops && !flag_unroll_all_loops)
+ {
+ loop->estimated_uf = 1;
+ return;
+ }
+
+ /* Check for instruction number and average instruction number. */
+ loop->ninsns = tree_num_loop_insns (loop, &eni_size_weights);
+ loop->av_ninsns = tree_average_num_loop_insns (loop, &eni_size_weights);
+ unsigned nunroll = param_max_unrolled_insns / loop->ninsns;
+ unsigned nunroll_by_av = param_max_average_unrolled_insns / loop->av_ninsns;
+
+ if (nunroll > nunroll_by_av)
+ nunroll = nunroll_by_av;
+ if (nunroll > (unsigned) param_max_unroll_times)
+ nunroll = param_max_unroll_times;
+
+ if (targetm.loop_unroll_adjust_tree)
+ nunroll = targetm.loop_unroll_adjust_tree (nunroll, loop);
+
+ tree_niter_desc *niter_desc = NULL;
+ bool desc_need_delete = false;
+
+ /* Compute number of iterations if need. */
+ if (!desc)
+ {
+ /* For now, use single_dom_exit for simplicity. TODO: Support multiple
+ exits like find_simple_exit if we finds some profitable cases. */
+ niter_desc = XNEW (class tree_niter_desc);
+ gcc_assert (niter_desc);
+ edge exit = single_dom_exit (loop);
+ if (!exit || !number_of_iterations_exit (loop, exit, niter_desc, true))
+ {
+ XDELETE (niter_desc);
+ niter_desc = NULL;
+ }
+ else
+ desc_need_delete = true;
+ }
+ else
+ niter_desc = desc;
+
+ /* For checking the loop rolls enough to consider, also consult loop bounds
+ and profile. */
+ widest_int iterations;
+ if (!get_estimated_loop_iterations (loop, &iterations)
+ && !get_likely_max_loop_iterations (loop, &iterations))
+ iterations = 0;
+
+ if (niter_desc && niter_desc->assumptions)
+ {
+ /* For countable loops. */
+ if (!decide_uf_const_iter (loop, niter_desc, nunroll, &iterations)
+ && !decide_uf_runtime_iter (loop, niter_desc, nunroll, &iterations))
+ loop->estimated_uf = 1;
+ }
+ else
+ {
+ if (!decide_uf_stupid (loop, niter_desc, nunroll, &iterations))
+ loop->estimated_uf = 1;
+ }
+
+ if (desc_need_delete)
+ {
+ XDELETE (niter_desc);
+ niter_desc = NULL;
+ }
+}
+
diff --git a/gcc/tree-ssa-loop-manip.h b/gcc/tree-ssa-loop-manip.h
index 8263a67..46aec40 100644
--- a/gcc/tree-ssa-loop-manip.h
+++ b/gcc/tree-ssa-loop-manip.h
@@ -55,7 +55,6 @@ extern void tree_transform_and_unroll_loop (class loop *, unsigned,
extern void tree_unroll_loop (class loop *, unsigned,
edge, class tree_niter_desc *);
extern tree canonicalize_loop_ivs (class loop *, tree *, bool);
-
-
+extern void estimate_unroll_factor (class loop *, tree_niter_desc *);
#endif /* GCC_TREE_SSA_LOOP_MANIP_H */
diff --git a/gcc/tree-ssa-loop.c b/gcc/tree-ssa-loop.c
index fc9f083..d07422e 100644
--- a/gcc/tree-ssa-loop.c
+++ b/gcc/tree-ssa-loop.c
@@ -40,6 +40,7 @@ along with GCC; see the file COPYING3. If not see
#include "diagnostic-core.h"
#include "stringpool.h"
#include "attribs.h"
+#include "sreal.h"
/* A pass making sure loops are fixed up. */
@@ -790,5 +791,37 @@ tree_num_loop_insns (class loop *loop, eni_weights *weights)
return size;
}
+/* Computes an estimated number of insns on average per iteration in LOOP,
+ weighted by WEIGHTS. Refer to function average_num_loop_insns. */
+unsigned
+tree_average_num_loop_insns (class loop *loop, eni_weights *weights)
+{
+ basic_block *body = get_loop_body (loop);
+ gimple_stmt_iterator gsi;
+ unsigned bb_size, i;
+ sreal nsize = 0;
+
+ for (i = 0; i < loop->num_nodes; i++)
+ {
+ bb_size = 0;
+ for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
+ bb_size += estimate_num_insns (gsi_stmt (gsi), weights);
+ nsize += (sreal) bb_size
+ * body[i]->count.to_sreal_scale (loop->header->count);
+ /* Avoid overflows. */
+ if (nsize > 1000000)
+ {
+ free (body);
+ return 1000000;
+ }
+ }
+ free (body);
+
+ unsigned ret = nsize.to_int ();
+ if (!ret)
+ ret = 1; /* To avoid division by zero. */
+
+ return ret;
+}
diff --git a/gcc/tree-ssa-loop.h b/gcc/tree-ssa-loop.h
index e523de2..7bf6ba7 100644
--- a/gcc/tree-ssa-loop.h
+++ b/gcc/tree-ssa-loop.h
@@ -67,6 +67,8 @@ public:
extern bool for_each_index (tree *, bool (*) (tree, tree *, void *), void *);
extern char *get_lsm_tmp_name (tree ref, unsigned n, const char *suffix = NULL);
extern unsigned tree_num_loop_insns (class loop *, struct eni_weights *);
+extern unsigned tree_average_num_loop_insns (class loop *,
+ struct eni_weights *);
/* Returns the loop of the statement STMT. */
--
2.7.4
next prev parent reply other threads:[~2020-01-16 9:40 UTC|newest]
Thread overview: 45+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-01-16 9:41 [PATCH 0/4 GCC11] IVOPTs consider step cost for different forms when unrolling Kewen.Lin
2020-01-16 9:43 ` Kewen.Lin [this message]
2020-01-20 13:12 ` [PATCH 1/4 GCC11] Add middle-end unroll factor estimation Segher Boessenkool
2020-02-10 6:20 ` [PATCH 1/4 v2 " Kewen.Lin
2020-02-10 23:34 ` Segher Boessenkool
2020-02-11 6:51 ` [PATCH 1/4 v3 " Kewen.Lin
2020-02-11 7:00 ` Segher Boessenkool
2020-02-11 2:15 ` [PATCH 1/4 v2 " Jiufu Guo
2020-02-11 3:04 ` Kewen.Lin
2020-01-16 10:02 ` [PATCH 2/4 GCC11] Add target hook stride_dform_valid_p Kewen.Lin
2020-01-20 10:53 ` Richard Sandiford
2020-01-20 11:47 ` Richard Biener
2020-01-20 13:20 ` Segher Boessenkool
2020-02-25 9:46 ` Kewen.Lin
2020-03-02 11:09 ` Richard Sandiford
2020-03-03 12:26 ` Kewen.Lin
2020-05-13 5:50 ` Kewen.Lin
2020-05-28 2:17 ` Ping^1 [PATCH 2/4 V3] " Kewen.Lin
2020-05-28 10:54 ` Richard Sandiford
2020-01-16 10:06 ` [PATCH 3/4 GCC11] IVOPTs Consider cost_step on different forms during unrolling Kewen.Lin
2020-02-25 9:48 ` [PATCH 3/4 V2 " Kewen.Lin
2020-05-13 5:42 ` [PATCH 3/4 V3 " Kewen.Lin
2020-01-16 10:12 ` [PATCH 4/4 GCC11] rs6000: P9 D-form test cases Kewen.Lin
2020-01-20 13:37 ` Segher Boessenkool
2020-02-10 6:25 ` [PATCH 4/4 v2 " Kewen.Lin
2020-02-10 23:51 ` Segher Boessenkool
2020-01-20 13:03 ` [PATCH 0/4 GCC11] IVOPTs consider step cost for different forms when unrolling Segher Boessenkool
2020-02-10 6:17 ` Kewen.Lin
2020-02-10 21:29 ` Segher Boessenkool
2020-02-11 2:56 ` Kewen.Lin
2020-02-11 7:34 ` Richard Biener
2020-02-11 7:49 ` Segher Boessenkool
2020-02-11 8:01 ` Richard Biener
2020-02-11 12:46 ` Roman Zhuykov
2020-02-11 13:58 ` Richard Biener
2020-02-11 18:00 ` Segher Boessenkool
2020-02-12 8:07 ` Richard Biener
2020-02-12 21:53 ` Segher Boessenkool
2020-02-11 18:12 ` Segher Boessenkool
2020-02-12 8:13 ` Richard Biener
2020-02-12 10:02 ` Segher Boessenkool
2020-02-12 10:53 ` Richard Biener
2020-02-12 22:05 ` Segher Boessenkool
2020-02-13 7:48 ` Richard Biener
2020-02-13 9:02 ` Segher Boessenkool
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=131a3294-1951-3678-453b-085744366af6@linux.ibm.com \
--to=linkw@linux.ibm.com \
--cc=bin.cheng@linux.alibaba.com \
--cc=gcc-patches@gcc.gnu.org \
--cc=rguenther@suse.de \
--cc=segher@kernel.crashing.org \
--cc=wschmidt@linux.ibm.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).