public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r14-5613] tree-ssa-math-opts: popcount (X) == 1 to (X ^ (X - 1)) > (X - 1) optimization for direct optab [PR90
@ 2023-11-20 9:04 Jakub Jelinek
0 siblings, 0 replies; only message in thread
From: Jakub Jelinek @ 2023-11-20 9:04 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:103a3966bc7b68f91b6cd3c5beb330c4b8570c90
commit r14-5613-g103a3966bc7b68f91b6cd3c5beb330c4b8570c90
Author: Jakub Jelinek <jakub@redhat.com>
Date: Mon Nov 20 10:03:20 2023 +0100
tree-ssa-math-opts: popcount (X) == 1 to (X ^ (X - 1)) > (X - 1) optimization for direct optab [PR90693]
On Fri, Nov 17, 2023 at 03:01:04PM +0100, Jakub Jelinek wrote:
> As a follow-up, I'm considering changing in this routine the popcount
> call to IFN_POPCOUNT with 2 arguments and during expansion test costs.
Here is the follow-up which does the rtx costs testing.
2023-11-20 Jakub Jelinek <jakub@redhat.com>
PR tree-optimization/90693
* tree-ssa-math-opts.cc (match_single_bit_test): Mark POPCOUNT with
result only used in equality comparison against 1 with direct optab
support as .POPCOUNT call with 2 arguments.
* internal-fn.h (expand_POPCOUNT): Declare.
* internal-fn.def (DEF_INTERNAL_INT_EXT_FN): New macro, document it,
undefine at the end.
(POPCOUNT): Use it instead of DEF_INTERNAL_INT_FN.
* internal-fn.cc (DEF_INTERNAL_INT_EXT_FN): Define to nothing before
inclusion to define expanders.
(expand_POPCOUNT): New function.
Diff:
---
gcc/internal-fn.cc | 65 +++++++++++++++++++++++++++++++++++++++++++++++
gcc/internal-fn.def | 13 +++++++++-
gcc/internal-fn.h | 1 +
gcc/tree-ssa-math-opts.cc | 11 +++++++-
4 files changed, 88 insertions(+), 2 deletions(-)
diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc
index cd46875696a..cce73b3b2e0 100644
--- a/gcc/internal-fn.cc
+++ b/gcc/internal-fn.cc
@@ -4285,6 +4285,7 @@ set_edom_supported_p (void)
{ \
expand_##TYPE##_optab_fn (fn, stmt, OPTAB##_optab); \
}
+#define DEF_INTERNAL_INT_EXT_FN(CODE, FLAGS, OPTAB, TYPE)
#define DEF_INTERNAL_SIGNED_OPTAB_FN(CODE, FLAGS, SELECTOR, SIGNED_OPTAB, \
UNSIGNED_OPTAB, TYPE) \
static void \
@@ -5097,3 +5098,67 @@ expand_BITINTTOFLOAT (internal_fn, gcall *stmt)
if (val != target)
emit_move_insn (target, val);
}
+
+void
+expand_POPCOUNT (internal_fn fn, gcall *stmt)
+{
+ if (gimple_call_num_args (stmt) == 1)
+ {
+ expand_unary_optab_fn (fn, stmt, popcount_optab);
+ return;
+ }
+ /* If .POPCOUNT call has 2 arguments, match_single_bit_test marked it
+ because the result is only used in an equality comparison against 1.
+ Use rtx costs in that case to determine if .POPCOUNT (arg) == 1
+ or (arg ^ (arg - 1)) > arg - 1 is cheaper. */
+ bool speed_p = optimize_insn_for_speed_p ();
+ tree lhs = gimple_call_lhs (stmt);
+ tree arg = gimple_call_arg (stmt, 0);
+ tree type = TREE_TYPE (arg);
+ machine_mode mode = TYPE_MODE (type);
+ do_pending_stack_adjust ();
+ start_sequence ();
+ expand_unary_optab_fn (fn, stmt, popcount_optab);
+ rtx_insn *popcount_insns = get_insns ();
+ end_sequence ();
+ start_sequence ();
+ rtx plhs = expand_normal (lhs);
+ rtx pcmp = emit_store_flag (NULL_RTX, EQ, plhs, const1_rtx, mode, 0, 0);
+ if (pcmp == NULL_RTX)
+ {
+ fail:
+ end_sequence ();
+ emit_insn (popcount_insns);
+ return;
+ }
+ rtx_insn *popcount_cmp_insns = get_insns ();
+ end_sequence ();
+ start_sequence ();
+ rtx op0 = expand_normal (arg);
+ rtx argm1 = expand_simple_binop (mode, PLUS, op0, constm1_rtx, NULL_RTX,
+ 1, OPTAB_DIRECT);
+ if (argm1 == NULL_RTX)
+ goto fail;
+ rtx argxorargm1 = expand_simple_binop (mode, XOR, op0, argm1, NULL_RTX,
+ 1, OPTAB_DIRECT);
+ if (argxorargm1 == NULL_RTX)
+ goto fail;
+ rtx cmp = emit_store_flag (NULL_RTX, GTU, argxorargm1, argm1, mode, 1, 1);
+ if (cmp == NULL_RTX)
+ goto fail;
+ rtx_insn *cmp_insns = get_insns ();
+ end_sequence ();
+ unsigned popcount_cost = (seq_cost (popcount_insns, speed_p)
+ + seq_cost (popcount_cmp_insns, speed_p));
+ unsigned cmp_cost = seq_cost (cmp_insns, speed_p);
+ if (popcount_cost <= cmp_cost)
+ emit_insn (popcount_insns);
+ else
+ {
+ emit_insn (cmp_insns);
+ plhs = expand_normal (lhs);
+ if (GET_MODE (cmp) != GET_MODE (plhs))
+ cmp = convert_to_mode (GET_MODE (plhs), cmp, 1);
+ emit_move_insn (plhs, cmp);
+ }
+}
diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
index d645075de53..ba5e4ce272c 100644
--- a/gcc/internal-fn.def
+++ b/gcc/internal-fn.def
@@ -35,6 +35,7 @@ along with GCC; see the file COPYING3. If not see
DEF_INTERNAL_FLT_FN (NAME, FLAGS, OPTAB, TYPE)
DEF_INTERNAL_FLT_FLOATN_FN (NAME, FLAGS, OPTAB, TYPE)
DEF_INTERNAL_INT_FN (NAME, FLAGS, OPTAB, TYPE)
+ DEF_INTERNAL_INT_EXT_FN (NAME, FLAGS, OPTAB, TYPE)
DEF_INTERNAL_COND_FN (NAME, FLAGS, OPTAB, TYPE)
DEF_INTERNAL_SIGNED_COND_FN (NAME, FLAGS, OPTAB, TYPE)
DEF_INTERNAL_WIDENING_OPTAB_FN (NAME, FLAGS, SELECTOR, SOPTAB, UOPTAB,
@@ -100,6 +101,10 @@ along with GCC; see the file COPYING3. If not see
says that the function extends the C-level BUILT_IN_<NAME>{,L,LL,IMAX}
group of functions to any integral mode (including vector modes).
+ DEF_INTERNAL_INT_EXT_FN is like DEF_INTERNAL_INT_FN, except that it
+ has expand_##NAME defined in internal-fn.cc to override the
+ DEF_INTERNAL_INT_FN expansion behavior.
+
DEF_INTERNAL_WIDENING_OPTAB_FN is a wrapper that defines five internal
functions with DEF_INTERNAL_SIGNED_OPTAB_FN:
- one that describes a widening operation with the same number of elements
@@ -163,6 +168,11 @@ along with GCC; see the file COPYING3. If not see
DEF_INTERNAL_OPTAB_FN (NAME, FLAGS, OPTAB, TYPE)
#endif
+#ifndef DEF_INTERNAL_INT_EXT_FN
+#define DEF_INTERNAL_INT_EXT_FN(NAME, FLAGS, OPTAB, TYPE) \
+ DEF_INTERNAL_INT_FN (NAME, FLAGS, OPTAB, TYPE)
+#endif
+
#ifndef DEF_INTERNAL_WIDENING_OPTAB_FN
#define DEF_INTERNAL_WIDENING_OPTAB_FN(NAME, FLAGS, SELECTOR, SOPTAB, UOPTAB, TYPE) \
DEF_INTERNAL_SIGNED_OPTAB_FN (NAME, FLAGS, SELECTOR, SOPTAB, UOPTAB, TYPE) \
@@ -435,7 +445,7 @@ DEF_INTERNAL_INT_FN (CLZ, ECF_CONST | ECF_NOTHROW, clz, unary)
DEF_INTERNAL_INT_FN (CTZ, ECF_CONST | ECF_NOTHROW, ctz, unary)
DEF_INTERNAL_INT_FN (FFS, ECF_CONST | ECF_NOTHROW, ffs, unary)
DEF_INTERNAL_INT_FN (PARITY, ECF_CONST | ECF_NOTHROW, parity, unary)
-DEF_INTERNAL_INT_FN (POPCOUNT, ECF_CONST | ECF_NOTHROW, popcount, unary)
+DEF_INTERNAL_INT_EXT_FN (POPCOUNT, ECF_CONST | ECF_NOTHROW, popcount, unary)
DEF_INTERNAL_FN (GOMP_TARGET_REV, ECF_NOVOPS | ECF_LEAF | ECF_NOTHROW, NULL)
DEF_INTERNAL_FN (GOMP_USE_SIMT, ECF_NOVOPS | ECF_LEAF | ECF_NOTHROW, NULL)
@@ -578,6 +588,7 @@ DEF_INTERNAL_FN (BITINTTOFLOAT, ECF_PURE | ECF_LEAF, ". R . ")
#undef DEF_INTERNAL_WIDENING_OPTAB_FN
#undef DEF_INTERNAL_SIGNED_COND_FN
#undef DEF_INTERNAL_COND_FN
+#undef DEF_INTERNAL_INT_EXT_FN
#undef DEF_INTERNAL_INT_FN
#undef DEF_INTERNAL_FLT_FN
#undef DEF_INTERNAL_FLT_FLOATN_FN
diff --git a/gcc/internal-fn.h b/gcc/internal-fn.h
index 7d72f4db2d0..93598fac1c2 100644
--- a/gcc/internal-fn.h
+++ b/gcc/internal-fn.h
@@ -262,6 +262,7 @@ extern void expand_MULBITINT (internal_fn, gcall *);
extern void expand_DIVMODBITINT (internal_fn, gcall *);
extern void expand_FLOATTOBITINT (internal_fn, gcall *);
extern void expand_BITINTTOFLOAT (internal_fn, gcall *);
+extern void expand_POPCOUNT (internal_fn, gcall *);
extern bool vectorized_internal_fn_supported_p (internal_fn, tree);
diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc
index 9bfc2d3f68f..bcc02aec6cb 100644
--- a/gcc/tree-ssa-math-opts.cc
+++ b/gcc/tree-ssa-math-opts.cc
@@ -5195,7 +5195,16 @@ match_single_bit_test (gimple_stmt_iterator *gsi, gimple *stmt)
if (!INTEGRAL_TYPE_P (type))
return;
if (direct_internal_fn_supported_p (IFN_POPCOUNT, type, OPTIMIZE_FOR_BOTH))
- return;
+ {
+ /* Tell expand_POPCOUNT the popcount result is only used in equality
+ comparison with one, so that it can decide based on rtx costs. */
+ gimple *g = gimple_build_call_internal (IFN_POPCOUNT, 2, arg,
+ integer_one_node);
+ gimple_call_set_lhs (g, gimple_call_lhs (call));
+ gimple_stmt_iterator gsi2 = gsi_for_stmt (call);
+ gsi_replace (&gsi2, g, true);
+ return;
+ }
tree argm1 = make_ssa_name (type);
gimple *g = gimple_build_assign (argm1, PLUS_EXPR, arg,
build_int_cst (type, -1));
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2023-11-20 9:04 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-11-20 9:04 [gcc r14-5613] tree-ssa-math-opts: popcount (X) == 1 to (X ^ (X - 1)) > (X - 1) optimization for direct optab [PR90 Jakub Jelinek
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).