public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r14-5613] tree-ssa-math-opts: popcount (X) == 1 to (X ^ (X - 1)) > (X - 1) optimization for direct optab [PR90
@ 2023-11-20  9:04 Jakub Jelinek
  0 siblings, 0 replies; only message in thread
From: Jakub Jelinek @ 2023-11-20  9:04 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:103a3966bc7b68f91b6cd3c5beb330c4b8570c90

commit r14-5613-g103a3966bc7b68f91b6cd3c5beb330c4b8570c90
Author: Jakub Jelinek <jakub@redhat.com>
Date:   Mon Nov 20 10:03:20 2023 +0100

    tree-ssa-math-opts: popcount (X) == 1 to (X ^ (X - 1)) > (X - 1) optimization for direct optab [PR90693]
    
    On Fri, Nov 17, 2023 at 03:01:04PM +0100, Jakub Jelinek wrote:
    > As a follow-up, I'm considering changing in this routine the popcount
    > call to IFN_POPCOUNT with 2 arguments and during expansion test costs.
    
    Here is the follow-up which does the rtx costs testing.
    
    2023-11-20  Jakub Jelinek  <jakub@redhat.com>
    
            PR tree-optimization/90693
            * tree-ssa-math-opts.cc (match_single_bit_test): Mark POPCOUNT with
            result only used in equality comparison against 1 with direct optab
            support as .POPCOUNT call with 2 arguments.
            * internal-fn.h (expand_POPCOUNT): Declare.
            * internal-fn.def (DEF_INTERNAL_INT_EXT_FN): New macro, document it,
            undefine at the end.
            (POPCOUNT): Use it instead of DEF_INTERNAL_INT_FN.
            * internal-fn.cc (DEF_INTERNAL_INT_EXT_FN): Define to nothing before
            inclusion to define expanders.
            (expand_POPCOUNT): New function.

Diff:
---
 gcc/internal-fn.cc        | 65 +++++++++++++++++++++++++++++++++++++++++++++++
 gcc/internal-fn.def       | 13 +++++++++-
 gcc/internal-fn.h         |  1 +
 gcc/tree-ssa-math-opts.cc | 11 +++++++-
 4 files changed, 88 insertions(+), 2 deletions(-)

diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc
index cd46875696a..cce73b3b2e0 100644
--- a/gcc/internal-fn.cc
+++ b/gcc/internal-fn.cc
@@ -4285,6 +4285,7 @@ set_edom_supported_p (void)
   {							\
     expand_##TYPE##_optab_fn (fn, stmt, OPTAB##_optab);	\
   }
+#define DEF_INTERNAL_INT_EXT_FN(CODE, FLAGS, OPTAB, TYPE)
 #define DEF_INTERNAL_SIGNED_OPTAB_FN(CODE, FLAGS, SELECTOR, SIGNED_OPTAB, \
 				     UNSIGNED_OPTAB, TYPE)		\
   static void								\
@@ -5097,3 +5098,67 @@ expand_BITINTTOFLOAT (internal_fn, gcall *stmt)
   if (val != target)
     emit_move_insn (target, val);
 }
+
+void
+expand_POPCOUNT (internal_fn fn, gcall *stmt)
+{
+  if (gimple_call_num_args (stmt) == 1)
+    {
+      expand_unary_optab_fn (fn, stmt, popcount_optab);
+      return;
+    }
+  /* If .POPCOUNT call has 2 arguments, match_single_bit_test marked it
+     because the result is only used in an equality comparison against 1.
+     Use rtx costs in that case to determine if .POPCOUNT (arg) == 1
+     or (arg ^ (arg - 1)) > arg - 1 is cheaper.  */
+  bool speed_p = optimize_insn_for_speed_p ();
+  tree lhs = gimple_call_lhs (stmt);
+  tree arg = gimple_call_arg (stmt, 0);
+  tree type = TREE_TYPE (arg);
+  machine_mode mode = TYPE_MODE (type);
+  do_pending_stack_adjust ();
+  start_sequence ();
+  expand_unary_optab_fn (fn, stmt, popcount_optab);
+  rtx_insn *popcount_insns = get_insns ();
+  end_sequence ();
+  start_sequence ();
+  rtx plhs = expand_normal (lhs);
+  rtx pcmp = emit_store_flag (NULL_RTX, EQ, plhs, const1_rtx, mode, 0, 0);
+  if (pcmp == NULL_RTX)
+    {
+    fail:
+      end_sequence ();
+      emit_insn (popcount_insns);
+      return;
+    }
+  rtx_insn *popcount_cmp_insns = get_insns ();
+  end_sequence ();
+  start_sequence ();
+  rtx op0 = expand_normal (arg);
+  rtx argm1 = expand_simple_binop (mode, PLUS, op0, constm1_rtx, NULL_RTX,
+				   1, OPTAB_DIRECT);
+  if (argm1 == NULL_RTX)
+    goto fail;
+  rtx argxorargm1 = expand_simple_binop (mode, XOR, op0, argm1, NULL_RTX,
+					 1, OPTAB_DIRECT);
+  if (argxorargm1 == NULL_RTX)
+    goto fail;
+  rtx cmp = emit_store_flag (NULL_RTX, GTU, argxorargm1, argm1, mode, 1, 1);
+  if (cmp == NULL_RTX)
+    goto fail;
+  rtx_insn *cmp_insns = get_insns ();
+  end_sequence ();
+  unsigned popcount_cost = (seq_cost (popcount_insns, speed_p)
+			    + seq_cost (popcount_cmp_insns, speed_p));
+  unsigned cmp_cost = seq_cost (cmp_insns, speed_p);
+  if (popcount_cost <= cmp_cost)
+    emit_insn (popcount_insns);
+  else
+    {
+      emit_insn (cmp_insns);
+      plhs = expand_normal (lhs);
+      if (GET_MODE (cmp) != GET_MODE (plhs))
+	cmp = convert_to_mode (GET_MODE (plhs), cmp, 1);
+      emit_move_insn (plhs, cmp);
+    }
+}
diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
index d645075de53..ba5e4ce272c 100644
--- a/gcc/internal-fn.def
+++ b/gcc/internal-fn.def
@@ -35,6 +35,7 @@ along with GCC; see the file COPYING3.  If not see
      DEF_INTERNAL_FLT_FN (NAME, FLAGS, OPTAB, TYPE)
      DEF_INTERNAL_FLT_FLOATN_FN (NAME, FLAGS, OPTAB, TYPE)
      DEF_INTERNAL_INT_FN (NAME, FLAGS, OPTAB, TYPE)
+     DEF_INTERNAL_INT_EXT_FN (NAME, FLAGS, OPTAB, TYPE)
      DEF_INTERNAL_COND_FN (NAME, FLAGS, OPTAB, TYPE)
      DEF_INTERNAL_SIGNED_COND_FN (NAME, FLAGS, OPTAB, TYPE)
      DEF_INTERNAL_WIDENING_OPTAB_FN (NAME, FLAGS, SELECTOR, SOPTAB, UOPTAB,
@@ -100,6 +101,10 @@ along with GCC; see the file COPYING3.  If not see
    says that the function extends the C-level BUILT_IN_<NAME>{,L,LL,IMAX}
    group of functions to any integral mode (including vector modes).
 
+   DEF_INTERNAL_INT_EXT_FN is like DEF_INTERNAL_INT_FN, except that it
+   has expand_##NAME defined in internal-fn.cc to override the
+   DEF_INTERNAL_INT_FN expansion behavior.
+
    DEF_INTERNAL_WIDENING_OPTAB_FN is a wrapper that defines five internal
    functions with DEF_INTERNAL_SIGNED_OPTAB_FN:
    - one that describes a widening operation with the same number of elements
@@ -163,6 +168,11 @@ along with GCC; see the file COPYING3.  If not see
   DEF_INTERNAL_OPTAB_FN (NAME, FLAGS, OPTAB, TYPE)
 #endif
 
+#ifndef DEF_INTERNAL_INT_EXT_FN
+#define DEF_INTERNAL_INT_EXT_FN(NAME, FLAGS, OPTAB, TYPE) \
+  DEF_INTERNAL_INT_FN (NAME, FLAGS, OPTAB, TYPE)
+#endif
+
 #ifndef DEF_INTERNAL_WIDENING_OPTAB_FN
 #define DEF_INTERNAL_WIDENING_OPTAB_FN(NAME, FLAGS, SELECTOR, SOPTAB, UOPTAB, TYPE)		    \
   DEF_INTERNAL_SIGNED_OPTAB_FN (NAME, FLAGS, SELECTOR, SOPTAB, UOPTAB, TYPE)			    \
@@ -435,7 +445,7 @@ DEF_INTERNAL_INT_FN (CLZ, ECF_CONST | ECF_NOTHROW, clz, unary)
 DEF_INTERNAL_INT_FN (CTZ, ECF_CONST | ECF_NOTHROW, ctz, unary)
 DEF_INTERNAL_INT_FN (FFS, ECF_CONST | ECF_NOTHROW, ffs, unary)
 DEF_INTERNAL_INT_FN (PARITY, ECF_CONST | ECF_NOTHROW, parity, unary)
-DEF_INTERNAL_INT_FN (POPCOUNT, ECF_CONST | ECF_NOTHROW, popcount, unary)
+DEF_INTERNAL_INT_EXT_FN (POPCOUNT, ECF_CONST | ECF_NOTHROW, popcount, unary)
 
 DEF_INTERNAL_FN (GOMP_TARGET_REV, ECF_NOVOPS | ECF_LEAF | ECF_NOTHROW, NULL)
 DEF_INTERNAL_FN (GOMP_USE_SIMT, ECF_NOVOPS | ECF_LEAF | ECF_NOTHROW, NULL)
@@ -578,6 +588,7 @@ DEF_INTERNAL_FN (BITINTTOFLOAT, ECF_PURE | ECF_LEAF, ". R . ")
 #undef DEF_INTERNAL_WIDENING_OPTAB_FN
 #undef DEF_INTERNAL_SIGNED_COND_FN
 #undef DEF_INTERNAL_COND_FN
+#undef DEF_INTERNAL_INT_EXT_FN
 #undef DEF_INTERNAL_INT_FN
 #undef DEF_INTERNAL_FLT_FN
 #undef DEF_INTERNAL_FLT_FLOATN_FN
diff --git a/gcc/internal-fn.h b/gcc/internal-fn.h
index 7d72f4db2d0..93598fac1c2 100644
--- a/gcc/internal-fn.h
+++ b/gcc/internal-fn.h
@@ -262,6 +262,7 @@ extern void expand_MULBITINT (internal_fn, gcall *);
 extern void expand_DIVMODBITINT (internal_fn, gcall *);
 extern void expand_FLOATTOBITINT (internal_fn, gcall *);
 extern void expand_BITINTTOFLOAT (internal_fn, gcall *);
+extern void expand_POPCOUNT (internal_fn, gcall *);
 
 extern bool vectorized_internal_fn_supported_p (internal_fn, tree);
 
diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc
index 9bfc2d3f68f..bcc02aec6cb 100644
--- a/gcc/tree-ssa-math-opts.cc
+++ b/gcc/tree-ssa-math-opts.cc
@@ -5195,7 +5195,16 @@ match_single_bit_test (gimple_stmt_iterator *gsi, gimple *stmt)
   if (!INTEGRAL_TYPE_P (type))
     return;
   if (direct_internal_fn_supported_p (IFN_POPCOUNT, type, OPTIMIZE_FOR_BOTH))
-    return;
+    {
+      /* Tell expand_POPCOUNT the popcount result is only used in equality
+	 comparison with one, so that it can decide based on rtx costs.  */
+      gimple *g = gimple_build_call_internal (IFN_POPCOUNT, 2, arg,
+					      integer_one_node);
+      gimple_call_set_lhs (g, gimple_call_lhs (call));
+      gimple_stmt_iterator gsi2 = gsi_for_stmt (call);
+      gsi_replace (&gsi2, g, true);
+      return;
+    }
   tree argm1 = make_ssa_name (type);
   gimple *g = gimple_build_assign (argm1, PLUS_EXPR, arg,
 				   build_int_cst (type, -1));

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2023-11-20  9:04 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-11-20  9:04 [gcc r14-5613] tree-ssa-math-opts: popcount (X) == 1 to (X ^ (X - 1)) > (X - 1) optimization for direct optab [PR90 Jakub Jelinek

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).