public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r11-3671] divmod: Match and expand DIVMOD even in some cases of constant divisor [PR97282]
@ 2020-10-06  8:33 Jakub Jelinek
  0 siblings, 0 replies; only message in thread
From: Jakub Jelinek @ 2020-10-06  8:33 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:bf510679bb3f9bfd6019666065016bb26a5b5466

commit r11-3671-gbf510679bb3f9bfd6019666065016bb26a5b5466
Author: Jakub Jelinek <jakub@redhat.com>
Date:   Tue Oct 6 10:32:22 2020 +0200

    divmod: Match and expand DIVMOD even in some cases of constant divisor [PR97282]
    
    As written in the comment, tree-ssa-math-opts.c wouldn't create a DIVMOD
    ifn call for division + modulo by constant for the fear that during
    expansion we could generate better code for those cases.
    If the divisoris a power of two, that is certainly the case always,
    but otherwise expand_divmod can punt in many cases, e.g. if the division
    type's precision is above HOST_BITS_PER_WIDE_INT, we don't even call
    choose_multiplier, because it works on HOST_WIDE_INTs (true, something
    we should fix eventually now that we have wide_ints), or if pre/post shift
    is larger than BITS_PER_WORD.
    
    So, the following patch recognizes DIVMOD with constant last argument even
    when it is unclear if expand_divmod will be able to optimize it, and then
    during DIVMOD expansion if the divisor is constant attempts to expand it as
    division + modulo and if they actually don't contain any libcalls or
    division/modulo, they are kept as is, otherwise that sequence is thrown away
    and divmod optab or libcall is used.
    
    2020-10-06  Jakub Jelinek  <jakub@redhat.com>
    
            PR rtl-optimization/97282
            * tree-ssa-math-opts.c (divmod_candidate_p): Don't return false for
            constant op2 if it is not a power of two and the type has precision
            larger than HOST_BITS_PER_WIDE_INT or BITS_PER_WORD.
            * internal-fn.c (contains_call_div_mod): New function.
            (expand_DIVMOD): If last argument is a constant, try to expand it as
            TRUNC_DIV_EXPR followed by TRUNC_MOD_EXPR, but if the sequence
            contains any calls or {,U}{DIV,MOD} rtxes, throw it away and use
            divmod optab or divmod libfunc.
    
            * gcc.target/i386/pr97282.c: New test.

Diff:
---
 gcc/internal-fn.c                       | 67 +++++++++++++++++++++++++++++++--
 gcc/testsuite/gcc.target/i386/pr97282.c | 25 ++++++++++++
 gcc/tree-ssa-math-opts.c                | 17 ++++++++-
 3 files changed, 105 insertions(+), 4 deletions(-)

diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
index c8970820026..92cb3cd845a 100644
--- a/gcc/internal-fn.c
+++ b/gcc/internal-fn.c
@@ -50,6 +50,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "tree-phinodes.h"
 #include "ssa-iterators.h"
 #include "explow.h"
+#include "rtl-iter.h"
 
 /* The names of each internal function, indexed by function number.  */
 const char *const internal_fn_name_array[] = {
@@ -2985,6 +2986,32 @@ expand_gather_load_optab_fn (internal_fn, gcall *stmt, direct_optab optab)
     emit_move_insn (lhs_rtx, ops[0].value);
 }
 
+/* Helper for expand_DIVMOD.  Return true if the sequence starting with
+   INSN contains any call insns or insns with {,U}{DIV,MOD} rtxes.  */
+
+static bool
+contains_call_div_mod (rtx_insn *insn)
+{
+  subrtx_iterator::array_type array;
+  for (; insn; insn = NEXT_INSN (insn))
+    if (CALL_P (insn))
+      return true;
+    else if (INSN_P (insn))
+      FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
+	switch (GET_CODE (*iter))
+	  {
+	  case CALL:
+	  case DIV:
+	  case UDIV:
+	  case MOD:
+	  case UMOD:
+	    return true;
+	  default:
+	    break;
+	  }
+  return false;
+ }
+
 /* Expand DIVMOD() using:
  a) optab handler for udivmod/sdivmod if it is available.
  b) If optab_handler doesn't exist, generate call to
@@ -3007,10 +3034,44 @@ expand_DIVMOD (internal_fn, gcall *call_stmt)
   rtx op1 = expand_normal (arg1);
   rtx target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
 
-  rtx quotient, remainder, libfunc;
+  rtx quotient = NULL_RTX, remainder = NULL_RTX;
+  rtx_insn *insns = NULL;
+
+  if (TREE_CODE (arg1) == INTEGER_CST)
+    {
+      /* For DIVMOD by integral constants, there could be efficient code
+	 expanded inline e.g. using shifts and plus/minus.  Try to expand
+	 the division and modulo and if it emits any library calls or any
+	 {,U}{DIV,MOD} rtxes throw it away and use a divmod optab or
+	 divmod libcall.  */
+      struct separate_ops ops;
+      ops.code = TRUNC_DIV_EXPR;
+      ops.type = type;
+      ops.op0 = make_tree (ops.type, op0);
+      ops.op1 = arg1;
+      ops.op2 = NULL_TREE;
+      ops.location = gimple_location (call_stmt);
+      start_sequence ();
+      quotient = expand_expr_real_2 (&ops, NULL_RTX, mode, EXPAND_NORMAL);
+      if (contains_call_div_mod (get_insns ()))
+	quotient = NULL_RTX;
+      else
+	{
+	  ops.code = TRUNC_MOD_EXPR;
+	  remainder = expand_expr_real_2 (&ops, NULL_RTX, mode, EXPAND_NORMAL);
+	  if (contains_call_div_mod (get_insns ()))
+	    remainder = NULL_RTX;
+	}
+      if (remainder)
+	insns = get_insns ();
+      end_sequence ();
+    }
+
+  if (remainder)
+    emit_insn (insns);
 
   /* Check if optab_handler exists for divmod_optab for given mode.  */
-  if (optab_handler (tab, mode) != CODE_FOR_nothing)
+  else if (optab_handler (tab, mode) != CODE_FOR_nothing)
     {
       quotient = gen_reg_rtx (mode);
       remainder = gen_reg_rtx (mode);
@@ -3018,7 +3079,7 @@ expand_DIVMOD (internal_fn, gcall *call_stmt)
     }
 
   /* Generate call to divmod libfunc if it exists.  */
-  else if ((libfunc = optab_libfunc (tab, mode)) != NULL_RTX)
+  else if (rtx libfunc = optab_libfunc (tab, mode))
     targetm.expand_divmod_libfunc (libfunc, mode, op0, op1,
 				   &quotient, &remainder);
 
diff --git a/gcc/testsuite/gcc.target/i386/pr97282.c b/gcc/testsuite/gcc.target/i386/pr97282.c
new file mode 100644
index 00000000000..6fb10c8bb82
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr97282.c
@@ -0,0 +1,25 @@
+/* PR rtl-optimization/97282 */
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-final { scan-assembler "call\[^\n\r]*__udivmod\[dt]i4" } } */
+
+#ifdef __SIZEOF_INT128__
+typedef __uint128_t T;
+#else
+typedef unsigned long long T;
+#endif
+
+unsigned long
+foo (T x)
+{
+  if (x == 0)
+    return 0;
+
+  unsigned long ret = 0;
+  while (x > 0)
+    {
+      ret = ret + x % 10;
+      x = x / 10;
+    }
+  return ret;
+}
diff --git a/gcc/tree-ssa-math-opts.c b/gcc/tree-ssa-math-opts.c
index bdbb9d965f0..4927255d456 100644
--- a/gcc/tree-ssa-math-opts.c
+++ b/gcc/tree-ssa-math-opts.c
@@ -3567,9 +3567,24 @@ divmod_candidate_p (gassign *stmt)
 
   /* Disable the transform if either is a constant, since division-by-constant
      may have specialized expansion.  */
-  if (CONSTANT_CLASS_P (op1) || CONSTANT_CLASS_P (op2))
+  if (CONSTANT_CLASS_P (op1))
     return false;
 
+  if (CONSTANT_CLASS_P (op2))
+    {
+      if (integer_pow2p (op2))
+	return false;
+
+      if (TYPE_PRECISION (type) <= HOST_BITS_PER_WIDE_INT
+	  && TYPE_PRECISION (type) <= BITS_PER_WORD)
+	return false;
+
+      /* If the divisor is not power of 2 and the precision wider than
+	 HWI, expand_divmod punts on that, so in that case it is better
+	 to use divmod optab or libfunc.  Similarly if choose_multiplier
+	 might need pre/post shifts of BITS_PER_WORD or more.  */
+    }
+
   /* Exclude the case where TYPE_OVERFLOW_TRAPS (type) as that should
      expand using the [su]divv optabs.  */
   if (TYPE_OVERFLOW_TRAPS (type))


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2020-10-06  8:33 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-10-06  8:33 [gcc r11-3671] divmod: Match and expand DIVMOD even in some cases of constant divisor [PR97282] Jakub Jelinek

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).