* [PATCH] divmod: Match and expand DIVMOD even in some cases of constant divisor [PR97282]
@ 2020-10-06 7:37 Jakub Jelinek
2020-10-06 8:13 ` Richard Biener
0 siblings, 1 reply; 4+ messages in thread
From: Jakub Jelinek @ 2020-10-06 7:37 UTC (permalink / raw)
To: Richard Biener; +Cc: gcc-patches
Hi!
As written in the comment, tree-ssa-math-opts.c wouldn't create a DIVMOD
ifn call for division + modulo by constant for the fear that during
expansion we could generate better code for those cases.
If the divisoris a power of two, that is certainly the case always,
but otherwise expand_divmod can punt in many cases, e.g. if the division
type's precision is above HOST_BITS_PER_WIDE_INT, we don't even call
choose_multiplier, because it works on HOST_WIDE_INTs (true, something
we should fix eventually now that we have wide_ints), or if pre/post shift
is larger than BITS_PER_WORD.
So, the following patch recognizes DIVMOD with constant last argument even
when it is unclear if expand_divmod will be able to optimize it, and then
during DIVMOD expansion if the divisor is constant attempts to expand it as
division + modulo and if they actually don't contain any libcalls or
division/modulo, they are kept as is, otherwise that sequence is thrown away
and divmod optab or libcall is used.
Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
2020-10-06 Jakub Jelinek <jakub@redhat.com>
PR rtl-optimization/97282
* tree-ssa-math-opts.c (divmod_candidate_p): Don't return false for
constant op2 if it is not a power of two and the type has precision
larger than HOST_BITS_PER_WIDE_INT or BITS_PER_WORD.
* internal-fn.c (contains_call_div_mod): New function.
(expand_DIVMOD): If last argument is a constant, try to expand it as
TRUNC_DIV_EXPR followed by TRUNC_MOD_EXPR, but if the sequence
contains any calls or {,U}{DIV,MOD} rtxes, throw it away and use
divmod optab or divmod libfunc.
* gcc.target/i386/pr97282.c: New test.
--- gcc/tree-ssa-math-opts.c.jj 2020-10-01 10:40:10.104755999 +0200
+++ gcc/tree-ssa-math-opts.c 2020-10-05 13:51:54.476628287 +0200
@@ -3567,9 +3567,24 @@ divmod_candidate_p (gassign *stmt)
/* Disable the transform if either is a constant, since division-by-constant
may have specialized expansion. */
- if (CONSTANT_CLASS_P (op1) || CONSTANT_CLASS_P (op2))
+ if (CONSTANT_CLASS_P (op1))
return false;
+ if (CONSTANT_CLASS_P (op2))
+ {
+ if (integer_pow2p (op2))
+ return false;
+
+ if (TYPE_PRECISION (type) <= HOST_BITS_PER_WIDE_INT
+ && TYPE_PRECISION (type) <= BITS_PER_WORD)
+ return false;
+
+ /* If the divisor is not power of 2 and the precision wider than
+ HWI, expand_divmod punts on that, so in that case it is better
+ to use divmod optab or libfunc. Similarly if choose_multiplier
+ might need pre/post shifts of BITS_PER_WORD or more. */
+ }
+
/* Exclude the case where TYPE_OVERFLOW_TRAPS (type) as that should
expand using the [su]divv optabs. */
if (TYPE_OVERFLOW_TRAPS (type))
--- gcc/internal-fn.c.jj 2020-10-02 10:36:43.272290992 +0200
+++ gcc/internal-fn.c 2020-10-05 15:15:12.498349327 +0200
@@ -50,6 +50,7 @@ along with GCC; see the file COPYING3.
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "explow.h"
+#include "rtl-iter.h"
/* The names of each internal function, indexed by function number. */
const char *const internal_fn_name_array[] = {
@@ -2985,6 +2986,32 @@ expand_gather_load_optab_fn (internal_fn
emit_move_insn (lhs_rtx, ops[0].value);
}
+/* Helper for expand_DIVMOD. Return true if the sequence starting with
+ INSN contains any call insns or insns with {,U}{DIV,MOD} rtxes. */
+
+static bool
+contains_call_div_mod (rtx_insn *insn)
+{
+ subrtx_iterator::array_type array;
+ for (; insn; insn = NEXT_INSN (insn))
+ if (CALL_P (insn))
+ return true;
+ else if (INSN_P (insn))
+ FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
+ switch (GET_CODE (*iter))
+ {
+ case CALL:
+ case DIV:
+ case UDIV:
+ case MOD:
+ case UMOD:
+ return true;
+ default:
+ break;
+ }
+ return false;
+ }
+
/* Expand DIVMOD() using:
a) optab handler for udivmod/sdivmod if it is available.
b) If optab_handler doesn't exist, generate call to
@@ -3007,10 +3034,44 @@ expand_DIVMOD (internal_fn, gcall *call_
rtx op1 = expand_normal (arg1);
rtx target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
- rtx quotient, remainder, libfunc;
+ rtx quotient = NULL_RTX, remainder = NULL_RTX;
+ rtx_insn *insns = NULL;
+
+ if (TREE_CODE (arg1) == INTEGER_CST)
+ {
+ /* For DIVMOD by integral constants, there could be efficient code
+ expanded inline e.g. using shifts and plus/minus. Try to expand
+ the division and modulo and if it emits any library calls or any
+ {,U}{DIV,MOD} rtxes throw it away and use a divmod optab or
+ divmod libcall. */
+ struct separate_ops ops;
+ ops.code = TRUNC_DIV_EXPR;
+ ops.type = type;
+ ops.op0 = make_tree (ops.type, op0);
+ ops.op1 = arg1;
+ ops.op2 = NULL_TREE;
+ ops.location = gimple_location (call_stmt);
+ start_sequence ();
+ quotient = expand_expr_real_2 (&ops, NULL_RTX, mode, EXPAND_NORMAL);
+ if (contains_call_div_mod (get_insns ()))
+ quotient = NULL_RTX;
+ else
+ {
+ ops.code = TRUNC_MOD_EXPR;
+ remainder = expand_expr_real_2 (&ops, NULL_RTX, mode, EXPAND_NORMAL);
+ if (contains_call_div_mod (get_insns ()))
+ remainder = NULL_RTX;
+ }
+ if (remainder)
+ insns = get_insns ();
+ end_sequence ();
+ }
+
+ if (remainder)
+ emit_insn (insns);
/* Check if optab_handler exists for divmod_optab for given mode. */
- if (optab_handler (tab, mode) != CODE_FOR_nothing)
+ else if (optab_handler (tab, mode) != CODE_FOR_nothing)
{
quotient = gen_reg_rtx (mode);
remainder = gen_reg_rtx (mode);
@@ -3018,7 +3079,7 @@ expand_DIVMOD (internal_fn, gcall *call_
}
/* Generate call to divmod libfunc if it exists. */
- else if ((libfunc = optab_libfunc (tab, mode)) != NULL_RTX)
+ else if (rtx libfunc = optab_libfunc (tab, mode))
targetm.expand_divmod_libfunc (libfunc, mode, op0, op1,
"ient, &remainder);
--- gcc/testsuite/gcc.target/i386/pr97282.c.jj 2020-10-05 15:31:19.500363710 +0200
+++ gcc/testsuite/gcc.target/i386/pr97282.c 2020-10-05 15:28:51.864499619 +0200
@@ -0,0 +1,25 @@
+/* PR rtl-optimization/97282 */
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-final { scan-assembler "call\[^\n\r]*__udivmod\[dt]i4" } } */
+
+#ifdef __SIZEOF_INT128__
+typedef __uint128_t T;
+#else
+typedef unsigned long long T;
+#endif
+
+unsigned long
+foo (T x)
+{
+ if (x == 0)
+ return 0;
+
+ unsigned long ret = 0;
+ while (x > 0)
+ {
+ ret = ret + x % 10;
+ x = x / 10;
+ }
+ return ret;
+}
Jakub
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH] divmod: Match and expand DIVMOD even in some cases of constant divisor [PR97282]
2020-10-06 7:37 [PATCH] divmod: Match and expand DIVMOD even in some cases of constant divisor [PR97282] Jakub Jelinek
@ 2020-10-06 8:13 ` Richard Biener
2020-10-06 9:42 ` Christophe Lyon
0 siblings, 1 reply; 4+ messages in thread
From: Richard Biener @ 2020-10-06 8:13 UTC (permalink / raw)
To: Jakub Jelinek; +Cc: gcc-patches
On Tue, 6 Oct 2020, Jakub Jelinek wrote:
> Hi!
>
> As written in the comment, tree-ssa-math-opts.c wouldn't create a DIVMOD
> ifn call for division + modulo by constant for the fear that during
> expansion we could generate better code for those cases.
> If the divisoris a power of two, that is certainly the case always,
> but otherwise expand_divmod can punt in many cases, e.g. if the division
> type's precision is above HOST_BITS_PER_WIDE_INT, we don't even call
> choose_multiplier, because it works on HOST_WIDE_INTs (true, something
> we should fix eventually now that we have wide_ints), or if pre/post shift
> is larger than BITS_PER_WORD.
>
> So, the following patch recognizes DIVMOD with constant last argument even
> when it is unclear if expand_divmod will be able to optimize it, and then
> during DIVMOD expansion if the divisor is constant attempts to expand it as
> division + modulo and if they actually don't contain any libcalls or
> division/modulo, they are kept as is, otherwise that sequence is thrown away
> and divmod optab or libcall is used.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
OK.
Richard.
> 2020-10-06 Jakub Jelinek <jakub@redhat.com>
>
> PR rtl-optimization/97282
> * tree-ssa-math-opts.c (divmod_candidate_p): Don't return false for
> constant op2 if it is not a power of two and the type has precision
> larger than HOST_BITS_PER_WIDE_INT or BITS_PER_WORD.
> * internal-fn.c (contains_call_div_mod): New function.
> (expand_DIVMOD): If last argument is a constant, try to expand it as
> TRUNC_DIV_EXPR followed by TRUNC_MOD_EXPR, but if the sequence
> contains any calls or {,U}{DIV,MOD} rtxes, throw it away and use
> divmod optab or divmod libfunc.
>
> * gcc.target/i386/pr97282.c: New test.
>
> --- gcc/tree-ssa-math-opts.c.jj 2020-10-01 10:40:10.104755999 +0200
> +++ gcc/tree-ssa-math-opts.c 2020-10-05 13:51:54.476628287 +0200
> @@ -3567,9 +3567,24 @@ divmod_candidate_p (gassign *stmt)
>
> /* Disable the transform if either is a constant, since division-by-constant
> may have specialized expansion. */
> - if (CONSTANT_CLASS_P (op1) || CONSTANT_CLASS_P (op2))
> + if (CONSTANT_CLASS_P (op1))
> return false;
>
> + if (CONSTANT_CLASS_P (op2))
> + {
> + if (integer_pow2p (op2))
> + return false;
> +
> + if (TYPE_PRECISION (type) <= HOST_BITS_PER_WIDE_INT
> + && TYPE_PRECISION (type) <= BITS_PER_WORD)
> + return false;
> +
> + /* If the divisor is not power of 2 and the precision wider than
> + HWI, expand_divmod punts on that, so in that case it is better
> + to use divmod optab or libfunc. Similarly if choose_multiplier
> + might need pre/post shifts of BITS_PER_WORD or more. */
> + }
> +
> /* Exclude the case where TYPE_OVERFLOW_TRAPS (type) as that should
> expand using the [su]divv optabs. */
> if (TYPE_OVERFLOW_TRAPS (type))
> --- gcc/internal-fn.c.jj 2020-10-02 10:36:43.272290992 +0200
> +++ gcc/internal-fn.c 2020-10-05 15:15:12.498349327 +0200
> @@ -50,6 +50,7 @@ along with GCC; see the file COPYING3.
> #include "tree-phinodes.h"
> #include "ssa-iterators.h"
> #include "explow.h"
> +#include "rtl-iter.h"
>
> /* The names of each internal function, indexed by function number. */
> const char *const internal_fn_name_array[] = {
> @@ -2985,6 +2986,32 @@ expand_gather_load_optab_fn (internal_fn
> emit_move_insn (lhs_rtx, ops[0].value);
> }
>
> +/* Helper for expand_DIVMOD. Return true if the sequence starting with
> + INSN contains any call insns or insns with {,U}{DIV,MOD} rtxes. */
> +
> +static bool
> +contains_call_div_mod (rtx_insn *insn)
> +{
> + subrtx_iterator::array_type array;
> + for (; insn; insn = NEXT_INSN (insn))
> + if (CALL_P (insn))
> + return true;
> + else if (INSN_P (insn))
> + FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
> + switch (GET_CODE (*iter))
> + {
> + case CALL:
> + case DIV:
> + case UDIV:
> + case MOD:
> + case UMOD:
> + return true;
> + default:
> + break;
> + }
> + return false;
> + }
> +
> /* Expand DIVMOD() using:
> a) optab handler for udivmod/sdivmod if it is available.
> b) If optab_handler doesn't exist, generate call to
> @@ -3007,10 +3034,44 @@ expand_DIVMOD (internal_fn, gcall *call_
> rtx op1 = expand_normal (arg1);
> rtx target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
>
> - rtx quotient, remainder, libfunc;
> + rtx quotient = NULL_RTX, remainder = NULL_RTX;
> + rtx_insn *insns = NULL;
> +
> + if (TREE_CODE (arg1) == INTEGER_CST)
> + {
> + /* For DIVMOD by integral constants, there could be efficient code
> + expanded inline e.g. using shifts and plus/minus. Try to expand
> + the division and modulo and if it emits any library calls or any
> + {,U}{DIV,MOD} rtxes throw it away and use a divmod optab or
> + divmod libcall. */
> + struct separate_ops ops;
> + ops.code = TRUNC_DIV_EXPR;
> + ops.type = type;
> + ops.op0 = make_tree (ops.type, op0);
> + ops.op1 = arg1;
> + ops.op2 = NULL_TREE;
> + ops.location = gimple_location (call_stmt);
> + start_sequence ();
> + quotient = expand_expr_real_2 (&ops, NULL_RTX, mode, EXPAND_NORMAL);
> + if (contains_call_div_mod (get_insns ()))
> + quotient = NULL_RTX;
> + else
> + {
> + ops.code = TRUNC_MOD_EXPR;
> + remainder = expand_expr_real_2 (&ops, NULL_RTX, mode, EXPAND_NORMAL);
> + if (contains_call_div_mod (get_insns ()))
> + remainder = NULL_RTX;
> + }
> + if (remainder)
> + insns = get_insns ();
> + end_sequence ();
> + }
> +
> + if (remainder)
> + emit_insn (insns);
>
> /* Check if optab_handler exists for divmod_optab for given mode. */
> - if (optab_handler (tab, mode) != CODE_FOR_nothing)
> + else if (optab_handler (tab, mode) != CODE_FOR_nothing)
> {
> quotient = gen_reg_rtx (mode);
> remainder = gen_reg_rtx (mode);
> @@ -3018,7 +3079,7 @@ expand_DIVMOD (internal_fn, gcall *call_
> }
>
> /* Generate call to divmod libfunc if it exists. */
> - else if ((libfunc = optab_libfunc (tab, mode)) != NULL_RTX)
> + else if (rtx libfunc = optab_libfunc (tab, mode))
> targetm.expand_divmod_libfunc (libfunc, mode, op0, op1,
> "ient, &remainder);
>
> --- gcc/testsuite/gcc.target/i386/pr97282.c.jj 2020-10-05 15:31:19.500363710 +0200
> +++ gcc/testsuite/gcc.target/i386/pr97282.c 2020-10-05 15:28:51.864499619 +0200
> @@ -0,0 +1,25 @@
> +/* PR rtl-optimization/97282 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +/* { dg-final { scan-assembler "call\[^\n\r]*__udivmod\[dt]i4" } } */
> +
> +#ifdef __SIZEOF_INT128__
> +typedef __uint128_t T;
> +#else
> +typedef unsigned long long T;
> +#endif
> +
> +unsigned long
> +foo (T x)
> +{
> + if (x == 0)
> + return 0;
> +
> + unsigned long ret = 0;
> + while (x > 0)
> + {
> + ret = ret + x % 10;
> + x = x / 10;
> + }
> + return ret;
> +}
>
> Jakub
>
>
--
Richard Biener <rguenther@suse.de>
SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg,
Germany; GF: Felix Imend
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH] divmod: Match and expand DIVMOD even in some cases of constant divisor [PR97282]
2020-10-06 8:13 ` Richard Biener
@ 2020-10-06 9:42 ` Christophe Lyon
2020-10-07 15:06 ` Christophe Lyon
0 siblings, 1 reply; 4+ messages in thread
From: Christophe Lyon @ 2020-10-06 9:42 UTC (permalink / raw)
To: Richard Biener; +Cc: Jakub Jelinek, gcc Patches
Hi Jakub,
On Tue, 6 Oct 2020 at 10:13, Richard Biener <rguenther@suse.de> wrote:
>
> On Tue, 6 Oct 2020, Jakub Jelinek wrote:
>
> > Hi!
> >
> > As written in the comment, tree-ssa-math-opts.c wouldn't create a DIVMOD
> > ifn call for division + modulo by constant for the fear that during
> > expansion we could generate better code for those cases.
> > If the divisoris a power of two, that is certainly the case always,
> > but otherwise expand_divmod can punt in many cases, e.g. if the division
> > type's precision is above HOST_BITS_PER_WIDE_INT, we don't even call
> > choose_multiplier, because it works on HOST_WIDE_INTs (true, something
> > we should fix eventually now that we have wide_ints), or if pre/post shift
> > is larger than BITS_PER_WORD.
> >
> > So, the following patch recognizes DIVMOD with constant last argument even
> > when it is unclear if expand_divmod will be able to optimize it, and then
> > during DIVMOD expansion if the divisor is constant attempts to expand it as
> > division + modulo and if they actually don't contain any libcalls or
> > division/modulo, they are kept as is, otherwise that sequence is thrown away
> > and divmod optab or libcall is used.
> >
> > Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
>
> OK.
>
> Richard.
>
> > 2020-10-06 Jakub Jelinek <jakub@redhat.com>
> >
> > PR rtl-optimization/97282
> > * tree-ssa-math-opts.c (divmod_candidate_p): Don't return false for
> > constant op2 if it is not a power of two and the type has precision
> > larger than HOST_BITS_PER_WIDE_INT or BITS_PER_WORD.
> > * internal-fn.c (contains_call_div_mod): New function.
> > (expand_DIVMOD): If last argument is a constant, try to expand it as
> > TRUNC_DIV_EXPR followed by TRUNC_MOD_EXPR, but if the sequence
> > contains any calls or {,U}{DIV,MOD} rtxes, throw it away and use
> > divmod optab or divmod libfunc.
> >
This patch causes ICEs on arm while building newlib or glibc
For instance with newlib when compiling vfwprintf.o:
during RTL pass: expand
In file included from
/tmp/1435347_7.tmpdir/aci-gcc-fsf/sources/newlib/newlib/libc/stdio/vfprintf.c:153:
/tmp/1435347_7.tmpdir/aci-gcc-fsf/sources/newlib/newlib/libc/include/stdio.h:
In function '_vfprintf_r':
/tmp/1435347_7.tmpdir/aci-gcc-fsf/sources/newlib/newlib/libc/include/stdio.h:503:9:
internal compiler error: in int_mode_for_mode, at stor-layout.c:404
503 | int _vfprintf_r (struct _reent *, FILE *__restrict, const
char *__restrict, __VALIST)
| ^~~~~~~~~~~
0xaed4e3 int_mode_for_mode(machine_mode)
/tmp/1435347_7.tmpdir/aci-gcc-fsf/sources/gcc-fsf/gccsrc/gcc/stor-layout.c:404
0x7ff73d emit_move_via_integer
/tmp/1435347_7.tmpdir/aci-gcc-fsf/sources/gcc-fsf/gccsrc/gcc/expr.c:3425
0x808f2d emit_move_insn_1(rtx_def*, rtx_def*)
/tmp/1435347_7.tmpdir/aci-gcc-fsf/sources/gcc-fsf/gccsrc/gcc/expr.c:3793
0x8092d7 emit_move_insn(rtx_def*, rtx_def*)
/tmp/1435347_7.tmpdir/aci-gcc-fsf/sources/gcc-fsf/gccsrc/gcc/expr.c:3935
0x6e703f emit_library_call_value_1(int, rtx_def*, rtx_def*,
libcall_type, machine_mode, int, std::pair<rtx_def*, machine_mode>*)
/tmp/1435347_7.tmpdir/aci-gcc-fsf/sources/gcc-fsf/gccsrc/gcc/calls.c:5601
0xdff642 emit_library_call_value
/tmp/1435347_7.tmpdir/aci-gcc-fsf/sources/gcc-fsf/gccsrc/gcc/rtl.h:4258
0xdff642 arm_expand_divmod_libfunc
/tmp/1435347_7.tmpdir/aci-gcc-fsf/sources/gcc-fsf/gccsrc/gcc/config/arm/arm.c:33256
0x8c69af expand_DIVMOD
/tmp/1435347_7.tmpdir/aci-gcc-fsf/sources/gcc-fsf/gccsrc/gcc/internal-fn.c:3084
0x7021b7 expand_call_stmt
/tmp/1435347_7.tmpdir/aci-gcc-fsf/sources/gcc-fsf/gccsrc/gcc/cfgexpand.c:2612
0x7021b7 expand_gimple_stmt_1
/tmp/1435347_7.tmpdir/aci-gcc-fsf/sources/gcc-fsf/gccsrc/gcc/cfgexpand.c:3686
0x7021b7 expand_gimple_stmt
/tmp/1435347_7.tmpdir/aci-gcc-fsf/sources/gcc-fsf/gccsrc/gcc/cfgexpand.c:3851
0x702cfd expand_gimple_basic_block
/tmp/1435347_7.tmpdir/aci-gcc-fsf/sources/gcc-fsf/gccsrc/gcc/cfgexpand.c:5892
0x70533e execute
/tmp/1435347_7.tmpdir/aci-gcc-fsf/sources/gcc-fsf/gccsrc/gcc/cfgexpand.c:6576
Christophe
> > * gcc.target/i386/pr97282.c: New test.
> >
> > --- gcc/tree-ssa-math-opts.c.jj 2020-10-01 10:40:10.104755999 +0200
> > +++ gcc/tree-ssa-math-opts.c 2020-10-05 13:51:54.476628287 +0200
> > @@ -3567,9 +3567,24 @@ divmod_candidate_p (gassign *stmt)
> >
> > /* Disable the transform if either is a constant, since division-by-constant
> > may have specialized expansion. */
> > - if (CONSTANT_CLASS_P (op1) || CONSTANT_CLASS_P (op2))
> > + if (CONSTANT_CLASS_P (op1))
> > return false;
> >
> > + if (CONSTANT_CLASS_P (op2))
> > + {
> > + if (integer_pow2p (op2))
> > + return false;
> > +
> > + if (TYPE_PRECISION (type) <= HOST_BITS_PER_WIDE_INT
> > + && TYPE_PRECISION (type) <= BITS_PER_WORD)
> > + return false;
> > +
> > + /* If the divisor is not power of 2 and the precision wider than
> > + HWI, expand_divmod punts on that, so in that case it is better
> > + to use divmod optab or libfunc. Similarly if choose_multiplier
> > + might need pre/post shifts of BITS_PER_WORD or more. */
> > + }
> > +
> > /* Exclude the case where TYPE_OVERFLOW_TRAPS (type) as that should
> > expand using the [su]divv optabs. */
> > if (TYPE_OVERFLOW_TRAPS (type))
> > --- gcc/internal-fn.c.jj 2020-10-02 10:36:43.272290992 +0200
> > +++ gcc/internal-fn.c 2020-10-05 15:15:12.498349327 +0200
> > @@ -50,6 +50,7 @@ along with GCC; see the file COPYING3.
> > #include "tree-phinodes.h"
> > #include "ssa-iterators.h"
> > #include "explow.h"
> > +#include "rtl-iter.h"
> >
> > /* The names of each internal function, indexed by function number. */
> > const char *const internal_fn_name_array[] = {
> > @@ -2985,6 +2986,32 @@ expand_gather_load_optab_fn (internal_fn
> > emit_move_insn (lhs_rtx, ops[0].value);
> > }
> >
> > +/* Helper for expand_DIVMOD. Return true if the sequence starting with
> > + INSN contains any call insns or insns with {,U}{DIV,MOD} rtxes. */
> > +
> > +static bool
> > +contains_call_div_mod (rtx_insn *insn)
> > +{
> > + subrtx_iterator::array_type array;
> > + for (; insn; insn = NEXT_INSN (insn))
> > + if (CALL_P (insn))
> > + return true;
> > + else if (INSN_P (insn))
> > + FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
> > + switch (GET_CODE (*iter))
> > + {
> > + case CALL:
> > + case DIV:
> > + case UDIV:
> > + case MOD:
> > + case UMOD:
> > + return true;
> > + default:
> > + break;
> > + }
> > + return false;
> > + }
> > +
> > /* Expand DIVMOD() using:
> > a) optab handler for udivmod/sdivmod if it is available.
> > b) If optab_handler doesn't exist, generate call to
> > @@ -3007,10 +3034,44 @@ expand_DIVMOD (internal_fn, gcall *call_
> > rtx op1 = expand_normal (arg1);
> > rtx target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
> >
> > - rtx quotient, remainder, libfunc;
> > + rtx quotient = NULL_RTX, remainder = NULL_RTX;
> > + rtx_insn *insns = NULL;
> > +
> > + if (TREE_CODE (arg1) == INTEGER_CST)
> > + {
> > + /* For DIVMOD by integral constants, there could be efficient code
> > + expanded inline e.g. using shifts and plus/minus. Try to expand
> > + the division and modulo and if it emits any library calls or any
> > + {,U}{DIV,MOD} rtxes throw it away and use a divmod optab or
> > + divmod libcall. */
> > + struct separate_ops ops;
> > + ops.code = TRUNC_DIV_EXPR;
> > + ops.type = type;
> > + ops.op0 = make_tree (ops.type, op0);
> > + ops.op1 = arg1;
> > + ops.op2 = NULL_TREE;
> > + ops.location = gimple_location (call_stmt);
> > + start_sequence ();
> > + quotient = expand_expr_real_2 (&ops, NULL_RTX, mode, EXPAND_NORMAL);
> > + if (contains_call_div_mod (get_insns ()))
> > + quotient = NULL_RTX;
> > + else
> > + {
> > + ops.code = TRUNC_MOD_EXPR;
> > + remainder = expand_expr_real_2 (&ops, NULL_RTX, mode, EXPAND_NORMAL);
> > + if (contains_call_div_mod (get_insns ()))
> > + remainder = NULL_RTX;
> > + }
> > + if (remainder)
> > + insns = get_insns ();
> > + end_sequence ();
> > + }
> > +
> > + if (remainder)
> > + emit_insn (insns);
> >
> > /* Check if optab_handler exists for divmod_optab for given mode. */
> > - if (optab_handler (tab, mode) != CODE_FOR_nothing)
> > + else if (optab_handler (tab, mode) != CODE_FOR_nothing)
> > {
> > quotient = gen_reg_rtx (mode);
> > remainder = gen_reg_rtx (mode);
> > @@ -3018,7 +3079,7 @@ expand_DIVMOD (internal_fn, gcall *call_
> > }
> >
> > /* Generate call to divmod libfunc if it exists. */
> > - else if ((libfunc = optab_libfunc (tab, mode)) != NULL_RTX)
> > + else if (rtx libfunc = optab_libfunc (tab, mode))
> > targetm.expand_divmod_libfunc (libfunc, mode, op0, op1,
> > "ient, &remainder);
> >
> > --- gcc/testsuite/gcc.target/i386/pr97282.c.jj 2020-10-05 15:31:19.500363710 +0200
> > +++ gcc/testsuite/gcc.target/i386/pr97282.c 2020-10-05 15:28:51.864499619 +0200
> > @@ -0,0 +1,25 @@
> > +/* PR rtl-optimization/97282 */
> > +/* { dg-do compile } */
> > +/* { dg-options "-O2" } */
> > +/* { dg-final { scan-assembler "call\[^\n\r]*__udivmod\[dt]i4" } } */
> > +
> > +#ifdef __SIZEOF_INT128__
> > +typedef __uint128_t T;
> > +#else
> > +typedef unsigned long long T;
> > +#endif
> > +
> > +unsigned long
> > +foo (T x)
> > +{
> > + if (x == 0)
> > + return 0;
> > +
> > + unsigned long ret = 0;
> > + while (x > 0)
> > + {
> > + ret = ret + x % 10;
> > + x = x / 10;
> > + }
> > + return ret;
> > +}
> >
> > Jakub
> >
> >
>
> --
> Richard Biener <rguenther@suse.de>
> SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg,
> Germany; GF: Felix Imend
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH] divmod: Match and expand DIVMOD even in some cases of constant divisor [PR97282]
2020-10-06 9:42 ` Christophe Lyon
@ 2020-10-07 15:06 ` Christophe Lyon
0 siblings, 0 replies; 4+ messages in thread
From: Christophe Lyon @ 2020-10-07 15:06 UTC (permalink / raw)
To: Richard Biener; +Cc: Jakub Jelinek, gcc Patches
On Tue, 6 Oct 2020 at 11:42, Christophe Lyon <christophe.lyon@linaro.org> wrote:
>
> Hi Jakub,
>
> On Tue, 6 Oct 2020 at 10:13, Richard Biener <rguenther@suse.de> wrote:
> >
> > On Tue, 6 Oct 2020, Jakub Jelinek wrote:
> >
> > > Hi!
> > >
> > > As written in the comment, tree-ssa-math-opts.c wouldn't create a DIVMOD
> > > ifn call for division + modulo by constant for the fear that during
> > > expansion we could generate better code for those cases.
> > > If the divisoris a power of two, that is certainly the case always,
> > > but otherwise expand_divmod can punt in many cases, e.g. if the division
> > > type's precision is above HOST_BITS_PER_WIDE_INT, we don't even call
> > > choose_multiplier, because it works on HOST_WIDE_INTs (true, something
> > > we should fix eventually now that we have wide_ints), or if pre/post shift
> > > is larger than BITS_PER_WORD.
> > >
> > > So, the following patch recognizes DIVMOD with constant last argument even
> > > when it is unclear if expand_divmod will be able to optimize it, and then
> > > during DIVMOD expansion if the divisor is constant attempts to expand it as
> > > division + modulo and if they actually don't contain any libcalls or
> > > division/modulo, they are kept as is, otherwise that sequence is thrown away
> > > and divmod optab or libcall is used.
> > >
> > > Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
> >
> > OK.
> >
> > Richard.
> >
> > > 2020-10-06 Jakub Jelinek <jakub@redhat.com>
> > >
> > > PR rtl-optimization/97282
> > > * tree-ssa-math-opts.c (divmod_candidate_p): Don't return false for
> > > constant op2 if it is not a power of two and the type has precision
> > > larger than HOST_BITS_PER_WIDE_INT or BITS_PER_WORD.
> > > * internal-fn.c (contains_call_div_mod): New function.
> > > (expand_DIVMOD): If last argument is a constant, try to expand it as
> > > TRUNC_DIV_EXPR followed by TRUNC_MOD_EXPR, but if the sequence
> > > contains any calls or {,U}{DIV,MOD} rtxes, throw it away and use
> > > divmod optab or divmod libfunc.
> > >
>
> This patch causes ICEs on arm while building newlib or glibc
>
> For instance with newlib when compiling vfwprintf.o:
> during RTL pass: expand
> In file included from
> /tmp/1435347_7.tmpdir/aci-gcc-fsf/sources/newlib/newlib/libc/stdio/vfprintf.c:153:
> /tmp/1435347_7.tmpdir/aci-gcc-fsf/sources/newlib/newlib/libc/include/stdio.h:
> In function '_vfprintf_r':
> /tmp/1435347_7.tmpdir/aci-gcc-fsf/sources/newlib/newlib/libc/include/stdio.h:503:9:
> internal compiler error: in int_mode_for_mode, at stor-layout.c:404
> 503 | int _vfprintf_r (struct _reent *, FILE *__restrict, const
> char *__restrict, __VALIST)
> | ^~~~~~~~~~~
> 0xaed4e3 int_mode_for_mode(machine_mode)
> /tmp/1435347_7.tmpdir/aci-gcc-fsf/sources/gcc-fsf/gccsrc/gcc/stor-layout.c:404
> 0x7ff73d emit_move_via_integer
> /tmp/1435347_7.tmpdir/aci-gcc-fsf/sources/gcc-fsf/gccsrc/gcc/expr.c:3425
> 0x808f2d emit_move_insn_1(rtx_def*, rtx_def*)
> /tmp/1435347_7.tmpdir/aci-gcc-fsf/sources/gcc-fsf/gccsrc/gcc/expr.c:3793
> 0x8092d7 emit_move_insn(rtx_def*, rtx_def*)
> /tmp/1435347_7.tmpdir/aci-gcc-fsf/sources/gcc-fsf/gccsrc/gcc/expr.c:3935
> 0x6e703f emit_library_call_value_1(int, rtx_def*, rtx_def*,
> libcall_type, machine_mode, int, std::pair<rtx_def*, machine_mode>*)
> /tmp/1435347_7.tmpdir/aci-gcc-fsf/sources/gcc-fsf/gccsrc/gcc/calls.c:5601
> 0xdff642 emit_library_call_value
> /tmp/1435347_7.tmpdir/aci-gcc-fsf/sources/gcc-fsf/gccsrc/gcc/rtl.h:4258
> 0xdff642 arm_expand_divmod_libfunc
> /tmp/1435347_7.tmpdir/aci-gcc-fsf/sources/gcc-fsf/gccsrc/gcc/config/arm/arm.c:33256
> 0x8c69af expand_DIVMOD
> /tmp/1435347_7.tmpdir/aci-gcc-fsf/sources/gcc-fsf/gccsrc/gcc/internal-fn.c:3084
> 0x7021b7 expand_call_stmt
> /tmp/1435347_7.tmpdir/aci-gcc-fsf/sources/gcc-fsf/gccsrc/gcc/cfgexpand.c:2612
> 0x7021b7 expand_gimple_stmt_1
> /tmp/1435347_7.tmpdir/aci-gcc-fsf/sources/gcc-fsf/gccsrc/gcc/cfgexpand.c:3686
> 0x7021b7 expand_gimple_stmt
> /tmp/1435347_7.tmpdir/aci-gcc-fsf/sources/gcc-fsf/gccsrc/gcc/cfgexpand.c:3851
> 0x702cfd expand_gimple_basic_block
> /tmp/1435347_7.tmpdir/aci-gcc-fsf/sources/gcc-fsf/gccsrc/gcc/cfgexpand.c:5892
> 0x70533e execute
> /tmp/1435347_7.tmpdir/aci-gcc-fsf/sources/gcc-fsf/gccsrc/gcc/cfgexpand.c:6576
>
I have just filed https://gcc.gnu.org/bugzilla/show_bug.cgi?id=97322
to track this.
Thanks
> Christophe
>
>
>
> > > * gcc.target/i386/pr97282.c: New test.
> > >
> > > --- gcc/tree-ssa-math-opts.c.jj 2020-10-01 10:40:10.104755999 +0200
> > > +++ gcc/tree-ssa-math-opts.c 2020-10-05 13:51:54.476628287 +0200
> > > @@ -3567,9 +3567,24 @@ divmod_candidate_p (gassign *stmt)
> > >
> > > /* Disable the transform if either is a constant, since division-by-constant
> > > may have specialized expansion. */
> > > - if (CONSTANT_CLASS_P (op1) || CONSTANT_CLASS_P (op2))
> > > + if (CONSTANT_CLASS_P (op1))
> > > return false;
> > >
> > > + if (CONSTANT_CLASS_P (op2))
> > > + {
> > > + if (integer_pow2p (op2))
> > > + return false;
> > > +
> > > + if (TYPE_PRECISION (type) <= HOST_BITS_PER_WIDE_INT
> > > + && TYPE_PRECISION (type) <= BITS_PER_WORD)
> > > + return false;
> > > +
> > > + /* If the divisor is not power of 2 and the precision wider than
> > > + HWI, expand_divmod punts on that, so in that case it is better
> > > + to use divmod optab or libfunc. Similarly if choose_multiplier
> > > + might need pre/post shifts of BITS_PER_WORD or more. */
> > > + }
> > > +
> > > /* Exclude the case where TYPE_OVERFLOW_TRAPS (type) as that should
> > > expand using the [su]divv optabs. */
> > > if (TYPE_OVERFLOW_TRAPS (type))
> > > --- gcc/internal-fn.c.jj 2020-10-02 10:36:43.272290992 +0200
> > > +++ gcc/internal-fn.c 2020-10-05 15:15:12.498349327 +0200
> > > @@ -50,6 +50,7 @@ along with GCC; see the file COPYING3.
> > > #include "tree-phinodes.h"
> > > #include "ssa-iterators.h"
> > > #include "explow.h"
> > > +#include "rtl-iter.h"
> > >
> > > /* The names of each internal function, indexed by function number. */
> > > const char *const internal_fn_name_array[] = {
> > > @@ -2985,6 +2986,32 @@ expand_gather_load_optab_fn (internal_fn
> > > emit_move_insn (lhs_rtx, ops[0].value);
> > > }
> > >
> > > +/* Helper for expand_DIVMOD. Return true if the sequence starting with
> > > + INSN contains any call insns or insns with {,U}{DIV,MOD} rtxes. */
> > > +
> > > +static bool
> > > +contains_call_div_mod (rtx_insn *insn)
> > > +{
> > > + subrtx_iterator::array_type array;
> > > + for (; insn; insn = NEXT_INSN (insn))
> > > + if (CALL_P (insn))
> > > + return true;
> > > + else if (INSN_P (insn))
> > > + FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
> > > + switch (GET_CODE (*iter))
> > > + {
> > > + case CALL:
> > > + case DIV:
> > > + case UDIV:
> > > + case MOD:
> > > + case UMOD:
> > > + return true;
> > > + default:
> > > + break;
> > > + }
> > > + return false;
> > > + }
> > > +
> > > /* Expand DIVMOD() using:
> > > a) optab handler for udivmod/sdivmod if it is available.
> > > b) If optab_handler doesn't exist, generate call to
> > > @@ -3007,10 +3034,44 @@ expand_DIVMOD (internal_fn, gcall *call_
> > > rtx op1 = expand_normal (arg1);
> > > rtx target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
> > >
> > > - rtx quotient, remainder, libfunc;
> > > + rtx quotient = NULL_RTX, remainder = NULL_RTX;
> > > + rtx_insn *insns = NULL;
> > > +
> > > + if (TREE_CODE (arg1) == INTEGER_CST)
> > > + {
> > > + /* For DIVMOD by integral constants, there could be efficient code
> > > + expanded inline e.g. using shifts and plus/minus. Try to expand
> > > + the division and modulo and if it emits any library calls or any
> > > + {,U}{DIV,MOD} rtxes throw it away and use a divmod optab or
> > > + divmod libcall. */
> > > + struct separate_ops ops;
> > > + ops.code = TRUNC_DIV_EXPR;
> > > + ops.type = type;
> > > + ops.op0 = make_tree (ops.type, op0);
> > > + ops.op1 = arg1;
> > > + ops.op2 = NULL_TREE;
> > > + ops.location = gimple_location (call_stmt);
> > > + start_sequence ();
> > > + quotient = expand_expr_real_2 (&ops, NULL_RTX, mode, EXPAND_NORMAL);
> > > + if (contains_call_div_mod (get_insns ()))
> > > + quotient = NULL_RTX;
> > > + else
> > > + {
> > > + ops.code = TRUNC_MOD_EXPR;
> > > + remainder = expand_expr_real_2 (&ops, NULL_RTX, mode, EXPAND_NORMAL);
> > > + if (contains_call_div_mod (get_insns ()))
> > > + remainder = NULL_RTX;
> > > + }
> > > + if (remainder)
> > > + insns = get_insns ();
> > > + end_sequence ();
> > > + }
> > > +
> > > + if (remainder)
> > > + emit_insn (insns);
> > >
> > > /* Check if optab_handler exists for divmod_optab for given mode. */
> > > - if (optab_handler (tab, mode) != CODE_FOR_nothing)
> > > + else if (optab_handler (tab, mode) != CODE_FOR_nothing)
> > > {
> > > quotient = gen_reg_rtx (mode);
> > > remainder = gen_reg_rtx (mode);
> > > @@ -3018,7 +3079,7 @@ expand_DIVMOD (internal_fn, gcall *call_
> > > }
> > >
> > > /* Generate call to divmod libfunc if it exists. */
> > > - else if ((libfunc = optab_libfunc (tab, mode)) != NULL_RTX)
> > > + else if (rtx libfunc = optab_libfunc (tab, mode))
> > > targetm.expand_divmod_libfunc (libfunc, mode, op0, op1,
> > > "ient, &remainder);
> > >
> > > --- gcc/testsuite/gcc.target/i386/pr97282.c.jj 2020-10-05 15:31:19.500363710 +0200
> > > +++ gcc/testsuite/gcc.target/i386/pr97282.c 2020-10-05 15:28:51.864499619 +0200
> > > @@ -0,0 +1,25 @@
> > > +/* PR rtl-optimization/97282 */
> > > +/* { dg-do compile } */
> > > +/* { dg-options "-O2" } */
> > > +/* { dg-final { scan-assembler "call\[^\n\r]*__udivmod\[dt]i4" } } */
> > > +
> > > +#ifdef __SIZEOF_INT128__
> > > +typedef __uint128_t T;
> > > +#else
> > > +typedef unsigned long long T;
> > > +#endif
> > > +
> > > +unsigned long
> > > +foo (T x)
> > > +{
> > > + if (x == 0)
> > > + return 0;
> > > +
> > > + unsigned long ret = 0;
> > > + while (x > 0)
> > > + {
> > > + ret = ret + x % 10;
> > > + x = x / 10;
> > > + }
> > > + return ret;
> > > +}
> > >
> > > Jakub
> > >
> > >
> >
> > --
> > Richard Biener <rguenther@suse.de>
> > SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg,
> > Germany; GF: Felix Imend
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2020-10-07 15:07 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-10-06 7:37 [PATCH] divmod: Match and expand DIVMOD even in some cases of constant divisor [PR97282] Jakub Jelinek
2020-10-06 8:13 ` Richard Biener
2020-10-06 9:42 ` Christophe Lyon
2020-10-07 15:06 ` Christophe Lyon
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).