public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r12-3017] expand: Add new clrsb fallback expansion [PR101950]
@ 2021-08-19 9:02 Jakub Jelinek
0 siblings, 0 replies; only message in thread
From: Jakub Jelinek @ 2021-08-19 9:02 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:301dc6011cbceb7ea9debd86aaec7cadb37213c8
commit r12-3017-g301dc6011cbceb7ea9debd86aaec7cadb37213c8
Author: Jakub Jelinek <jakub@redhat.com>
Date: Thu Aug 19 11:00:27 2021 +0200
expand: Add new clrsb fallback expansion [PR101950]
As suggested in the PR, the following patch adds two new clrsb
expansion possibilities for the case where the target doesn't have
clrsb_optab for the requested mode or any wider mode, but does have
clz_optab for the requested mode.
One expansion is
clrsb (op0)
expands as
clz (op0 ^ (((stype)op0) >> (prec-1))) - 1
which is usable only if CLZ_DEFINED_VALUE_AT_ZERO is 2 and the value it
defines is prec, because the clz argument can be 0 (when op0 is 0 or -1)
and clrsb should give prec-1 in that case.
The other expansion is
clz (((op0 << 1) ^ (((stype)op0) >> (prec-1))) | 1)
where the clz argument is never 0, but it is one operation longer.
E.g. on x86_64-linux with -O2 -mno-lzcnt, for
int foo (int x) { return __builtin_clrsb (x); }
the generated code changes as follows:
- subq $8, %rsp
- movslq %edi, %rdi
- call __clrsbdi2
- addq $8, %rsp
- subl $32, %eax
+ leal (%rdi,%rdi), %eax
+ sarl $31, %edi
+ xorl %edi, %eax
+ orl $1, %eax
+ bsrl %eax, %eax
+ xorl $31, %eax
and with -O2 -mlzcnt:
+ movl %edi, %eax
+ sarl $31, %eax
+ xorl %edi, %eax
+ lzcntl %eax, %eax
+ subl $1, %eax
On armv7hl-linux-gnueabi with -O2:
- push {r4, lr}
- bl __clrsbsi2
- pop {r4, pc}
+ @ link register save eliminated.
+ eor r0, r0, r0, asr #31
+ clz r0, r0
+ sub r0, r0, #1
+ bx lr
As the new expansion will (at least usually) make the code larger, it is
disabled when optimizing for size (-Os or cold instructions).
2021-08-19 Jakub Jelinek <jakub@redhat.com>
PR middle-end/101950
* optabs.c (expand_clrsb_using_clz): New function.
(expand_unop): Use it as another clrsb expansion fallback.
* gcc.target/i386/pr101950-1.c: New test.
* gcc.target/i386/pr101950-2.c: New test.
Diff:
---
gcc/optabs.c | 79 ++++++++++++++++++++++++++++++
gcc/testsuite/gcc.target/i386/pr101950-1.c | 20 ++++++++
gcc/testsuite/gcc.target/i386/pr101950-2.c | 19 +++++++
3 files changed, 118 insertions(+)
diff --git a/gcc/optabs.c b/gcc/optabs.c
index 14d8ad2f33f..ebed78fda3f 100644
--- a/gcc/optabs.c
+++ b/gcc/optabs.c
@@ -2600,6 +2600,82 @@ widen_leading (scalar_int_mode mode, rtx op0, rtx target, optab unoptab)
return 0;
}
+/* Attempt to emit (clrsb:mode op0) as
+ (plus:mode (clz:mode (xor:mode op0 (ashr:mode op0 (const_int prec-1))))
+ (const_int -1))
+ if CLZ_DEFINED_VALUE_AT_ZERO (mode, val) is 2 and val is prec,
+ or as
+ (clz:mode (ior:mode (xor:mode (ashl:mode op0 (const_int 1))
+ (ashr:mode op0 (const_int prec-1)))
+ (const_int 1)))
+ otherwise. */
+
+static rtx
+expand_clrsb_using_clz (scalar_int_mode mode, rtx op0, rtx target)
+{
+ if (optimize_insn_for_size_p ()
+ || optab_handler (clz_optab, mode) == CODE_FOR_nothing)
+ return NULL_RTX;
+
+ start_sequence ();
+ HOST_WIDE_INT val = 0;
+ if (CLZ_DEFINED_VALUE_AT_ZERO (mode, val) != 2
+ || val != GET_MODE_PRECISION (mode))
+ val = 0;
+ else
+ val = 1;
+
+ rtx temp2 = op0;
+ if (!val)
+ {
+ temp2 = expand_binop (mode, ashl_optab, op0, const1_rtx,
+ NULL_RTX, 0, OPTAB_DIRECT);
+ if (!temp2)
+ {
+ fail:
+ end_sequence ();
+ return NULL_RTX;
+ }
+ }
+
+ rtx temp = expand_binop (mode, ashr_optab, op0,
+ GEN_INT (GET_MODE_PRECISION (mode) - 1),
+ NULL_RTX, 0, OPTAB_DIRECT);
+ if (!temp)
+ goto fail;
+
+ temp = expand_binop (mode, xor_optab, temp2, temp, NULL_RTX, 0,
+ OPTAB_DIRECT);
+ if (!temp)
+ goto fail;
+
+ if (!val)
+ {
+ temp = expand_binop (mode, ior_optab, temp, const1_rtx,
+ NULL_RTX, 0, OPTAB_DIRECT);
+ if (!temp)
+ goto fail;
+ }
+ temp = expand_unop_direct (mode, clz_optab, temp, val ? NULL_RTX : target,
+ true);
+ if (!temp)
+ goto fail;
+ if (val)
+ {
+ temp = expand_binop (mode, add_optab, temp, constm1_rtx,
+ target, 0, OPTAB_DIRECT);
+ if (!temp)
+ goto fail;
+ }
+
+ rtx_insn *seq = get_insns ();
+ end_sequence ();
+
+ add_equal_note (seq, temp, CLRSB, op0, NULL_RTX, mode);
+ emit_insn (seq);
+ return temp;
+}
+
/* Try calculating clz of a double-word quantity as two clz's of word-sized
quantities, choosing which based on whether the high word is nonzero. */
static rtx
@@ -3171,6 +3247,9 @@ expand_unop (machine_mode mode, optab unoptab, rtx op0, rtx target,
temp = widen_leading (int_mode, op0, target, unoptab);
if (temp)
return temp;
+ temp = expand_clrsb_using_clz (int_mode, op0, target);
+ if (temp)
+ return temp;
}
goto try_libcall;
}
diff --git a/gcc/testsuite/gcc.target/i386/pr101950-1.c b/gcc/testsuite/gcc.target/i386/pr101950-1.c
new file mode 100644
index 00000000000..cc980646f6c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr101950-1.c
@@ -0,0 +1,20 @@
+/* PR middle-end/101950 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mno-lzcnt" } */
+/* { dg-final { scan-assembler-not "call\[^\n\r]*__clrsb.i2" } } */
+/* { dg-final { scan-assembler-times "\tbsr\[ql]\t" 2 } } */
+/* { dg-final { scan-assembler-times "\txor\[ql]\t" 4 } } */
+/* { dg-final { scan-assembler-times "\tor\[ql]\t" 2 } } */
+/* { dg-final { scan-assembler-times "\tsar\[ql]\t|\tcltd" 2 } } */
+
+int
+foo (long x)
+{
+ return __builtin_clrsbl (x);
+}
+
+int
+bar (int x)
+{
+ return __builtin_clrsb (x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr101950-2.c b/gcc/testsuite/gcc.target/i386/pr101950-2.c
new file mode 100644
index 00000000000..896f1b46414
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr101950-2.c
@@ -0,0 +1,19 @@
+/* PR middle-end/101950 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mlzcnt" } */
+/* { dg-final { scan-assembler-not "call\[^\n\r]*__clrsb.i2" } } */
+/* { dg-final { scan-assembler-times "\tlzcnt\[ql]\t" 2 } } */
+/* { dg-final { scan-assembler-times "\txor\[ql]\t" 2 } } */
+/* { dg-final { scan-assembler-times "\tsar\[ql]\t|\tcltd" 2 } } */
+
+int
+foo (long x)
+{
+ return __builtin_clrsbl (x);
+}
+
+int
+bar (int x)
+{
+ return __builtin_clrsb (x);
+}
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2021-08-19 9:02 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-08-19 9:02 [gcc r12-3017] expand: Add new clrsb fallback expansion [PR101950] Jakub Jelinek
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).