public inbox for gcc-cvs@sourceware.org help / color / mirror / Atom feed
From: Jakub Jelinek <jakub@gcc.gnu.org> To: gcc-cvs@gcc.gnu.org Subject: [gcc r12-3017] expand: Add new clrsb fallback expansion [PR101950] Date: Thu, 19 Aug 2021 09:02:38 +0000 (GMT) [thread overview] Message-ID: <20210819090238.9D950385840C@sourceware.org> (raw) https://gcc.gnu.org/g:301dc6011cbceb7ea9debd86aaec7cadb37213c8 commit r12-3017-g301dc6011cbceb7ea9debd86aaec7cadb37213c8 Author: Jakub Jelinek <jakub@redhat.com> Date: Thu Aug 19 11:00:27 2021 +0200 expand: Add new clrsb fallback expansion [PR101950] As suggested in the PR, the following patch adds two new clrsb expansion possibilities if target doesn't have clrsb_optab for the requested nor wider modes, but does have clz_optab for the requested mode. One expansion is clrsb (op0) expands as clz (op0 ^ (((stype)op0) >> (prec-1))) - 1 which is usable if CLZ_DEFINED_VALUE_AT_ZERO is 2 with value of prec, because the clz argument can be 0 and clrsb should give prec-1 in that case. The other expansion is clz (((op0 << 1) ^ (((stype)op0) >> (prec-1))) | 1) where the clz argument is never 0, but it is one operation longer. E.g. on x86_64-linux with -O2 -mno-lzcnt, this results for int foo (int x) { return __builtin_clrsb (x); } in - subq $8, %rsp - movslq %edi, %rdi - call __clrsbdi2 - addq $8, %rsp - subl $32, %eax + leal (%rdi,%rdi), %eax + sarl $31, %edi + xorl %edi, %eax + orl $1, %eax + bsrl %eax, %eax + xorl $31, %eax and with -O2 -mlzcnt: + movl %edi, %eax + sarl $31, %eax + xorl %edi, %eax + lzcntl %eax, %eax + subl $1, %eax On armv7hl-linux-gnueabi with -O2: - push {r4, lr} - bl __clrsbsi2 - pop {r4, pc} + @ link register save eliminated. + eor r0, r0, r0, asr #31 + clz r0, r0 + sub r0, r0, #1 + bx lr As it (at least usually) will make code larger, it is disabled for -Os or cold instructions. 2021-08-19 Jakub Jelinek <jakub@redhat.com> PR middle-end/101950 * optabs.c (expand_clrsb_using_clz): New function. (expand_unop): Use it as another clrsb expansion fallback. 
* gcc.target/i386/pr101950-1.c: New test. * gcc.target/i386/pr101950-2.c: New test. Diff: --- gcc/optabs.c | 79 ++++++++++++++++++++++++++++++ gcc/testsuite/gcc.target/i386/pr101950-1.c | 20 ++++++++ gcc/testsuite/gcc.target/i386/pr101950-2.c | 19 +++++++ 3 files changed, 118 insertions(+) diff --git a/gcc/optabs.c b/gcc/optabs.c index 14d8ad2f33f..ebed78fda3f 100644 --- a/gcc/optabs.c +++ b/gcc/optabs.c @@ -2600,6 +2600,82 @@ widen_leading (scalar_int_mode mode, rtx op0, rtx target, optab unoptab) return 0; } +/* Attempt to emit (clrsb:mode op0) as + (plus:mode (clz:mode (xor:mode op0 (ashr:mode op0 (const_int prec-1)))) + (const_int -1)) + if CLZ_DEFINED_VALUE_AT_ZERO (mode, val) is 2 and val is prec, + or as + (clz:mode (ior:mode (xor:mode (ashl:mode op0 (const_int 1)) + (ashr:mode op0 (const_int prec-1))) + (const_int 1))) + otherwise. */ + +static rtx +expand_clrsb_using_clz (scalar_int_mode mode, rtx op0, rtx target) +{ + if (optimize_insn_for_size_p () + || optab_handler (clz_optab, mode) == CODE_FOR_nothing) + return NULL_RTX; + + start_sequence (); + HOST_WIDE_INT val = 0; + if (CLZ_DEFINED_VALUE_AT_ZERO (mode, val) != 2 + || val != GET_MODE_PRECISION (mode)) + val = 0; + else + val = 1; + + rtx temp2 = op0; + if (!val) + { + temp2 = expand_binop (mode, ashl_optab, op0, const1_rtx, + NULL_RTX, 0, OPTAB_DIRECT); + if (!temp2) + { + fail: + end_sequence (); + return NULL_RTX; + } + } + + rtx temp = expand_binop (mode, ashr_optab, op0, + GEN_INT (GET_MODE_PRECISION (mode) - 1), + NULL_RTX, 0, OPTAB_DIRECT); + if (!temp) + goto fail; + + temp = expand_binop (mode, xor_optab, temp2, temp, NULL_RTX, 0, + OPTAB_DIRECT); + if (!temp) + goto fail; + + if (!val) + { + temp = expand_binop (mode, ior_optab, temp, const1_rtx, + NULL_RTX, 0, OPTAB_DIRECT); + if (!temp) + goto fail; + } + temp = expand_unop_direct (mode, clz_optab, temp, val ? 
NULL_RTX : target, + true); + if (!temp) + goto fail; + if (val) + { + temp = expand_binop (mode, add_optab, temp, constm1_rtx, + target, 0, OPTAB_DIRECT); + if (!temp) + goto fail; + } + + rtx_insn *seq = get_insns (); + end_sequence (); + + add_equal_note (seq, temp, CLRSB, op0, NULL_RTX, mode); + emit_insn (seq); + return temp; +} + /* Try calculating clz of a double-word quantity as two clz's of word-sized quantities, choosing which based on whether the high word is nonzero. */ static rtx @@ -3171,6 +3247,9 @@ expand_unop (machine_mode mode, optab unoptab, rtx op0, rtx target, temp = widen_leading (int_mode, op0, target, unoptab); if (temp) return temp; + temp = expand_clrsb_using_clz (int_mode, op0, target); + if (temp) + return temp; } goto try_libcall; } diff --git a/gcc/testsuite/gcc.target/i386/pr101950-1.c b/gcc/testsuite/gcc.target/i386/pr101950-1.c new file mode 100644 index 00000000000..cc980646f6c --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr101950-1.c @@ -0,0 +1,20 @@ +/* PR middle-end/101950 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -mno-lzcnt" } */ +/* { dg-final { scan-assembler-not "call\[^\n\r]*__clrsb.i2" } } */ +/* { dg-final { scan-assembler-times "\tbsr\[ql]\t" 2 } } */ +/* { dg-final { scan-assembler-times "\txor\[ql]\t" 4 } } */ +/* { dg-final { scan-assembler-times "\tor\[ql]\t" 2 } } */ +/* { dg-final { scan-assembler-times "\tsar\[ql]\t|\tcltd" 2 } } */ + +int +foo (long x) +{ + return __builtin_clrsbl (x); +} + +int +bar (int x) +{ + return __builtin_clrsb (x); +} diff --git a/gcc/testsuite/gcc.target/i386/pr101950-2.c b/gcc/testsuite/gcc.target/i386/pr101950-2.c new file mode 100644 index 00000000000..896f1b46414 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr101950-2.c @@ -0,0 +1,19 @@ +/* PR middle-end/101950 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -mlzcnt" } */ +/* { dg-final { scan-assembler-not "call\[^\n\r]*__clrsb.i2" } } */ +/* { dg-final { scan-assembler-times "\tlzcnt\[ql]\t" 2 } } */ +/* { 
dg-final { scan-assembler-times "\txor\[ql]\t" 2 } } */ +/* { dg-final { scan-assembler-times "\tsar\[ql]\t|\tcltd" 2 } } */ + +int +foo (long x) +{ + return __builtin_clrsbl (x); +} + +int +bar (int x) +{ + return __builtin_clrsb (x); +}
reply other threads:[~2021-08-19 9:02 UTC|newest] Thread overview: [no followups] expand[flat|nested] mbox.gz Atom feed
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=20210819090238.9D950385840C@sourceware.org \ --to=jakub@gcc.gnu.org \ --cc=gcc-cvs@gcc.gnu.org \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link. Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).