public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
From: Jakub Jelinek <jakub@gcc.gnu.org>
To: gcc-cvs@gcc.gnu.org
Subject: [gcc r12-3017] expand: Add new clrsb fallback expansion [PR101950]
Date: Thu, 19 Aug 2021 09:02:38 +0000 (GMT)	[thread overview]
Message-ID: <20210819090238.9D950385840C@sourceware.org> (raw)

https://gcc.gnu.org/g:301dc6011cbceb7ea9debd86aaec7cadb37213c8

commit r12-3017-g301dc6011cbceb7ea9debd86aaec7cadb37213c8
Author: Jakub Jelinek <jakub@redhat.com>
Date:   Thu Aug 19 11:00:27 2021 +0200

    expand: Add new clrsb fallback expansion [PR101950]
    
    As suggested in the PR, the following patch adds two new clrsb
    expansion possibilities for the case where the target doesn't have
    clrsb_optab for the requested mode or any wider mode, but does have
    clz_optab for the requested mode.
    One expansion is
    clrsb (op0)
    expands as
    clz (op0 ^ (((stype)op0) >> (prec-1))) - 1
    which is usable only if CLZ_DEFINED_VALUE_AT_ZERO is 2 with value
    of prec, because the clz argument is 0 when op0 is 0 or -1 and
    clrsb should give prec-1 in that case.
    The other expansion is
    clz (((op0 << 1) ^ (((stype)op0) >> (prec-1))) | 1)
    where the clz argument is never 0 because its low bit is always set,
    so no defined clz value at zero is needed, but it is one operation
    longer.
    E.g. on x86_64-linux with -O2 -mno-lzcnt, this results for
    int foo (int x) { return __builtin_clrsb (x); }
    in
    -       subq    $8, %rsp
    -       movslq  %edi, %rdi
    -       call    __clrsbdi2
    -       addq    $8, %rsp
    -       subl    $32, %eax
    +       leal    (%rdi,%rdi), %eax
    +       sarl    $31, %edi
    +       xorl    %edi, %eax
    +       orl     $1, %eax
    +       bsrl    %eax, %eax
    +       xorl    $31, %eax
    and with -O2 -mlzcnt:
    +       movl    %edi, %eax
    +       sarl    $31, %eax
    +       xorl    %edi, %eax
    +       lzcntl  %eax, %eax
    +       subl    $1, %eax
    On armv7hl-linux-gnueabi with -O2:
    -       push    {r4, lr}
    -       bl      __clrsbsi2
    -       pop     {r4, pc}
    +       @ link register save eliminated.
    +       eor     r0, r0, r0, asr #31
    +       clz     r0, r0
    +       sub     r0, r0, #1
    +       bx      lr
    As the inline expansion will (at least usually) make code larger
    than the libcall, it is disabled when optimizing for size (-Os or
    cold instructions).
    
    2021-08-19  Jakub Jelinek  <jakub@redhat.com>
    
            PR middle-end/101950
            * optabs.c (expand_clrsb_using_clz): New function.
            (expand_unop): Use it as another clrsb expansion fallback.
    
            * gcc.target/i386/pr101950-1.c: New test.
            * gcc.target/i386/pr101950-2.c: New test.

Diff:
---
 gcc/optabs.c                               | 79 ++++++++++++++++++++++++++++++
 gcc/testsuite/gcc.target/i386/pr101950-1.c | 20 ++++++++
 gcc/testsuite/gcc.target/i386/pr101950-2.c | 19 +++++++
 3 files changed, 118 insertions(+)

diff --git a/gcc/optabs.c b/gcc/optabs.c
index 14d8ad2f33f..ebed78fda3f 100644
--- a/gcc/optabs.c
+++ b/gcc/optabs.c
@@ -2600,6 +2600,82 @@ widen_leading (scalar_int_mode mode, rtx op0, rtx target, optab unoptab)
   return 0;
 }
 
+/* Attempt to emit (clrsb:mode op0) as
+   (plus:mode (clz:mode (xor:mode op0 (ashr:mode op0 (const_int prec-1))))
+	      (const_int -1))
+   if CLZ_DEFINED_VALUE_AT_ZERO (mode, val) is 2 and val is prec,
+   or as
+   (clz:mode (ior:mode (xor:mode (ashl:mode op0 (const_int 1))
+				 (ashr:mode op0 (const_int prec-1)))
+		       (const_int 1)))
+   otherwise.  */
+
+static rtx
+expand_clrsb_using_clz (scalar_int_mode mode, rtx op0, rtx target)
+{
+  if (optimize_insn_for_size_p ()
+      || optab_handler (clz_optab, mode) == CODE_FOR_nothing)
+    return NULL_RTX;
+
+  start_sequence ();
+  HOST_WIDE_INT val = 0;
+  if (CLZ_DEFINED_VALUE_AT_ZERO (mode, val) != 2
+      || val != GET_MODE_PRECISION (mode))
+    val = 0;
+  else
+    val = 1;
+
+  rtx temp2 = op0;
+  if (!val)
+    {
+      temp2 = expand_binop (mode, ashl_optab, op0, const1_rtx,
+			    NULL_RTX, 0, OPTAB_DIRECT);
+      if (!temp2)
+	{
+	fail:
+	  end_sequence ();
+	  return NULL_RTX;
+	}
+    }
+
+  rtx temp = expand_binop (mode, ashr_optab, op0,
+			   GEN_INT (GET_MODE_PRECISION (mode) - 1),
+			   NULL_RTX, 0, OPTAB_DIRECT);
+  if (!temp)
+    goto fail;
+
+  temp = expand_binop (mode, xor_optab, temp2, temp, NULL_RTX, 0,
+		       OPTAB_DIRECT);
+  if (!temp)
+    goto fail;
+
+  if (!val)
+    {
+      temp = expand_binop (mode, ior_optab, temp, const1_rtx,
+			   NULL_RTX, 0, OPTAB_DIRECT);
+      if (!temp)
+	goto fail;
+    }
+  temp = expand_unop_direct (mode, clz_optab, temp, val ? NULL_RTX : target,
+			     true);
+  if (!temp)
+    goto fail;
+  if (val)
+    {
+      temp = expand_binop (mode, add_optab, temp, constm1_rtx,
+			   target, 0, OPTAB_DIRECT);
+      if (!temp)
+	goto fail;
+    }
+
+  rtx_insn *seq = get_insns ();
+  end_sequence ();
+
+  add_equal_note (seq, temp, CLRSB, op0, NULL_RTX, mode);
+  emit_insn (seq);
+  return temp;
+}
+
 /* Try calculating clz of a double-word quantity as two clz's of word-sized
    quantities, choosing which based on whether the high word is nonzero.  */
 static rtx
@@ -3171,6 +3247,9 @@ expand_unop (machine_mode mode, optab unoptab, rtx op0, rtx target,
 	  temp = widen_leading (int_mode, op0, target, unoptab);
 	  if (temp)
 	    return temp;
+	  temp = expand_clrsb_using_clz (int_mode, op0, target);
+	  if (temp)
+	    return temp;
 	}
       goto try_libcall;
     }
diff --git a/gcc/testsuite/gcc.target/i386/pr101950-1.c b/gcc/testsuite/gcc.target/i386/pr101950-1.c
new file mode 100644
index 00000000000..cc980646f6c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr101950-1.c
@@ -0,0 +1,20 @@
+/* PR middle-end/101950 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mno-lzcnt" } */
+/* { dg-final { scan-assembler-not "call\[^\n\r]*__clrsb.i2" } } */
+/* { dg-final { scan-assembler-times "\tbsr\[ql]\t" 2 } } */
+/* { dg-final { scan-assembler-times "\txor\[ql]\t" 4 } } */
+/* { dg-final { scan-assembler-times "\tor\[ql]\t" 2 } } */
+/* { dg-final { scan-assembler-times "\tsar\[ql]\t|\tcltd" 2 } } */
+
+int
+foo (long x)
+{
+  return __builtin_clrsbl (x);
+}
+
+int
+bar (int x)
+{
+  return __builtin_clrsb (x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr101950-2.c b/gcc/testsuite/gcc.target/i386/pr101950-2.c
new file mode 100644
index 00000000000..896f1b46414
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr101950-2.c
@@ -0,0 +1,19 @@
+/* PR middle-end/101950 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mlzcnt" } */
+/* { dg-final { scan-assembler-not "call\[^\n\r]*__clrsb.i2" } } */
+/* { dg-final { scan-assembler-times "\tlzcnt\[ql]\t" 2 } } */
+/* { dg-final { scan-assembler-times "\txor\[ql]\t" 2 } } */
+/* { dg-final { scan-assembler-times "\tsar\[ql]\t|\tcltd" 2 } } */
+
+int
+foo (long x)
+{
+  return __builtin_clrsbl (x);
+}
+
+int
+bar (int x)
+{
+  return __builtin_clrsb (x);
+}


                 reply	other threads:[~2021-08-19  9:02 UTC|newest]

Thread overview: [no followups] expand[flat|nested]  mbox.gz  Atom feed

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210819090238.9D950385840C@sourceware.org \
    --to=jakub@gcc.gnu.org \
    --cc=gcc-cvs@gcc.gnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).