public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [x86 PATCH] PR rtl-optimization/101617: Use neg/sbb in ix86_expand_int_movcc.
@ 2022-05-30 17:50 Roger Sayle
  2022-05-30 20:06 ` Uros Bizjak
  0 siblings, 1 reply; 2+ messages in thread
From: Roger Sayle @ 2022-05-30 17:50 UTC (permalink / raw)
  To: 'GCC Patches'

[-- Attachment #1: Type: text/plain, Size: 1824 bytes --]


This patch resolves PR rtl-optimization/101617 where we should generate
the exact same code for (X ? -1 : 1) as we do for ((X ? -1 : 0) | 1).
The cause of the current difference on x86_64 is actually in
ix86_expand_int_movcc, which doesn't know that negl;sbbl can be used
to create a -1/0 result depending on whether the input is nonzero/zero.

So for Andrew Pinski's test case:

int f1(int i)
{
  return i ? -1 : 1;
}

GCC currently generates:

f1:     cmpl    $1, %edi
        sbbl    %eax, %eax      // x ? 0 : -1
        andl    $2, %eax        // x ? 0 : 2
        subl    $1, %eax        // x ? -1 : 1
        ret

but with the attached patch, now generates:

f1:     negl    %edi
        sbbl    %eax, %eax      // x ? -1 : 0
        orl     $1, %eax        // x ? -1 : 1
        ret

To implement this I needed to add two expanders to i386.md to generate
the required instructions (in both SImode and DImode) matching the
pre-existing define_insns of the same name.

This patch has been tested on x86_64-pc-linux-gnu with make bootstrap
and make -k check, both with and without --target_board=unix{-m32},
with no new failures.  Ok for mainline?


2022-05-30  Roger Sayle  <roger@nextmovesoftware.com>

gcc/ChangeLog
        PR rtl-optimization/101617
        * config/i386/i386-expand.cc (ix86_expand_int_movcc): Add a
        special case (indicated by negate_cc_compare_p) to generate a
        -1/0 mask using neg;sbb.
        * config/i386/i386.md (x86_neg<mode>_ccc): New define_expand
        to generate an *x86_neg<mode>_ccc instruction.
        (x86_mov<mode>cc_0_m1_neg): Likewise, a new define_expand to
        generate a *x86_mov<mode>cc_0_m1_neg instruction.

gcc/testsuite/ChangeLog
        PR rtl-optimization/101617
        * gcc.target/i386/pr101617.c: New test case.


Thanks in advance,
Roger
--


[-- Attachment #2: patchif2.txt --]
[-- Type: text/plain, Size: 3707 bytes --]

diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index 5cd7b99..36f4698 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -3142,6 +3142,7 @@ ix86_expand_int_movcc (rtx operands[])
   rtx compare_op;
   machine_mode mode = GET_MODE (operands[0]);
   bool sign_bit_compare_p = false;
+  bool negate_cc_compare_p = false;
   rtx op0 = XEXP (operands[1], 0);
   rtx op1 = XEXP (operands[1], 1);
   rtx op2 = operands[2];
@@ -3188,16 +3189,48 @@ ix86_expand_int_movcc (rtx operands[])
       HOST_WIDE_INT cf = INTVAL (op3);
       HOST_WIDE_INT diff;
 
+      if ((mode == SImode
+	   || (TARGET_64BIT && mode == DImode))
+	  && (GET_MODE (op0) == SImode
+	      || (TARGET_64BIT && GET_MODE (op0) == DImode)))
+	{
+	  /* Special case x != 0 ? -1 : y.  */
+	  if (code == NE && op1 == const0_rtx && ct == -1)
+	    {
+	      negate_cc_compare_p = true;
+	      std::swap (ct, cf);
+	      code = EQ;
+	    }
+	  else if (code == EQ && op1 == const0_rtx && cf == -1)
+	    negate_cc_compare_p = true;
+	}
+
       diff = ct - cf;
       /*  Sign bit compares are better done using shifts than we do by using
 	  sbb.  */
       if (sign_bit_compare_p
+	  || negate_cc_compare_p
 	  || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
 	{
 	  /* Detect overlap between destination and compare sources.  */
 	  rtx tmp = out;
 
-          if (!sign_bit_compare_p)
+	  if (negate_cc_compare_p)
+	    {
+	      if (GET_MODE (op0) == DImode)
+		emit_insn (gen_x86_negdi_ccc (gen_reg_rtx (DImode), op0));
+	      else
+		emit_insn (gen_x86_negsi_ccc (gen_reg_rtx (SImode),
+					      gen_lowpart (SImode, op0)));
+
+	      tmp = gen_reg_rtx (mode);
+	      if (mode == DImode)
+		emit_insn (gen_x86_movdicc_0_m1_neg (tmp));
+	      else
+		emit_insn (gen_x86_movsicc_0_m1_neg (gen_lowpart (SImode,
+								  tmp)));
+	    }
+	  else if (!sign_bit_compare_p)
 	    {
 	      rtx flags;
 	      bool fpcmp = false;
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 602dfa7..370df74 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -11189,6 +11189,14 @@
   [(set_attr "type" "negnot")
    (set_attr "mode" "<MODE>")])
 
+(define_expand "x86_neg<mode>_ccc"
+  [(parallel
+    [(set (reg:CCC FLAGS_REG)
+	  (ne:CCC (match_operand:SWI48 1 "register_operand")
+		  (const_int 0)))
+     (set (match_operand:SWI48 0 "register_operand")
+	  (neg:SWI48 (match_dup 1)))])])
+
 (define_insn "*negqi_ext<mode>_2"
   [(set (zero_extract:SWI248
 	  (match_operand:SWI248 0 "register_operand" "+Q")
@@ -20700,6 +20708,12 @@
    (set_attr "mode" "<MODE>")
    (set_attr "length_immediate" "0")])
 
+(define_expand "x86_mov<mode>cc_0_m1_neg"
+  [(parallel
+    [(set (match_operand:SWI48 0 "register_operand")
+	  (neg:SWI48 (ltu:SWI48 (reg:CCC FLAGS_REG) (const_int 0))))
+     (clobber (reg:CC FLAGS_REG))])])
+
 (define_split
   [(set (match_operand:SWI48 0 "register_operand")
 	(neg:SWI48
diff --git a/gcc/testsuite/gcc.target/i386/pr101617.c b/gcc/testsuite/gcc.target/i386/pr101617.c
new file mode 100644
index 0000000..503bf11
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr101617.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+int f(int i)
+{
+  int t = i ? -1 : 0;
+  return t | 1;
+}
+
+int f1(int i)
+{
+  int t = i ? -1 : 1;
+  return t;
+}
+
+/* { dg-final { scan-assembler-times "negl" 2 } } */
+/* { dg-final { scan-assembler-times "sbbl" 2 } } */
+/* { dg-final { scan-assembler-times "orl" 2 } } */
+/* { dg-final { scan-assembler-not "cmpl" } } */
+

^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: [x86 PATCH] PR rtl-optimization/101617: Use neg/sbb in ix86_expand_int_movcc.
  2022-05-30 17:50 [x86 PATCH] PR rtl-optimization/101617: Use neg/sbb in ix86_expand_int_movcc Roger Sayle
@ 2022-05-30 20:06 ` Uros Bizjak
  0 siblings, 0 replies; 2+ messages in thread
From: Uros Bizjak @ 2022-05-30 20:06 UTC (permalink / raw)
  To: Roger Sayle; +Cc: GCC Patches, Andrew Pinski

On Mon, May 30, 2022 at 7:50 PM Roger Sayle <roger@nextmovesoftware.com> wrote:
>
>
> This patch resolves PR rtl-optimization/101617 where we should generate
> the exact same code for (X ? -1 : 1) as we do for ((X ? -1 : 0) | 1).
> The cause of the current difference on x86_64 is actually in
> ix86_expand_int_movcc that doesn't know that negl;sbbl can be used
> to create a -1/0 result depending on whether the input is zero/nonzero.
>
> So for Andrew Pinski's test case:
>
> int f1(int i)
> {
>   return i ? -1 : 1;
> }
>
> GCC currently generates:
>
> f1:     cmpl    $1, %edi
>         sbbl    %eax, %eax      // x ? 0 : -1
>         andl    $2, %eax        // x ? 0 : 2
>         subl    $1, %eax        // x ? -1 : 1
>         ret
>
> but with the attached patch, now generates:
>
> f1:     negl    %edi
>         sbbl    %eax, %eax      // x ? -1 : 0
>         orl     $1, %eax        // x ? -1 : 1
>         ret
>
> To implement this I needed to add two expanders to i386.md to generate
> the required instructions (in both SImode and DImode) matching the
> pre-existing define_insns of the same name.
>
> This patch has been tested on x86_64-pc-linux-gnu with make bootstrap
> and make -k check, both with and without --target_board=unix{-m32},
> with no new failures.  Ok for mainline?
>
>
> 2022-05-30  Roger Sayle  <roger@nextmovesoftware.com>
>
> gcc/ChangeLog
>         PR rtl-optimization/101617
>         * config/i386/i386-expand.cc (ix86_expand_int_movcc): Add a
>         special case (indicated by negate_cc_compare_p) to generate a
>         -1/0 mask using neg;sbb.
>         * config/i386/i386.md (x86_neg<mode>_ccc): New define_expand
>         to generate an *x86_neg<mode>_ccc instruction.
>         (x86_mov<mode>cc_0_m1_neg): Likewise, a new define_expand to
>         generate a *x86_mov<mode>cc_0_m1_neg instruction.
>
> gcc/testsuite/ChangeLog
>         PR rtl-optimization/101617
>         * gcc.target/i386/pr101617.c: New test case.

LGTM.

Thanks,
Uros.

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2022-05-30 20:06 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-05-30 17:50 [x86 PATCH] PR rtl-optimization/101617: Use neg/sbb in ix86_expand_int_movcc Roger Sayle
2022-05-30 20:06 ` Uros Bizjak

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).