public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r11-2588] x86_64: Integer min/max improvements.
@ 2020-08-06  8:16 Roger Sayle
  0 siblings, 0 replies; only message in thread
From: Roger Sayle @ 2020-08-06  8:16 UTC (permalink / raw)
  To: gcc-cvs

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain; charset="us-ascii", Size: 8670 bytes --]

https://gcc.gnu.org/g:c072fd236dc08f990bfcffd98b27f211a39bb404

commit r11-2588-gc072fd236dc08f990bfcffd98b27f211a39bb404
Author: Roger Sayle <roger@nextmovesoftware.com>
Date:   Thu Aug 6 09:15:25 2020 +0100

    x86_64: Integer min/max improvements.
    
    This patch tweaks the way that min and max are expanded, so that the
    semantics of these operations are visible to the early RTL optimization
    passes, until split into explicit comparison and conditional move
    instructions. The good news is that i386.md already contains all of
    the required logic (many thanks to Richard Biener and Uros Bizjak),
    but this is currently only enabled for scalar-to-vector (STV) synthesis
    of min/max instructions.  This change enables this functionality for
    all TARGET_CMOVE architectures for SImode, HImode and DImode.
    
    2020-08-06  Roger Sayle  <roger@nextmovesoftware.com>
                Uroš Bizjak  <ubizjak@gmail.com>
    
    gcc/ChangeLog
            * config/i386/i386.md (MAXMIN_IMODE): No longer needed.
            (<maxmin><mode>3):  Support SWI248 and general_operand for
            second operand, when TARGET_CMOVE.
            (<maxmin><mode>3_1 splitter): Optimize comparisons against
            0, 1 and -1 to use "test" instead of "cmp".
            (*<maxmin>di3_doubleword): Likewise, allow general_operand
            and enable on TARGET_CMOVE.
            (peephole2): Convert clearing a register after a flag setting
            instruction into an xor followed by the original flag setter.
    
    gcc/testsuite/ChangeLog
            * gcc.target/i386/minmax-8.c: New test.
            * gcc.target/i386/minmax-9.c: New test.
            * gcc.target/i386/minmax-10.c: New test.
            * gcc.target/i386/minmax-11.c: New test.

Diff:
---
 gcc/config/i386/i386.md                   | 74 +++++++++++++++++++++++--------
 gcc/testsuite/gcc.target/i386/minmax-10.c | 38 ++++++++++++++++
 gcc/testsuite/gcc.target/i386/minmax-11.c | 12 +++++
 gcc/testsuite/gcc.target/i386/minmax-8.c  | 18 ++++++++
 gcc/testsuite/gcc.target/i386/minmax-9.c  | 23 ++++++++++
 5 files changed, 146 insertions(+), 19 deletions(-)

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index b24a4557871..4e916bf3c32 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -18809,45 +18809,68 @@
 
 ;; min/max patterns
 
-(define_mode_iterator MAXMIN_IMODE
-  [(SI "TARGET_SSE4_1") (DI "TARGET_AVX512VL")])
 (define_code_attr maxmin_rel
   [(smax "GE") (smin "LE") (umax "GEU") (umin "LEU")])
 
 (define_expand "<code><mode>3"
   [(parallel
-    [(set (match_operand:MAXMIN_IMODE 0 "register_operand")
-	  (maxmin:MAXMIN_IMODE
-	    (match_operand:MAXMIN_IMODE 1 "register_operand")
-	    (match_operand:MAXMIN_IMODE 2 "nonimmediate_operand")))
+    [(set (match_operand:SWI248 0 "register_operand")
+	  (maxmin:SWI248
+	    (match_operand:SWI248 1 "register_operand")
+	    (match_operand:SWI248 2 "general_operand")))
      (clobber (reg:CC FLAGS_REG))])]
-  "TARGET_STV")
+  "TARGET_CMOVE")
 
 (define_insn_and_split "*<code><mode>3_1"
-  [(set (match_operand:MAXMIN_IMODE 0 "register_operand")
-	(maxmin:MAXMIN_IMODE
-	  (match_operand:MAXMIN_IMODE 1 "register_operand")
-	  (match_operand:MAXMIN_IMODE 2 "nonimmediate_operand")))
+  [(set (match_operand:SWI248 0 "register_operand")
+	(maxmin:SWI248
+	  (match_operand:SWI248 1 "register_operand")
+	  (match_operand:SWI248 2 "general_operand")))
    (clobber (reg:CC FLAGS_REG))]
-  "(TARGET_64BIT || <MODE>mode != DImode) && TARGET_STV
+  "TARGET_CMOVE
    && ix86_pre_reload_split ()"
   "#"
   "&& 1"
   [(set (match_dup 0)
-	(if_then_else:MAXMIN_IMODE (match_dup 3)
+	(if_then_else:SWI248 (match_dup 3)
 	  (match_dup 1)
 	  (match_dup 2)))]
 {
   machine_mode mode = <MODE>mode;
+  rtx cmp_op = operands[2];
 
-  if (!register_operand (operands[2], mode))
-    operands[2] = force_reg (mode, operands[2]);
+  if (!register_operand (cmp_op, mode))
+    operands[2] = force_reg (mode, cmp_op);
 
   enum rtx_code code = <maxmin_rel>;
-  machine_mode cmpmode = SELECT_CC_MODE (code, operands[1], operands[2]);
+
+  if (cmp_op == const1_rtx)
+    {
+      /* Convert smax (x, 1) into (x > 0 ? x : 1).
+	 Convert umax (x, 1) into (x != 0 ? x : 1).
+	 Convert ?min (x, 1) into (x <= 0 ? x : 1).  */
+      cmp_op = const0_rtx;
+      if (code == GE)
+	code = GT;
+      else if (code == GEU)
+	code = NE;
+    }
+  /* Convert smin (x, -1) into (x < 0 ? x : -1).  */
+  else if (cmp_op == constm1_rtx && code == LE)
+    {
+      cmp_op = const0_rtx;
+      code = LT;
+    }
+  /* Convert smax (x, -1) into (x >= 0 ? x : -1).  */
+  else if (cmp_op == constm1_rtx && code == GE)
+    cmp_op = const0_rtx;
+  else if (cmp_op != const0_rtx)
+    cmp_op = operands[2];
+
+  machine_mode cmpmode = SELECT_CC_MODE (code, operands[1], cmp_op);
   rtx flags = gen_rtx_REG (cmpmode, FLAGS_REG);
 
-  rtx tmp = gen_rtx_COMPARE (cmpmode, operands[1], operands[2]);
+  rtx tmp = gen_rtx_COMPARE (cmpmode, operands[1], cmp_op);
   emit_insn (gen_rtx_SET (flags, tmp));
 
   operands[3] = gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
@@ -18856,9 +18879,9 @@
 (define_insn_and_split "*<code>di3_doubleword"
   [(set (match_operand:DI 0 "register_operand")
 	(maxmin:DI (match_operand:DI 1 "register_operand")
-		   (match_operand:DI 2 "nonimmediate_operand")))
+		   (match_operand:DI 2 "general_operand")))
    (clobber (reg:CC FLAGS_REG))]
-  "!TARGET_64BIT && TARGET_STV && TARGET_AVX512VL
+  "!TARGET_64BIT && TARGET_CMOVE
    && ix86_pre_reload_split ()"
   "#"
   "&& 1"
@@ -18910,6 +18933,19 @@
       gcc_unreachable ();
     }
 })
+
+;; Avoid clearing a register between a flags setting comparison and its use,
+;; i.e. prefer "xorl %eax,%eax; test/cmp" over "test/cmp; movl $0, %eax".
+(define_peephole2
+  [(set (reg FLAGS_REG) (match_operand 0))
+   (set (match_operand:SWI 1 "register_operand") (const_int 0))]
+  "peep2_regno_dead_p (0, FLAGS_REG)
+   && !reg_overlap_mentioned_p (operands[1], operands[0])"
+   [(set (match_dup 2) (match_dup 0))]
+{
+  operands[2] = gen_rtx_REG (GET_MODE (operands[0]), FLAGS_REG);
+  ix86_expand_clear (operands[1]);
+})
 \f
 ;; Misc patterns (?)
 
diff --git a/gcc/testsuite/gcc.target/i386/minmax-10.c b/gcc/testsuite/gcc.target/i386/minmax-10.c
new file mode 100644
index 00000000000..b044462c5a9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/minmax-10.c
@@ -0,0 +1,38 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+#define max(a,b) (((a) > (b))? (a) : (b))
+#define min(a,b) (((a) < (b))? (a) : (b))
+
+int smax1(int x)
+{
+  return max(x,1);
+}
+
+int smin1(int x)
+{
+  return min(x,1);
+}
+
+int smaxm1(int x)
+{
+  return max(x,-1);
+}
+
+int sminm1(int x)
+{
+  return min(x,-1);
+}
+
+unsigned int umax1(unsigned int x)
+{
+  return max(x,1);
+}
+
+unsigned int umin1(unsigned int x)
+{
+  return min(x,1);
+}
+
+/* { dg-final { scan-assembler-times "test" 6 } } */
+/* { dg-final { scan-assembler-not "cmp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/minmax-11.c b/gcc/testsuite/gcc.target/i386/minmax-11.c
new file mode 100644
index 00000000000..a8c2df54754
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/minmax-11.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fno-tree-reassoc" } */
+
+#define max(a,b) (((a) > (b))? (a) : (b))
+
+int foo(int x)
+{
+  int y = max(x,12345);
+  return max(y,87654);
+}
+
+/* { dg-final { scan-assembler-not "12345" } } */
diff --git a/gcc/testsuite/gcc.target/i386/minmax-8.c b/gcc/testsuite/gcc.target/i386/minmax-8.c
new file mode 100644
index 00000000000..1f7e4667e49
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/minmax-8.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-Os" } */
+
+#define max(a,b) (((a) > (b))? (a) : (b))
+#define min(a,b) (((a) < (b))? (a) : (b))
+
+int foo(int x)
+{
+  return max(x,12345);
+}
+
+int bar(int x)
+{
+  return min(x,87654);
+}
+
+/* { dg-final { scan-assembler-times "12345" 1 } } */
+/* { dg-final { scan-assembler-times "87654" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/minmax-9.c b/gcc/testsuite/gcc.target/i386/minmax-9.c
new file mode 100644
index 00000000000..3b940230bd4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/minmax-9.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-Os" } */
+
+#define max(a,b) (((a) > (b))? (a) : (b))
+#define min(a,b) (((a) < (b))? (a) : (b))
+
+int foo(int x)
+{
+  return max(x,0);
+}
+
+int bar(int x)
+{
+  return min(x,0);
+}
+
+unsigned int baz(unsigned int x)
+{
+  return min(x,1);
+}
+
+/* { dg-final { scan-assembler-times "xor" 3 } } */
+/* { dg-final { scan-assembler-times "test" 3 } } */


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2020-08-06  8:16 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-08-06  8:16 [gcc r11-2588] x86_64: Integer min/max improvements Roger Sayle

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).