public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH 1/2] [x86] Add pre_reload splitter to detect fp min/max pattern.
@ 2023-07-06  1:18 liuhongt
  2023-07-06  1:18 ` [PATCH 2/2] Adjust rtx_cost for DF/SFmode AND/IOR/XOR/ANDN operations liuhongt
  2023-07-06  6:19 ` [PATCH 1/2] [x86] Add pre_reload splitter to detect fp min/max pattern Uros Bizjak
  0 siblings, 2 replies; 7+ messages in thread
From: liuhongt @ 2023-07-06  1:18 UTC (permalink / raw)
  To: gcc-patches; +Cc: ubizjak

We have ix86_expand_sse_fp_minmax to detect min/max semantics, but
it requires rtx_equal_p for cmp_op0/cmp_op1 and if_true/if_false. For
the testcase in the PR, there's an extra move from cmp_op0 to if_true,
so ix86_expand_sse_fp_minmax fails to match.

This patch adds pre_reload splitter to detect the min/max pattern.

Operand order in MINSS matters for signed zeros and NaNs, since the
instruction always returns the second operand when either operand is a
NaN or both operands are zero.

Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
Ok for trunk?

gcc/ChangeLog:

	PR target/110170
	* config/i386/i386.md (*ieee_minmax<mode>3_1): New pre_reload
	splitter to detect fp min/max pattern.

gcc/testsuite/ChangeLog:

	* g++.target/i386/pr110170.C: New test.
	* gcc.target/i386/pr110170.c: New test.
---
 gcc/config/i386/i386.md                  | 30 +++++++++
 gcc/testsuite/g++.target/i386/pr110170.C | 78 ++++++++++++++++++++++++
 gcc/testsuite/gcc.target/i386/pr110170.c | 18 ++++++
 3 files changed, 126 insertions(+)
 create mode 100644 gcc/testsuite/g++.target/i386/pr110170.C
 create mode 100644 gcc/testsuite/gcc.target/i386/pr110170.c

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index e6ebc461e52..353bb21993d 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -22483,6 +22483,36 @@ (define_insn "*ieee_s<ieee_maxmin><mode>3"
    (set_attr "type" "sseadd")
    (set_attr "mode" "<MODE>")])
 
+;; Operands order in min/max instruction matters for signed zero and NANs.
+(define_insn_and_split "*ieee_minmax<mode>3_1"
+  [(set (match_operand:MODEF 0 "register_operand")
+	(unspec:MODEF
+	  [(match_operand:MODEF 1 "register_operand")
+	   (match_operand:MODEF 2 "register_operand")
+	   (lt:MODEF
+	     (match_operand:MODEF 3 "register_operand")
+	     (match_operand:MODEF 4 "register_operand"))]
+	  UNSPEC_BLENDV))]
+  "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+  && ((rtx_equal_p (operands[1], operands[3])
+       && rtx_equal_p (operands[2], operands[4]))
+      || (rtx_equal_p (operands[1], operands[4])
+	  && rtx_equal_p (operands[2], operands[3])))
+  && ix86_pre_reload_split ()"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  int u = (rtx_equal_p (operands[1], operands[3])
+	   && rtx_equal_p (operands[2], operands[4]))
+	   ? UNSPEC_IEEE_MAX : UNSPEC_IEEE_MIN;
+  emit_move_insn (operands[0],
+		  gen_rtx_UNSPEC (<MODE>mode,
+				  gen_rtvec (2, operands[2], operands[1]),
+				  u));
+  DONE;
+})
+
 ;; Make two stack loads independent:
 ;;   fld aa              fld aa
 ;;   fld %st(0)     ->   fld bb
diff --git a/gcc/testsuite/g++.target/i386/pr110170.C b/gcc/testsuite/g++.target/i386/pr110170.C
new file mode 100644
index 00000000000..1e9a781ca74
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/pr110170.C
@@ -0,0 +1,78 @@
+/* { dg-do run } */
+/* { dg-options " -O2 -march=x86-64 -mfpmath=sse -std=gnu++20" } */
+#include <math.h>
+
+void
+__attribute__((noinline))
+__cond_swap(double* __x, double* __y) {
+  bool __r = (*__x < *__y);
+  auto __tmp = __r ? *__x : *__y;
+  *__y = __r ? *__y : *__x;
+  *__x = __tmp;
+}
+
+auto test1() {
+    double nan = -0.0;  // despite the name, this case exercises signed zeros
+    double x = 0.0;
+    __cond_swap(&nan, &x);  // -0.0 < 0.0 is false, so the two values are exchanged
+    return x == 0.0 && signbit(x) == 1 && nan == 0.0 && signbit(nan) == 0;  // == alone cannot tell -0.0 from 0.0
+}
+
+auto test1r() {
+    double nan = NAN;
+    double x = 1.0;
+    __cond_swap(&x, &nan);
+    return isnan(x) && signbit(x) == 0 && nan == 1.0;
+}
+
+auto test2() {
+    double nan = NAN;
+    double x = -1.0;
+    __cond_swap(&nan, &x);
+    return isnan(x) && signbit(x) == 0 && nan == -1.0;
+}
+
+auto test2r() {
+    double nan = NAN;
+    double x = -1.0;
+    __cond_swap(&x, &nan);
+    return isnan(x) && signbit(x) == 0 && nan == -1.0;
+}
+
+auto test3() {
+    double nan = -NAN;
+    double x = 1.0;
+    __cond_swap(&nan, &x);
+    return isnan(x) && signbit(x) == 1 && nan == 1.0;
+}
+
+auto test3r() {
+    double nan = -NAN;
+    double x = 1.0;
+    __cond_swap(&x, &nan);
+    return isnan(x) && signbit(x) == 1 && nan == 1.0;
+}
+
+auto test4() {
+    double nan = -NAN;
+    double x = -1.0;
+    __cond_swap(&nan, &x);
+    return isnan(x) && signbit(x) == 1 && nan == -1.0;
+}
+
+auto test4r() {
+    double nan = -NAN;
+    double x = -1.0;
+    __cond_swap(&x, &nan);
+    return isnan(x) && signbit(x) == 1 && nan == -1.0;
+}
+
+
+int main() {
+    if (  // abort if any signed-zero/NaN case fails; every testN/testNr runs exactly once
+        !test1() || !test1r()
+        || !test2() || !test2r()
+        || !test3() || !test3r()
+        || !test4() || !test4r()
+    ) __builtin_abort();
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr110170.c b/gcc/testsuite/gcc.target/i386/pr110170.c
new file mode 100644
index 00000000000..0f98545cce3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr110170.c
@@ -0,0 +1,18 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options " -O2 -march=x86-64-v2 -mfpmath=sse" } */
+/* { dg-final { scan-assembler-times {(?n)mins[sd]} 2 } } */
+/* { dg-final { scan-assembler-times {(?n)maxs[sd]} 2 } } */
+
+void __cond_swap_df(double* __x, double* __y) {
+  _Bool __r = (*__x < *__y);
+  double __tmp = __r ? *__x : *__y;
+  *__y = __r ? *__y : *__x;
+  *__x = __tmp;
+}
+
+void __cond_swap_sf(float* __x, float* __y) {
+  _Bool __r = (*__x < *__y);
+  float __tmp = __r ? *__x : *__y;
+  *__y = __r ? *__y : *__x;
+  *__x = __tmp;
+}
-- 
2.39.1.388.g2fc9e9ca3c


^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2023-07-07  6:35 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-07-06  1:18 [PATCH 1/2] [x86] Add pre_reload splitter to detect fp min/max pattern liuhongt
2023-07-06  1:18 ` [PATCH 2/2] Adjust rtx_cost for DF/SFmode AND/IOR/XOR/ANDN operations liuhongt
2023-07-06  5:54   ` Uros Bizjak
2023-07-06  6:19 ` [PATCH 1/2] [x86] Add pre_reload splitter to detect fp min/max pattern Uros Bizjak
2023-07-07  5:29   ` [PATCH V2] " liuhongt
2023-07-07  6:02     ` Uros Bizjak
2023-07-07  6:41       ` Hongtao Liu

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).