[PATCH 1/2] [x86] Add pre_reload splitter to detect fp min/max pattern.

public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed

* [PATCH 1/2] [x86] Add pre_reload splitter to detect fp min/max pattern.
@ 2023-07-06  1:18 liuhongt
  2023-07-06  1:18 ` [PATCH 2/2] Adjust rtx_cost for DF/SFmode AND/IOR/XOR/ANDN operations liuhongt
  2023-07-06  6:19 ` [PATCH 1/2] [x86] Add pre_reload splitter to detect fp min/max pattern Uros Bizjak
  0 siblings, 2 replies; 7+ messages in thread
From: liuhongt @ 2023-07-06  1:18 UTC (permalink / raw)
  To: gcc-patches; +Cc: ubizjak

We have ix86_expand_sse_fp_minmax to detect min/max semantics, but
it requires rtx_equal_p for cmp_op0/cmp_op1 and if_true/if_false, for
the testcase in the PR, there's an extra move from cmp_op0 to if_true,
and it failed ix86_expand_sse_fp_minmax.

This patch adds pre_reload splitter to detect the min/max pattern.

Operand order in MINSS matters for signed zeros and NaNs, since the
instruction always returns the second operand when either operand is a
NaN or both operands are zero.

Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
Ok for trunk?

gcc/ChangeLog:

	PR target/110170
	* config/i386/i386.md (*ieee_minmax<mode>3_1): New pre_reload
	splitter to detect fp min/max pattern.

gcc/testsuite/ChangeLog:

	* g++.target/i386/pr110170.C: New test.
	* gcc.target/i386/pr110170.c: New test.
---
 gcc/config/i386/i386.md                  | 30 +++++++++
 gcc/testsuite/g++.target/i386/pr110170.C | 78 ++++++++++++++++++++++++
 gcc/testsuite/gcc.target/i386/pr110170.c | 18 ++++++
 3 files changed, 126 insertions(+)
 create mode 100644 gcc/testsuite/g++.target/i386/pr110170.C
 create mode 100644 gcc/testsuite/gcc.target/i386/pr110170.c

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index e6ebc461e52..353bb21993d 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -22483,6 +22483,36 @@ (define_insn "*ieee_s<ieee_maxmin><mode>3"
    (set_attr "type" "sseadd")
    (set_attr "mode" "<MODE>")])
 
+;; Operands order in min/max instruction matters for signed zero and NANs.
+(define_insn_and_split "*ieee_minmax<mode>3_1"
+  [(set (match_operand:MODEF 0 "register_operand")
+	(unspec:MODEF
+	  [(match_operand:MODEF 1 "register_operand")
+	   (match_operand:MODEF 2 "register_operand")
+	   (lt:MODEF
+	     (match_operand:MODEF 3 "register_operand")
+	     (match_operand:MODEF 4 "register_operand"))]
+	  UNSPEC_BLENDV))]
+  "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+  && ((rtx_equal_p (operands[1], operands[3])
+       && rtx_equal_p (operands[2], operands[4]))
+      || (rtx_equal_p (operands[1], operands[4])
+	  && rtx_equal_p (operands[2], operands[3])))
+  && ix86_pre_reload_split ()"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  int u = (rtx_equal_p (operands[1], operands[3])
+	   && rtx_equal_p (operands[2], operands[4]))
+	   ? UNSPEC_IEEE_MAX : UNSPEC_IEEE_MIN;
+  emit_move_insn (operands[0],
+		  gen_rtx_UNSPEC (<MODE>mode,
+				  gen_rtvec (2, operands[2], operands[1]),
+				  u));
+  DONE;
+})
+
 ;; Make two stack loads independent:
 ;;   fld aa              fld aa
 ;;   fld %st(0)     ->   fld bb
diff --git a/gcc/testsuite/g++.target/i386/pr110170.C b/gcc/testsuite/g++.target/i386/pr110170.C
new file mode 100644
index 00000000000..1e9a781ca74
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/pr110170.C
@@ -0,0 +1,78 @@
+/* { dg-do run } */
+/* { dg-options " -O2 -march=x86-64 -mfpmath=sse -std=gnu++20" } */
+#include <math.h>
+
+void
+__attribute__((noinline))
+__cond_swap(double* __x, double* __y) {
+  bool __r = (*__x < *__y);
+  auto __tmp = __r ? *__x : *__y;
+  *__y = __r ? *__y : *__x;
+  *__x = __tmp;
+}
+
+auto test1() {
+    double nan = -0.0;
+    double x = 0.0;
+    __cond_swap(&nan, &x);
+    return x == -0.0 && nan == 0.0;
+}
+
+auto test1r() {
+    double nan = NAN;
+    double x = 1.0;
+    __cond_swap(&x, &nan);
+    return isnan(x) && signbit(x) == 0 && nan == 1.0;
+}
+
+auto test2() {
+    double nan = NAN;
+    double x = -1.0;
+    __cond_swap(&nan, &x);
+    return isnan(x) && signbit(x) == 0 && nan == -1.0;
+}
+
+auto test2r() {
+    double nan = NAN;
+    double x = -1.0;
+    __cond_swap(&x, &nan);
+    return isnan(x) && signbit(x) == 0 && nan == -1.0;
+}
+
+auto test3() {
+    double nan = -NAN;
+    double x = 1.0;
+    __cond_swap(&nan, &x);
+    return isnan(x) && signbit(x) == 1 && nan == 1.0;
+}
+
+auto test3r() {
+    double nan = -NAN;
+    double x = 1.0;
+    __cond_swap(&x, &nan);
+    return isnan(x) && signbit(x) == 1 && nan == 1.0;
+}
+
+auto test4() {
+    double nan = -NAN;
+    double x = -1.0;
+    __cond_swap(&nan, &x);
+    return isnan(x) && signbit(x) == 1 && nan == -1.0;
+}
+
+auto test4r() {
+    double nan = -NAN;
+    double x = -1.0;
+    __cond_swap(&x, &nan);
+    return isnan(x) && signbit(x) == 1 && nan == -1.0;
+}
+
+
+int main() {  /* Abort if any signed-zero/NaN swap check fails.  */
+    if (
+        !test1() || !test1r()
+        || !test2() || !test2r()
+        || !test3() || !test3r()
+        || !test4() || !test4r()
+    ) __builtin_abort();
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr110170.c b/gcc/testsuite/gcc.target/i386/pr110170.c
new file mode 100644
index 00000000000..0f98545cce3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr110170.c
@@ -0,0 +1,18 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options " -O2 -march=x86-64-v2 -mfpmath=sse" } */
+/* { dg-final { scan-assembler-times {(?n)mins[sd]} 2 } } */
+/* { dg-final { scan-assembler-times {(?n)maxs[sd]} 2 } } */
+
+void __cond_swap_df(double* __x, double* __y) {
+  _Bool __r = (*__x < *__y);
+  double __tmp = __r ? *__x : *__y;
+  *__y = __r ? *__y : *__x;
+  *__x = __tmp;
+}
+
+void __cond_swap_sf(float* __x, float* __y) {
+  _Bool __r = (*__x < *__y);
+  float __tmp = __r ? *__x : *__y;
+  *__y = __r ? *__y : *__x;
+  *__x = __tmp;
+}
-- 
2.39.1.388.g2fc9e9ca3c


^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH 2/2] Adjust rtx_cost for DF/SFmode AND/IOR/XOR/ANDN operations.
  2023-07-06  1:18 [PATCH 1/2] [x86] Add pre_reload splitter to detect fp min/max pattern liuhongt
@ 2023-07-06  1:18 ` liuhongt
  2023-07-06  5:54   ` Uros Bizjak
  2023-07-06  6:19 ` [PATCH 1/2] [x86] Add pre_reload splitter to detect fp min/max pattern Uros Bizjak
  1 sibling, 1 reply; 7+ messages in thread
From: liuhongt @ 2023-07-06  1:18 UTC (permalink / raw)
  To: gcc-patches; +Cc: ubizjak

They should have same cost as vector mode since both generate
pand/pandn/pxor/por instruction.

Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
Ok for trunk?

gcc/ChangeLog:

	* config/i386/i386.cc (ix86_rtx_costs): Adjust rtx_cost for
	DF/SFmode AND/IOR/XOR/ANDN operations.

gcc/testsuite/ChangeLog:

	* gcc.target/i386/pr110170-2.c: New test.
---
 gcc/config/i386/i386.cc                    |  6 ++++--
 gcc/testsuite/gcc.target/i386/pr110170-2.c | 16 ++++++++++++++++
 2 files changed, 20 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr110170-2.c

diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index d4ff56ee8dd..fe31acd7646 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -21153,7 +21153,8 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
 
     case IOR:
     case XOR:
-      if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
+      if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
+	  || SSE_FLOAT_MODE_P (mode))
 	*total = ix86_vec_cost (mode, cost->sse_op);
       else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
 	*total = cost->add * 2;
@@ -21167,7 +21168,8 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
 	  *total = cost->lea;
 	  return true;
 	}
-      else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
+      else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
+	       || SSE_FLOAT_MODE_P (mode))
 	{
 	  /* pandn is a single instruction.  */
 	  if (GET_CODE (XEXP (x, 0)) == NOT)
diff --git a/gcc/testsuite/gcc.target/i386/pr110170-2.c b/gcc/testsuite/gcc.target/i386/pr110170-2.c
new file mode 100644
index 00000000000..d43e322fc49
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr110170-2.c
@@ -0,0 +1,16 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-msse2 -O2 -mfpmath=sse" } */
+/* { dg-final { scan-assembler-not "comi" } }  */
+
+double
+foo (double* a, double* b, double c, double d)
+{
+  return *a < *b ? c : d;
+}
+
+float
+foo1 (float* a, float* b, float c, float d)
+{
+  return *a < *b ? c : d;
+}
+
-- 
2.39.1.388.g2fc9e9ca3c


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 2/2] Adjust rtx_cost for DF/SFmode AND/IOR/XOR/ANDN operations.
  2023-07-06  1:18 ` [PATCH 2/2] Adjust rtx_cost for DF/SFmode AND/IOR/XOR/ANDN operations liuhongt
@ 2023-07-06  5:54   ` Uros Bizjak
  0 siblings, 0 replies; 7+ messages in thread
From: Uros Bizjak @ 2023-07-06  5:54 UTC (permalink / raw)
  To: liuhongt; +Cc: gcc-patches

On Thu, Jul 6, 2023 at 3:20 AM liuhongt <hongtao.liu@intel.com> wrote:
>
> They should have same cost as vector mode since both generate
> pand/pandn/pxor/por instruction.
>
> Bootstrapped and regtested on x86_64-pc-linu-gnu{-m32,}.
> Ok for trunk?
>
> gcc/ChangeLog:
>
>         * config/i386/i386.cc (ix86_rtx_costs): Adjust rtx_cost for
>         DF/SFmode AND/IOR/XOR/ANDN operations.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.target/i386/pr110170-2.c: New test.

OK.

Thanks,
Uros.

> ---
>  gcc/config/i386/i386.cc                    |  6 ++++--
>  gcc/testsuite/gcc.target/i386/pr110170-2.c | 16 ++++++++++++++++
>  2 files changed, 20 insertions(+), 2 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr110170-2.c
>
> diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
> index d4ff56ee8dd..fe31acd7646 100644
> --- a/gcc/config/i386/i386.cc
> +++ b/gcc/config/i386/i386.cc
> @@ -21153,7 +21153,8 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
>
>      case IOR:
>      case XOR:
> -      if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
> +      if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
> +         || SSE_FLOAT_MODE_P (mode))
>         *total = ix86_vec_cost (mode, cost->sse_op);
>        else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
>         *total = cost->add * 2;
> @@ -21167,7 +21168,8 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
>           *total = cost->lea;
>           return true;
>         }
> -      else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
> +      else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
> +              || SSE_FLOAT_MODE_P (mode))
>         {
>           /* pandn is a single instruction.  */
>           if (GET_CODE (XEXP (x, 0)) == NOT)
> diff --git a/gcc/testsuite/gcc.target/i386/pr110170-2.c b/gcc/testsuite/gcc.target/i386/pr110170-2.c
> new file mode 100644
> index 00000000000..d43e322fc49
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr110170-2.c
> @@ -0,0 +1,16 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-msse2 -O2 -mfpmath=sse" } */
> +/* { dg-final { scan-assembler-not "comi" } }  */
> +
> +double
> +foo (double* a, double* b, double c, double d)
> +{
> +  return *a < *b ? c : d;
> +}
> +
> +float
> +foo1 (float* a, float* b, float c, float d)
> +{
> +  return *a < *b ? c : d;
> +}
> +
> --
> 2.39.1.388.g2fc9e9ca3c
>

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 1/2] [x86] Add pre_reload splitter to detect fp min/max pattern.
  2023-07-06  1:18 [PATCH 1/2] [x86] Add pre_reload splitter to detect fp min/max pattern liuhongt
  2023-07-06  1:18 ` [PATCH 2/2] Adjust rtx_cost for DF/SFmode AND/IOR/XOR/ANDN operations liuhongt
@ 2023-07-06  6:19 ` Uros Bizjak
  2023-07-07  5:29   ` [PATCH V2] " liuhongt
  1 sibling, 1 reply; 7+ messages in thread
From: Uros Bizjak @ 2023-07-06  6:19 UTC (permalink / raw)
  To: liuhongt; +Cc: gcc-patches

On Thu, Jul 6, 2023 at 3:20 AM liuhongt <hongtao.liu@intel.com> wrote:
>
> We have ix86_expand_sse_fp_minmax to detect min/max sematics, but
> it requires rtx_equal_p for cmp_op0/cmp_op1 and if_true/if_false, for
> the testcase in the PR, there's an extra move from cmp_op0 to if_true,
> and it failed ix86_expand_sse_fp_minmax.
>
> This patch adds pre_reload splitter to detect the min/max pattern.
>
> Operands order in MINSS matters for signed zero and NANs, since the
> instruction always returns second operand when any operand is NAN or
> both operands are zero.
>
> Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
> Ok for trunk?
>
> gcc/ChangeLog:
>
>         PR target/110170
>         * config/i386/i386.md (*ieee_minmax<mode>3_1): New pre_reload
>         splitter to detect fp min/max pattern.
>
> gcc/testsuite/ChangeLog:
>
>         * g++.target/i386/pr110170.C: New test.
>         * gcc.target/i386/pr110170.c: New test.
> ---
>  gcc/config/i386/i386.md                  | 30 +++++++++
>  gcc/testsuite/g++.target/i386/pr110170.C | 78 ++++++++++++++++++++++++
>  gcc/testsuite/gcc.target/i386/pr110170.c | 18 ++++++
>  3 files changed, 126 insertions(+)
>  create mode 100644 gcc/testsuite/g++.target/i386/pr110170.C
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr110170.c
>
> diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
> index e6ebc461e52..353bb21993d 100644
> --- a/gcc/config/i386/i386.md
> +++ b/gcc/config/i386/i386.md
> @@ -22483,6 +22483,36 @@ (define_insn "*ieee_s<ieee_maxmin><mode>3"
>     (set_attr "type" "sseadd")
>     (set_attr "mode" "<MODE>")])
>
> +;; Operands order in min/max instruction matters for signed zero and NANs.
> +(define_insn_and_split "*ieee_minmax<mode>3_1"
> +  [(set (match_operand:MODEF 0 "register_operand")
> +       (unspec:MODEF
> +         [(match_operand:MODEF 1 "register_operand")
> +          (match_operand:MODEF 2 "register_operand")
> +          (lt:MODEF
> +            (match_operand:MODEF 3 "register_operand")
> +            (match_operand:MODEF 4 "register_operand"))]
> +         UNSPEC_BLENDV))]
> +  "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
> +  && ((rtx_equal_p (operands[1], operands[3])
> +       && rtx_equal_p (operands[2], operands[4]))
> +      || (rtx_equal_p (operands[1], operands[4])
> +         && rtx_equal_p (operands[2], operands[3])))
> +  && ix86_pre_reload_split ()"
> +  "#"
> +  "&& 1"
> +  [(const_int 0)]
> +{
> +  int u = (rtx_equal_p (operands[1], operands[3])
> +          && rtx_equal_p (operands[2], operands[4]))
> +          ? UNSPEC_IEEE_MAX : UNSPEC_IEEE_MIN;
> +  emit_move_insn (operands[0],
> +                 gen_rtx_UNSPEC (<MODE>mode,
> +                                 gen_rtvec (2, operands[2], operands[1]),
> +                                 u));
> +  DONE;
> +})

Please split the above pattern into two, one emitting UNSPEC_IEEE_MAX
and the other emitting UNSPEC_IEEE_MIN.

> +
>  ;; Make two stack loads independent:
>  ;;   fld aa              fld aa
>  ;;   fld %st(0)     ->   fld bb
> diff --git a/gcc/testsuite/g++.target/i386/pr110170.C b/gcc/testsuite/g++.target/i386/pr110170.C
> new file mode 100644
> index 00000000000..1e9a781ca74
> --- /dev/null
> +++ b/gcc/testsuite/g++.target/i386/pr110170.C
> @@ -0,0 +1,78 @@
> +/* { dg-do run } */
> +/* { dg-options " -O2 -march=x86-64 -mfpmath=sse -std=gnu++20" } */

The test involves the blendv instruction, which is SSE4.1, so it is
pointless to test it without -msse4.1. Please add -msse4.1 instead of
-march=x86-64 and use the sse4_runtime target selector, as is the case
with gcc.target/i386/pr90358.c.

> +#include <math.h>
> +
> +void
> +__attribute__((noinline))
> +__cond_swap(double* __x, double* __y) {
> +  bool __r = (*__x < *__y);
> +  auto __tmp = __r ? *__x : *__y;
> +  *__y = __r ? *__y : *__x;
> +  *__x = __tmp;
> +}
> +
> +auto test1() {
> +    double nan = -0.0;
> +    double x = 0.0;
> +    __cond_swap(&nan, &x);
> +    return x == -0.0 && nan == 0.0;
> +}
> +
> +auto test1r() {
> +    double nan = NAN;
> +    double x = 1.0;
> +    __cond_swap(&x, &nan);
> +    return isnan(x) && signbit(x) == 0 && nan == 1.0;
> +}
> +
> +auto test2() {
> +    double nan = NAN;
> +    double x = -1.0;
> +    __cond_swap(&nan, &x);
> +    return isnan(x) && signbit(x) == 0 && nan == -1.0;
> +}
> +
> +auto test2r() {
> +    double nan = NAN;
> +    double x = -1.0;
> +    __cond_swap(&x, &nan);
> +    return isnan(x) && signbit(x) == 0 && nan == -1.0;
> +}
> +
> +auto test3() {
> +    double nan = -NAN;
> +    double x = 1.0;
> +    __cond_swap(&nan, &x);
> +    return isnan(x) && signbit(x) == 1 && nan == 1.0;
> +}
> +
> +auto test3r() {
> +    double nan = -NAN;
> +    double x = 1.0;
> +    __cond_swap(&x, &nan);
> +    return isnan(x) && signbit(x) == 1 && nan == 1.0;
> +}
> +
> +auto test4() {
> +    double nan = -NAN;
> +    double x = -1.0;
> +    __cond_swap(&nan, &x);
> +    return isnan(x) && signbit(x) == 1 && nan == -1.0;
> +}
> +
> +auto test4r() {
> +    double nan = -NAN;
> +    double x = -1.0;
> +    __cond_swap(&x, &nan);
> +    return isnan(x) && signbit(x) == 1 && nan == -1.0;
> +}
> +
> +
> +int main() {
> +    if (
> +        !test1() || !test1r()
> +        || !test2() || !test2r()
> +        || !test3() || !test4r()
> +        || !test4() || !test4r()
> +    ) __builtin_abort();
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/pr110170.c b/gcc/testsuite/gcc.target/i386/pr110170.c
> new file mode 100644
> index 00000000000..0f98545cce3
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr110170.c
> @@ -0,0 +1,18 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options " -O2 -march=x86-64-v2 -mfpmath=sse" } */

Please also use -msse4.1 instead of -march here. With -mfpmath=sse,
the test is valid also for 32bit targets, you should use -msseregparm
additional options for ia32 (please see gcc.target/i386/pr43546.c
testcase) in the same way as -mregparm to pass SSE arguments in
registers.

Uros.

> +/* { dg-final { scan-assembler-times {(?n)mins[sd]} 2 } } */
> +/* { dg-final { scan-assembler-times {(?n)maxs[sd]} 2 } } */
> +
> +void __cond_swap_df(double* __x, double* __y) {
> +  _Bool __r = (*__x < *__y);
> +  double __tmp = __r ? *__x : *__y;
> +  *__y = __r ? *__y : *__x;
> +  *__x = __tmp;
> +}
> +
> +void __cond_swap_sf(float* __x, float* __y) {
> +  _Bool __r = (*__x < *__y);
> +  float __tmp = __r ? *__x : *__y;
> +  *__y = __r ? *__y : *__x;
> +  *__x = __tmp;
> +}
> --
> 2.39.1.388.g2fc9e9ca3c
>

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH V2] [x86] Add pre_reload splitter to detect fp min/max pattern.
  2023-07-06  6:19 ` [PATCH 1/2] [x86] Add pre_reload splitter to detect fp min/max pattern Uros Bizjak
@ 2023-07-07  5:29   ` liuhongt
  2023-07-07  6:02     ` Uros Bizjak
  0 siblings, 1 reply; 7+ messages in thread
From: liuhongt @ 2023-07-07  5:29 UTC (permalink / raw)
  To: gcc-patches; +Cc: ubizjak

> Please split the above pattern into two, one emitting UNSPEC_IEEE_MAX
> and the other emitting UNSPEC_IEEE_MIN.
Split.

> The test involves blendv instruction, which is SSE4.1, so it is
> pointless to test it without -msse4.1. Please add -msse4.1 instead of
> -march=x86_64 and use sse4_runtime target selector, as is the case
> with gcc.target/i386/pr90358.c.
Changed.

> Please also use -msse4.1 instead of -march here. With -mfpmath=sse,
> the test is valid also for 32bit targets, you should use -msseregparm
> additional options for ia32 (please see gcc.target/i386/pr43546.c
> testcase) in the same way as -mregparm to pass SSE arguments in
> registers.
The 32-bit target still fails to do condition elimination for DFmode due to
the code below in rtx_cost:

  /* A size N times larger than UNITS_PER_WORD likely needs N times as
     many insns, taking N times as long.  */
  factor = mode_size > UNITS_PER_WORD ? mode_size / UNITS_PER_WORD : 1;

It looks like a separate issue for DFmode operations on 32-bit targets.

I've enabled 32-bit for the testcase, but it currently only scans for
minss/maxss there.

Here's updated patch.
Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
Ok for trunk?

We have ix86_expand_sse_fp_minmax to detect min/max semantics, but
it requires rtx_equal_p for cmp_op0/cmp_op1 and if_true/if_false, for
the testcase in the PR, there's an extra move from cmp_op0 to if_true,
and it failed ix86_expand_sse_fp_minmax.

This patch adds pre_reload splitter to detect the min/max pattern.

Operand order in MINSS matters for signed zeros and NaNs, since the
instruction always returns the second operand when either operand is a
NaN or both operands are zero.

gcc/ChangeLog:

	PR target/110170
	* config/i386/i386.md (*ieee_max<mode>3_1): New pre_reload
	splitter to detect fp max pattern.
	(*ieee_min<mode>3_1): Ditto, but for fp min pattern.

gcc/testsuite/ChangeLog:

	* g++.target/i386/pr110170.C: New test.
	* gcc.target/i386/pr110170.c: New test.
---
 gcc/config/i386/i386.md                  | 43 +++++++++++++
 gcc/testsuite/g++.target/i386/pr110170.C | 78 ++++++++++++++++++++++++
 gcc/testsuite/gcc.target/i386/pr110170.c | 21 +++++++
 3 files changed, 142 insertions(+)
 create mode 100644 gcc/testsuite/g++.target/i386/pr110170.C
 create mode 100644 gcc/testsuite/gcc.target/i386/pr110170.c

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index a82cc353cfd..6f415f899ae 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -23163,6 +23163,49 @@ (define_insn "*ieee_s<ieee_maxmin><mode>3"
    (set_attr "type" "sseadd")
    (set_attr "mode" "<MODE>")])
 
+;; Operands order in min/max instruction matters for signed zero and NANs.
+(define_insn_and_split "*ieee_max<mode>3_1"
+  [(set (match_operand:MODEF 0 "register_operand")
+	(unspec:MODEF
+	  [(match_operand:MODEF 1 "register_operand")
+	   (match_operand:MODEF 2 "register_operand")
+	   (lt:MODEF
+	     (match_operand:MODEF 3 "register_operand")
+	     (match_operand:MODEF 4 "register_operand"))]
+	  UNSPEC_BLENDV))]
+  "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+  && (rtx_equal_p (operands[1], operands[3])
+      && rtx_equal_p (operands[2], operands[4]))
+  && ix86_pre_reload_split ()"
+  "#"
+  "&& 1"
+  [(set (match_dup 0)
+	(unspec:MODEF
+	  [(match_dup 2)
+	   (match_dup 1)]
+	 UNSPEC_IEEE_MAX))])
+
+(define_insn_and_split "*ieee_min<mode>3_1"
+  [(set (match_operand:MODEF 0 "register_operand")
+	(unspec:MODEF
+	  [(match_operand:MODEF 1 "register_operand")
+	   (match_operand:MODEF 2 "register_operand")
+	   (lt:MODEF
+	     (match_operand:MODEF 3 "register_operand")
+	     (match_operand:MODEF 4 "register_operand"))]
+	  UNSPEC_BLENDV))]
+  "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+  && (rtx_equal_p (operands[1], operands[4])
+      && rtx_equal_p (operands[2], operands[3]))
+  && ix86_pre_reload_split ()"
+  "#"
+  "&& 1"
+  [(set (match_dup 0)
+	(unspec:MODEF
+	  [(match_dup 2)
+	   (match_dup 1)]
+	 UNSPEC_IEEE_MIN))])
+
 ;; Make two stack loads independent:
 ;;   fld aa              fld aa
 ;;   fld %st(0)     ->   fld bb
diff --git a/gcc/testsuite/g++.target/i386/pr110170.C b/gcc/testsuite/g++.target/i386/pr110170.C
new file mode 100644
index 00000000000..5d6842270d0
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/pr110170.C
@@ -0,0 +1,78 @@
+/* { dg-do run } */
+/* { dg-options " -O2 -msse4.1 -mfpmath=sse -std=gnu++20" } */
+#include <math.h>
+
+void
+__attribute__((noinline))
+__cond_swap(double* __x, double* __y) {
+  bool __r = (*__x < *__y);
+  auto __tmp = __r ? *__x : *__y;
+  *__y = __r ? *__y : *__x;
+  *__x = __tmp;
+}
+
+auto test1() {
+    double nan = -0.0;
+    double x = 0.0;
+    __cond_swap(&nan, &x);
+    return x == -0.0 && nan == 0.0;
+}
+
+auto test1r() {
+    double nan = NAN;
+    double x = 1.0;
+    __cond_swap(&x, &nan);
+    return isnan(x) && signbit(x) == 0 && nan == 1.0;
+}
+
+auto test2() {
+    double nan = NAN;
+    double x = -1.0;
+    __cond_swap(&nan, &x);
+    return isnan(x) && signbit(x) == 0 && nan == -1.0;
+}
+
+auto test2r() {
+    double nan = NAN;
+    double x = -1.0;
+    __cond_swap(&x, &nan);
+    return isnan(x) && signbit(x) == 0 && nan == -1.0;
+}
+
+auto test3() {
+    double nan = -NAN;
+    double x = 1.0;
+    __cond_swap(&nan, &x);
+    return isnan(x) && signbit(x) == 1 && nan == 1.0;
+}
+
+auto test3r() {
+    double nan = -NAN;
+    double x = 1.0;
+    __cond_swap(&x, &nan);
+    return isnan(x) && signbit(x) == 1 && nan == 1.0;
+}
+
+auto test4() {
+    double nan = -NAN;
+    double x = -1.0;
+    __cond_swap(&nan, &x);
+    return isnan(x) && signbit(x) == 1 && nan == -1.0;
+}
+
+auto test4r() {
+    double nan = -NAN;
+    double x = -1.0;
+    __cond_swap(&x, &nan);
+    return isnan(x) && signbit(x) == 1 && nan == -1.0;
+}
+
+
+int main() {  /* Abort if any signed-zero/NaN swap check fails.  */
+    if (
+        !test1() || !test1r()
+        || !test2() || !test2r()
+        || !test3() || !test3r()
+        || !test4() || !test4r()
+    ) __builtin_abort();
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr110170.c b/gcc/testsuite/gcc.target/i386/pr110170.c
new file mode 100644
index 00000000000..c72f73398a1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr110170.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options " -O2 -msse4.1 -mfpmath=sse" } */
+/* { dg-final { scan-assembler-times {(?n)mins[sd]} 2 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times {(?n)maxs[sd]} 2 { target { ! ia32 } } } } */
+/* Ideally cond_swap_df is also optimized to minsd/maxsd.  */
+/* { dg-final { scan-assembler-times {(?n)mins[sd]} 1 { target ia32 } } } */
+/* { dg-final { scan-assembler-times {(?n)maxs[sd]} 1 { target ia32 } } } */
+
+void __cond_swap_df(double* __x, double* __y) {
+  _Bool __r = (*__x < *__y);
+  double __tmp = __r ? *__x : *__y;
+  *__y = __r ? *__y : *__x;
+  *__x = __tmp;
+}
+
+void __cond_swap_sf(float* __x, float* __y) {
+  _Bool __r = (*__x < *__y);
+  float __tmp = __r ? *__x : *__y;
+  *__y = __r ? *__y : *__x;
+  *__x = __tmp;
+}
-- 
2.39.1.388.g2fc9e9ca3c


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH V2] [x86] Add pre_reload splitter to detect fp min/max pattern.
  2023-07-07  5:29   ` [PATCH V2] " liuhongt
@ 2023-07-07  6:02     ` Uros Bizjak
  2023-07-07  6:41       ` Hongtao Liu
  0 siblings, 1 reply; 7+ messages in thread
From: Uros Bizjak @ 2023-07-07  6:02 UTC (permalink / raw)
  To: liuhongt; +Cc: gcc-patches

On Fri, Jul 7, 2023 at 7:31 AM liuhongt <hongtao.liu@intel.com> wrote:
>
> > Please split the above pattern into two, one emitting UNSPEC_IEEE_MAX
> > and the other emitting UNSPEC_IEEE_MIN.
> Splitted.
>
> > The test involves blendv instruction, which is SSE4.1, so it is
> > pointless to test it without -msse4.1. Please add -msse4.1 instead of
> > -march=x86_64 and use sse4_runtime target selector, as is the case
> > with gcc.target/i386/pr90358.c.
> Changed.
>
> > Please also use -msse4.1 instead of -march here. With -mfpmath=sse,
> > the test is valid also for 32bit targets, you should use -msseregparm
> > additional options for ia32 (please see gcc.target/i386/pr43546.c
> > testcase) in the same way as -mregparm to pass SSE arguments in
> > registers.
> 32-bit target still failed to do condition elimination for DFmode due to
> below code in rtx_cost
>
>   /* A size N times larger than UNITS_PER_WORD likely needs N times as
>      many insns, taking N times as long.  */
>   factor = mode_size > UNITS_PER_WORD ? mode_size / UNITS_PER_WORD : 1;
>
> It looks like a separate issue for DFmode operation under 32-bit target.
>
> I've enable 32-bit for the testcase, but only scan for minss/maxss
> currently.
>
> Here's updated patch.
> Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
> Ok for trunk?
>
> We have ix86_expand_sse_fp_minmax to detect min/max sematics, but
> it requires rtx_equal_p for cmp_op0/cmp_op1 and if_true/if_false, for
> the testcase in the PR, there's an extra move from cmp_op0 to if_true,
> and it failed ix86_expand_sse_fp_minmax.
>
> This patch adds pre_reload splitter to detect the min/max pattern.
>
> Operands order in MINSS matters for signed zero and NANs, since the
> instruction always returns second operand when any operand is NAN or
> both operands are zero.
>
> gcc/ChangeLog:
>
>         PR target/110170
>         * config/i386/i386.md (*ieee_max<mode>3_1): New pre_reload
>         splitter to detect fp max pattern.
>         (*ieee_min<mode>3_1): Ditto, but for fp min pattern.
>
> gcc/testsuite/ChangeLog:
>
>         * g++.target/i386/pr110170.C: New test.
>         * gcc.target/i386/pr110170.c: New test.

OK with a testcase fix below.

Uros.

> ---
>  gcc/config/i386/i386.md                  | 43 +++++++++++++
>  gcc/testsuite/g++.target/i386/pr110170.C | 78 ++++++++++++++++++++++++
>  gcc/testsuite/gcc.target/i386/pr110170.c | 21 +++++++
>  3 files changed, 142 insertions(+)
>  create mode 100644 gcc/testsuite/g++.target/i386/pr110170.C
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr110170.c
>
> diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
> index a82cc353cfd..6f415f899ae 100644
> --- a/gcc/config/i386/i386.md
> +++ b/gcc/config/i386/i386.md
> @@ -23163,6 +23163,49 @@ (define_insn "*ieee_s<ieee_maxmin><mode>3"
>     (set_attr "type" "sseadd")
>     (set_attr "mode" "<MODE>")])
>
> +;; Operands order in min/max instruction matters for signed zero and NANs.
> +(define_insn_and_split "*ieee_max<mode>3_1"
> +  [(set (match_operand:MODEF 0 "register_operand")
> +       (unspec:MODEF
> +         [(match_operand:MODEF 1 "register_operand")
> +          (match_operand:MODEF 2 "register_operand")
> +          (lt:MODEF
> +            (match_operand:MODEF 3 "register_operand")
> +            (match_operand:MODEF 4 "register_operand"))]
> +         UNSPEC_BLENDV))]
> +  "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
> +  && (rtx_equal_p (operands[1], operands[3])
> +      && rtx_equal_p (operands[2], operands[4]))
> +  && ix86_pre_reload_split ()"
> +  "#"
> +  "&& 1"
> +  [(set (match_dup 0)
> +       (unspec:MODEF
> +         [(match_dup 2)
> +          (match_dup 1)]
> +        UNSPEC_IEEE_MAX))])
> +
> +(define_insn_and_split "*ieee_min<mode>3_1"
> +  [(set (match_operand:MODEF 0 "register_operand")
> +       (unspec:MODEF
> +         [(match_operand:MODEF 1 "register_operand")
> +          (match_operand:MODEF 2 "register_operand")
> +          (lt:MODEF
> +            (match_operand:MODEF 3 "register_operand")
> +            (match_operand:MODEF 4 "register_operand"))]
> +         UNSPEC_BLENDV))]
> +  "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
> +  && (rtx_equal_p (operands[1], operands[4])
> +      && rtx_equal_p (operands[2], operands[3]))
> +  && ix86_pre_reload_split ()"
> +  "#"
> +  "&& 1"
> +  [(set (match_dup 0)
> +       (unspec:MODEF
> +         [(match_dup 2)
> +          (match_dup 1)]
> +        UNSPEC_IEEE_MIN))])
> +
>  ;; Make two stack loads independent:
>  ;;   fld aa              fld aa
>  ;;   fld %st(0)     ->   fld bb
> diff --git a/gcc/testsuite/g++.target/i386/pr110170.C b/gcc/testsuite/g++.target/i386/pr110170.C
> new file mode 100644
> index 00000000000..5d6842270d0
> --- /dev/null
> +++ b/gcc/testsuite/g++.target/i386/pr110170.C
> @@ -0,0 +1,78 @@
> +/* { dg-do run } */
> +/* { dg-options " -O2 -msse4.1 -mfpmath=sse -std=gnu++20" } */

Please either change the first line to:

{ dg-do run { target sse4_runtime } }

or add

{ dg-require-effective-target sse4_runtime }

to the runtime test.

> +#include <math.h>
> +
> +void
> +__attribute__((noinline))
> +__cond_swap(double* __x, double* __y) {
> +  bool __r = (*__x < *__y);
> +  auto __tmp = __r ? *__x : *__y;
> +  *__y = __r ? *__y : *__x;
> +  *__x = __tmp;
> +}
> +
> +auto test1() {
> +    double nan = -0.0;
> +    double x = 0.0;
> +    __cond_swap(&nan, &x);
> +    return x == -0.0 && nan == 0.0;
> +}
> +
> +auto test1r() {
> +    double nan = NAN;
> +    double x = 1.0;
> +    __cond_swap(&x, &nan);
> +    return isnan(x) && signbit(x) == 0 && nan == 1.0;
> +}
> +
> +auto test2() {
> +    double nan = NAN;
> +    double x = -1.0;
> +    __cond_swap(&nan, &x);
> +    return isnan(x) && signbit(x) == 0 && nan == -1.0;
> +}
> +
> +auto test2r() {
> +    double nan = NAN;
> +    double x = -1.0;
> +    __cond_swap(&x, &nan);
> +    return isnan(x) && signbit(x) == 0 && nan == -1.0;
> +}
> +
> +auto test3() {
> +    double nan = -NAN;
> +    double x = 1.0;
> +    __cond_swap(&nan, &x);
> +    return isnan(x) && signbit(x) == 1 && nan == 1.0;
> +}
> +
> +auto test3r() {
> +    double nan = -NAN;
> +    double x = 1.0;
> +    __cond_swap(&x, &nan);
> +    return isnan(x) && signbit(x) == 1 && nan == 1.0;
> +}
> +
> +auto test4() {
> +    double nan = -NAN;
> +    double x = -1.0;
> +    __cond_swap(&nan, &x);
> +    return isnan(x) && signbit(x) == 1 && nan == -1.0;
> +}
> +
> +auto test4r() {
> +    double nan = -NAN;
> +    double x = -1.0;
> +    __cond_swap(&x, &nan);
> +    return isnan(x) && signbit(x) == 1 && nan == -1.0;
> +}
> +
> +
> +int main() {
> +    if (
> +        !test1() || !test1r()
> +        || !test2() || !test2r()
> +        || !test3() || !test4r()
> +        || !test4() || !test4r()
> +    ) __builtin_abort();
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/pr110170.c b/gcc/testsuite/gcc.target/i386/pr110170.c
> new file mode 100644
> index 00000000000..c72f73398a1
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr110170.c
> @@ -0,0 +1,21 @@
> +/* { dg-do compile } */
> +/* { dg-options " -O2 -msse4.1 -mfpmath=sse" } */
> +/* { dg-final { scan-assembler-times {(?n)mins[sd]} 2 { target { ! ia32 } } } } */
> +/* { dg-final { scan-assembler-times {(?n)maxs[sd]} 2 { target { ! ia32 } } } } */
> +/* Ideally cond_swap_df is also optimized to minsd/maxsd.  */
> +/* { dg-final { scan-assembler-times {(?n)mins[sd]} 1 { target ia32 } } } */
> +/* { dg-final { scan-assembler-times {(?n)maxs[sd]} 1 { target ia32 } } } */
> +
> +void __cond_swap_df(double* __x, double* __y) {
> +  _Bool __r = (*__x < *__y);
> +  double __tmp = __r ? *__x : *__y;
> +  *__y = __r ? *__y : *__x;
> +  *__x = __tmp;
> +}
> +
> +void __cond_swap_sf(float* __x, float* __y) {
> +  _Bool __r = (*__x < *__y);
> +  float __tmp = __r ? *__x : *__y;
> +  *__y = __r ? *__y : *__x;
> +  *__x = __tmp;
> +}
> --
> 2.39.1.388.g2fc9e9ca3c
>

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH V2] [x86] Add pre_reload splitter to detect fp min/max pattern.
  2023-07-07  6:02     ` Uros Bizjak
@ 2023-07-07  6:41       ` Hongtao Liu
  0 siblings, 0 replies; 7+ messages in thread
From: Hongtao Liu @ 2023-07-07  6:41 UTC (permalink / raw)
  To: Uros Bizjak; +Cc: liuhongt, gcc-patches

On Fri, Jul 7, 2023 at 2:02 PM Uros Bizjak via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> On Fri, Jul 7, 2023 at 7:31 AM liuhongt <hongtao.liu@intel.com> wrote:
> >
> > > Please split the above pattern into two, one emitting UNSPEC_IEEE_MAX
> > > and the other emitting UNSPEC_IEEE_MIN.
> > > Split.
> >
> > > The test involves blendv instruction, which is SSE4.1, so it is
> > > pointless to test it without -msse4.1. Please add -msse4.1 instead of
> > > -march=x86_64 and use sse4_runtime target selector, as is the case
> > > with gcc.target/i386/pr90358.c.
> > Changed.
> >
> > > Please also use -msse4.1 instead of -march here. With -mfpmath=sse,
> > > the test is valid also for 32bit targets, you should use -msseregparm
> > > additional options for ia32 (please see gcc.target/i386/pr43546.c
> > > testcase) in the same way as -mregparm to pass SSE arguments in
> > > registers.
> > > The 32-bit target still fails to do condition elimination for DFmode due to
> > > the code below in rtx_cost:
> >
> >   /* A size N times larger than UNITS_PER_WORD likely needs N times as
> >      many insns, taking N times as long.  */
> >   factor = mode_size > UNITS_PER_WORD ? mode_size / UNITS_PER_WORD : 1;
> >
> > > It looks like a separate issue for DFmode operations on 32-bit targets.
> >
> > > I've enabled 32-bit for the testcase, but only scan for minss/maxss
> > > currently.
> >
> > Here's updated patch.
> > Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
> > Ok for trunk?
> >
> > > We have ix86_expand_sse_fp_minmax to detect min/max semantics, but
> > > it requires rtx_equal_p for cmp_op0/cmp_op1 and if_true/if_false. For
> > > the testcase in the PR, there's an extra move from cmp_op0 to if_true,
> > > so ix86_expand_sse_fp_minmax fails to match.
> >
> > > This patch adds a pre_reload splitter to detect the min/max pattern.
> >
> > > Operand order in MINSS matters for signed zeros and NaNs, since the
> > > instruction always returns the second operand when either operand is a
> > > NaN or both operands are zero.
> >
> > gcc/ChangeLog:
> >
> >         PR target/110170
> >         * config/i386/i386.md (*ieee_max<mode>3_1): New pre_reload
> >         splitter to detect fp max pattern.
> >         (*ieee_min<mode>3_1): Ditto, but for fp min pattern.
> >
> > gcc/testsuite/ChangeLog:
> >
> >         * g++.target/i386/pr110170.C: New test.
> >         * gcc.target/i386/pr110170.c: New test.
>
> OK with a testcase fix below.
>
> Uros.
>
> > ---
> >  gcc/config/i386/i386.md                  | 43 +++++++++++++
> >  gcc/testsuite/g++.target/i386/pr110170.C | 78 ++++++++++++++++++++++++
> >  gcc/testsuite/gcc.target/i386/pr110170.c | 21 +++++++
> >  3 files changed, 142 insertions(+)
> >  create mode 100644 gcc/testsuite/g++.target/i386/pr110170.C
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr110170.c
> >
> > diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
> > index a82cc353cfd..6f415f899ae 100644
> > --- a/gcc/config/i386/i386.md
> > +++ b/gcc/config/i386/i386.md
> > @@ -23163,6 +23163,49 @@ (define_insn "*ieee_s<ieee_maxmin><mode>3"
> >     (set_attr "type" "sseadd")
> >     (set_attr "mode" "<MODE>")])
> >
> > +;; Operands order in min/max instruction matters for signed zero and NANs.
> > +(define_insn_and_split "*ieee_max<mode>3_1"
> > +  [(set (match_operand:MODEF 0 "register_operand")
> > +       (unspec:MODEF
> > +         [(match_operand:MODEF 1 "register_operand")
> > +          (match_operand:MODEF 2 "register_operand")
> > +          (lt:MODEF
> > +            (match_operand:MODEF 3 "register_operand")
> > +            (match_operand:MODEF 4 "register_operand"))]
> > +         UNSPEC_BLENDV))]
> > +  "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
> > +  && (rtx_equal_p (operands[1], operands[3])
> > +      && rtx_equal_p (operands[2], operands[4]))
> > +  && ix86_pre_reload_split ()"
> > +  "#"
> > +  "&& 1"
> > +  [(set (match_dup 0)
> > +       (unspec:MODEF
> > +         [(match_dup 2)
> > +          (match_dup 1)]
> > +        UNSPEC_IEEE_MAX))])
> > +
> > +(define_insn_and_split "*ieee_min<mode>3_1"
> > +  [(set (match_operand:MODEF 0 "register_operand")
> > +       (unspec:MODEF
> > +         [(match_operand:MODEF 1 "register_operand")
> > +          (match_operand:MODEF 2 "register_operand")
> > +          (lt:MODEF
> > +            (match_operand:MODEF 3 "register_operand")
> > +            (match_operand:MODEF 4 "register_operand"))]
> > +         UNSPEC_BLENDV))]
> > +  "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
> > +  && (rtx_equal_p (operands[1], operands[4])
> > +      && rtx_equal_p (operands[2], operands[3]))
> > +  && ix86_pre_reload_split ()"
> > +  "#"
> > +  "&& 1"
> > +  [(set (match_dup 0)
> > +       (unspec:MODEF
> > +         [(match_dup 2)
> > +          (match_dup 1)]
> > +        UNSPEC_IEEE_MIN))])
> > +
> >  ;; Make two stack loads independent:
> >  ;;   fld aa              fld aa
> >  ;;   fld %st(0)     ->   fld bb
> > diff --git a/gcc/testsuite/g++.target/i386/pr110170.C b/gcc/testsuite/g++.target/i386/pr110170.C
> > new file mode 100644
> > index 00000000000..5d6842270d0
> > --- /dev/null
> > +++ b/gcc/testsuite/g++.target/i386/pr110170.C
> > @@ -0,0 +1,78 @@
> > +/* { dg-do run } */
> > +/* { dg-options " -O2 -msse4.1 -mfpmath=sse -std=gnu++20" } */
>
> Please either change the first line to:
>
> { dg-do run { target sse4_runtime } }
>
> or add
>
> { dg-require-effective-target sse4_runtime }
>
> to the runtime test.
I assume you mean { dg-do run { target sse4 } } plus a runtime check for
processor support of SSE4.
I've included "sse4_1-check.h" in the testcase and renamed main to
sse4_1_test to integrate the test into the existing infrastructure.

>
> > +#include <math.h>
> > +
> > +void
> > +__attribute__((noinline))
> > +__cond_swap(double* __x, double* __y) {
> > +  bool __r = (*__x < *__y);
> > +  auto __tmp = __r ? *__x : *__y;
> > +  *__y = __r ? *__y : *__x;
> > +  *__x = __tmp;
> > +}
> > +
> > +auto test1() {
> > +    double nan = -0.0;
> > +    double x = 0.0;
> > +    __cond_swap(&nan, &x);
> > +    return x == -0.0 && nan == 0.0;
> > +}
> > +
> > +auto test1r() {
> > +    double nan = NAN;
> > +    double x = 1.0;
> > +    __cond_swap(&x, &nan);
> > +    return isnan(x) && signbit(x) == 0 && nan == 1.0;
> > +}
> > +
> > +auto test2() {
> > +    double nan = NAN;
> > +    double x = -1.0;
> > +    __cond_swap(&nan, &x);
> > +    return isnan(x) && signbit(x) == 0 && nan == -1.0;
> > +}
> > +
> > +auto test2r() {
> > +    double nan = NAN;
> > +    double x = -1.0;
> > +    __cond_swap(&x, &nan);
> > +    return isnan(x) && signbit(x) == 0 && nan == -1.0;
> > +}
> > +
> > +auto test3() {
> > +    double nan = -NAN;
> > +    double x = 1.0;
> > +    __cond_swap(&nan, &x);
> > +    return isnan(x) && signbit(x) == 1 && nan == 1.0;
> > +}
> > +
> > +auto test3r() {
> > +    double nan = -NAN;
> > +    double x = 1.0;
> > +    __cond_swap(&x, &nan);
> > +    return isnan(x) && signbit(x) == 1 && nan == 1.0;
> > +}
> > +
> > +auto test4() {
> > +    double nan = -NAN;
> > +    double x = -1.0;
> > +    __cond_swap(&nan, &x);
> > +    return isnan(x) && signbit(x) == 1 && nan == -1.0;
> > +}
> > +
> > +auto test4r() {
> > +    double nan = -NAN;
> > +    double x = -1.0;
> > +    __cond_swap(&x, &nan);
> > +    return isnan(x) && signbit(x) == 1 && nan == -1.0;
> > +}
> > +
> > +
> > +int main() {
> > +    if (
> > +        !test1() || !test1r()
> > +        || !test2() || !test2r()
> > +        || !test3() || !test4r()
> > +        || !test4() || !test4r()
> > +    ) __builtin_abort();
> > +}
> > diff --git a/gcc/testsuite/gcc.target/i386/pr110170.c b/gcc/testsuite/gcc.target/i386/pr110170.c
> > new file mode 100644
> > index 00000000000..c72f73398a1
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr110170.c
> > @@ -0,0 +1,21 @@
> > +/* { dg-do compile } */
> > +/* { dg-options " -O2 -msse4.1 -mfpmath=sse" } */
> > +/* { dg-final { scan-assembler-times {(?n)mins[sd]} 2 { target { ! ia32 } } } } */
> > +/* { dg-final { scan-assembler-times {(?n)maxs[sd]} 2 { target { ! ia32 } } } } */
> > +/* Ideally cond_swap_df is also optimized to minsd/maxsd.  */
> > +/* { dg-final { scan-assembler-times {(?n)mins[sd]} 1 { target ia32 } } } */
> > +/* { dg-final { scan-assembler-times {(?n)maxs[sd]} 1 { target ia32 } } } */
> > +
> > +void __cond_swap_df(double* __x, double* __y) {
> > +  _Bool __r = (*__x < *__y);
> > +  double __tmp = __r ? *__x : *__y;
> > +  *__y = __r ? *__y : *__x;
> > +  *__x = __tmp;
> > +}
> > +
> > +void __cond_swap_sf(float* __x, float* __y) {
> > +  _Bool __r = (*__x < *__y);
> > +  float __tmp = __r ? *__x : *__y;
> > +  *__y = __r ? *__y : *__x;
> > +  *__x = __tmp;
> > +}
> > --
> > 2.39.1.388.g2fc9e9ca3c
> >



-- 
BR,
Hongtao

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2023-07-07  6:35 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-07-06  1:18 [PATCH 1/2] [x86] Add pre_reload splitter to detect fp min/max pattern liuhongt
2023-07-06  1:18 ` [PATCH 2/2] Adjust rtx_cost for DF/SFmode AND/IOR/XOR/ANDN operations liuhongt
2023-07-06  5:54   ` Uros Bizjak
2023-07-06  6:19 ` [PATCH 1/2] [x86] Add pre_reload splitter to detect fp min/max pattern Uros Bizjak
2023-07-07  5:29   ` [PATCH V2] " liuhongt
2023-07-07  6:02     ` Uros Bizjak
2023-07-07  6:41       ` Hongtao Liu

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).