public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] tree-optimization/88540 - FP x > y ? x : y if-conversion without -ffast-math
@ 2023-07-18 14:52 Richard Biener
  0 siblings, 0 replies; only message in thread
From: Richard Biener @ 2023-07-18 14:52 UTC (permalink / raw)
  To: gcc-patches

The following makes sure that FP x > y ? x : y style max/min operations
are if-converted at the GIMPLE level.  We can match this neither to
MAX_EXPR nor to .FMAX, as both have IEEE semantics that differ from
those of the ternary ?: operation, but we can make sure to maintain this
form as a COND_EXPR so backends have the chance to match it to
instructions their ISA offers.

The patch does this in phiopt where we recognize min/max and instead
of giving up when we have to honor NaNs we alter the generated code
to a COND_EXPR.

This resolves PR88540 and we can then SLP vectorize the min operation
for its testcase.  It also resolves part of the regressions observed
with the change matching bit-inserts of bit-field-refs to vec_perm.

Expansion from a COND_EXPR rather than from compare-and-branch regresses
gcc.target/i386/pr54855-9.c by producing extra moves: while the
corresponding min/max operations are now already synthesized by
RTL expansion, register selection isn't optimal.  This can also be
provoked without this change by altering the operand order in the source.
I have XFAILed that part of the test.

Bootstrapped and tested on x86_64-unknown-linux-gnu on top of the
patch fixing if-converted RTL expansion when constants are involved.

Comments welcome but I plan to push this once that dependency is acked.

Thanks,
Richard.

	PR tree-optimization/88540
	* tree-ssa-phiopt.cc (minmax_replacement): Do not give up
	with NaNs but handle the simple case by if-converting to a
	COND_EXPR.

	* gcc.target/i386/pr88540.c: New testcase.
	* gcc.target/i386/pr54855-9.c: XFAIL check for redundant moves.
	* gcc.target/i386/pr54855-12.c: Adjust.
	* gcc.target/i386/pr54855-13.c: Likewise.
	* gcc.dg/tree-ssa/split-path-12.c: Likewise.
---
 gcc/testsuite/gcc.dg/tree-ssa/split-path-12.c |  4 +++-
 gcc/testsuite/gcc.target/i386/pr54855-12.c    |  2 +-
 gcc/testsuite/gcc.target/i386/pr54855-13.c    |  2 +-
 gcc/testsuite/gcc.target/i386/pr54855-9.c     |  4 ++--
 gcc/testsuite/gcc.target/i386/pr88540.c       | 10 +++++++++
 gcc/tree-ssa-phiopt.cc                        | 21 ++++++++++++++-----
 6 files changed, 33 insertions(+), 10 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr88540.c

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/split-path-12.c b/gcc/testsuite/gcc.dg/tree-ssa/split-path-12.c
index 19a130d9bf1..da00f795ef0 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/split-path-12.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/split-path-12.c
@@ -16,4 +16,6 @@ foo(double *d1, double *d2, double *d3, int num, double *ip)
   return dmax[0] + dmax[1] + dmax[2];
 }
 
-/* { dg-final { scan-tree-dump "appears to be optimized to a join point for if-convertable half-diamond" "split-paths" } } */
+/* Split-paths shouldn't do anything here, if there's a diamond it would
+   be if-convertible.  */
+/* { dg-final { scan-tree-dump-not "Duplicating join block" "split-paths" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr54855-12.c b/gcc/testsuite/gcc.target/i386/pr54855-12.c
index 2f8af392c83..09e8ab8ae39 100644
--- a/gcc/testsuite/gcc.target/i386/pr54855-12.c
+++ b/gcc/testsuite/gcc.target/i386/pr54855-12.c
@@ -1,6 +1,6 @@
 /* { dg-do compile } */
 /* { dg-options "-O2 -mavx512fp16" } */
-/* { dg-final { scan-assembler-times "vmaxsh\[ \\t\]" 1 } } */
+/* { dg-final { scan-assembler-times "vm\[ai\]\[nx\]sh\[ \\t\]" 1 } } */
 /* { dg-final { scan-assembler-not "vcomish\[ \\t\]" } } */
 /* { dg-final { scan-assembler-not "vmovsh\[ \\t\]" { target { ! ia32 } } } } */
 
diff --git a/gcc/testsuite/gcc.target/i386/pr54855-13.c b/gcc/testsuite/gcc.target/i386/pr54855-13.c
index 87b4f459a5a..a4f25066f81 100644
--- a/gcc/testsuite/gcc.target/i386/pr54855-13.c
+++ b/gcc/testsuite/gcc.target/i386/pr54855-13.c
@@ -1,6 +1,6 @@
 /* { dg-do compile } */
 /* { dg-options "-O2 -mavx512fp16" } */
-/* { dg-final { scan-assembler-times "vmaxsh\[ \\t\]" 1 } } */
+/* { dg-final { scan-assembler-times "vm\[ai\]\[nx\]sh\[ \\t\]" 1 } } */
 /* { dg-final { scan-assembler-not "vcomish\[ \\t\]" } } */
 /* { dg-final { scan-assembler-not "vmovsh\[ \\t\]" { target { ! ia32 } } } } */
 
diff --git a/gcc/testsuite/gcc.target/i386/pr54855-9.c b/gcc/testsuite/gcc.target/i386/pr54855-9.c
index 40add5f6763..fe9302e5077 100644
--- a/gcc/testsuite/gcc.target/i386/pr54855-9.c
+++ b/gcc/testsuite/gcc.target/i386/pr54855-9.c
@@ -1,8 +1,8 @@
 /* { dg-do compile } */
 /* { dg-options "-O2 -msse2 -mfpmath=sse" } */
 /* { dg-final { scan-assembler-times "minss" 1 } } */
-/* { dg-final { scan-assembler-not "movaps" } } */
-/* { dg-final { scan-assembler-not "movss" } } */
+/* { dg-final { scan-assembler-not "movaps" { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-not "movss" { xfail *-*-* } } } */
 
 typedef float vec __attribute__((vector_size(16)));
 
diff --git a/gcc/testsuite/gcc.target/i386/pr88540.c b/gcc/testsuite/gcc.target/i386/pr88540.c
new file mode 100644
index 00000000000..b927d0c57d5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr88540.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse2" } */
+
+void test(double* __restrict d1, double* __restrict d2, double* __restrict d3)
+{
+  for (int n = 0; n < 2; ++n)
+    d3[n] = d1[n] < d2[n] ? d1[n] : d2[n];
+}
+
+/* { dg-final { scan-assembler "minpd" } } */
diff --git a/gcc/tree-ssa-phiopt.cc b/gcc/tree-ssa-phiopt.cc
index 467c9fd108a..13ee486831d 100644
--- a/gcc/tree-ssa-phiopt.cc
+++ b/gcc/tree-ssa-phiopt.cc
@@ -1580,10 +1580,6 @@ minmax_replacement (basic_block cond_bb, basic_block middle_bb, basic_block alt_
 
   tree type = TREE_TYPE (PHI_RESULT (phi));
 
-  /* The optimization may be unsafe due to NaNs.  */
-  if (HONOR_NANS (type) || HONOR_SIGNED_ZEROS (type))
-    return false;
-
   gcond *cond = as_a <gcond *> (*gsi_last_bb (cond_bb));
   enum tree_code cmp = gimple_cond_code (cond);
   tree rhs = gimple_cond_rhs (cond);
@@ -1770,6 +1766,9 @@ minmax_replacement (basic_block cond_bb, basic_block middle_bb, basic_block alt_
       else
 	return false;
     }
+  else if (HONOR_NANS (type) || HONOR_SIGNED_ZEROS (type))
+    /* The optimization may be unsafe due to NaNs.  */
+    return false;
   else if (middle_bb != alt_middle_bb && threeway_p)
     {
       /* Recognize the following case:
@@ -2103,7 +2102,19 @@ minmax_replacement (basic_block cond_bb, basic_block middle_bb, basic_block alt_
   /* Emit the statement to compute min/max.  */
   gimple_seq stmts = NULL;
   tree phi_result = PHI_RESULT (phi);
-  result = gimple_build (&stmts, minmax, TREE_TYPE (phi_result), arg0, arg1);
+
+  /* When we can't use a MIN/MAX_EXPR still make sure the expression
+     stays in a form to be recognized by ISA that map to IEEE x > y ? x : y
+     semantics (that's not IEEE max semantics).  */
+  if (HONOR_NANS (type) || HONOR_SIGNED_ZEROS (type))
+    {
+      result = gimple_build (&stmts, cmp, boolean_type_node,
+			     gimple_cond_lhs (cond), rhs);
+      result = gimple_build (&stmts, COND_EXPR, TREE_TYPE (phi_result),
+			     result, arg_true, arg_false);
+    }
+  else
+    result = gimple_build (&stmts, minmax, TREE_TYPE (phi_result), arg0, arg1);
 
   gsi = gsi_last_bb (cond_bb);
   gsi_insert_seq_before (&gsi, stmts, GSI_NEW_STMT);
-- 
2.35.3

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2023-07-18 14:52 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-07-18 14:52 [PATCH] tree-optimization/88540 - FP x > y ? x : y if-conversion without -ffast-math Richard Biener

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).