public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r10-10641] i386: Fix up @xorsign<mode>3_1 [PR102224]
@ 2022-05-10  8:20 Jakub Jelinek
  0 siblings, 0 replies; only message in thread
From: Jakub Jelinek @ 2022-05-10  8:20 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:c7b00fbc469bc9c0a486b48bc349dba13881241e

commit r10-10641-gc7b00fbc469bc9c0a486b48bc349dba13881241e
Author: Jakub Jelinek <jakub@redhat.com>
Date:   Wed Sep 8 11:25:31 2021 +0200

    i386: Fix up @xorsign<mode>3_1 [PR102224]
    
    As the testcase shows, we miscompile @xorsign<mode>3_1 if both input
    operands are in the same register, because the splitter overwrites op1
    with op1 & mask before using op0.
    
    For dest = xorsign op0, op0 we can actually simplify it from
    dest = (op0 & mask) ^ op0 to dest = op0 & ~mask (aka abs).
    
    The expander change is an optimization improvement: if we know at
    expansion time that it is xorsign op0, op0, we can emit abs right away
    and get better code that way.
    
    The @xorsign<mode>3_1 change is a fix for the case where xorsign isn't
    known to have the same operands during expansion, but they turn out to be
    the same during RTL optimizations.  We need to use earlyclobber: we
    require dest and op1 to be the same, but op0 must be different because we
    overwrite op1 first.
    
    2021-09-08  Jakub Jelinek  <jakub@redhat.com>
    
            PR target/102224
            * config/i386/i386.md (xorsign<mode>3): If operands[1] is equal to
            operands[2], emit abs<mode>2 instead.
            (@xorsign<mode>3_1): Add early-clobber for output operand.
    
            * gcc.dg/pr102224.c: New test.
            * gcc.target/i386/avx-pr102224.c: New test.
    
    (cherry picked from commit a7b626d98a9a821ffb33466818d6aa86cac1d6fd)

Diff:
---
 gcc/config/i386/i386.md                      | 10 ++++--
 gcc/testsuite/gcc.dg/pr102224.c              | 49 ++++++++++++++++++++++++++++
 gcc/testsuite/gcc.target/i386/avx-pr102224.c | 23 +++++++++++++
 3 files changed, 80 insertions(+), 2 deletions(-)

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 670ff1d3682..d784238ced1 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -10194,10 +10194,16 @@
    (match_operand:MODEF 1 "register_operand")
    (match_operand:MODEF 2 "register_operand")]
   "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
-  "ix86_expand_xorsign (operands); DONE;")
+{
+  if (rtx_equal_p (operands[1], operands[2]))
+    emit_insn (gen_abs<mode>2 (operands[0], operands[1]));
+  else
+    ix86_expand_xorsign (operands);
+  DONE;
+})
 
 (define_insn_and_split "@xorsign<mode>3_1"
-  [(set (match_operand:MODEF 0 "register_operand" "=Yv")
+  [(set (match_operand:MODEF 0 "register_operand" "=&Yv")
 	(unspec:MODEF
 	  [(match_operand:MODEF 1 "register_operand" "Yv")
 	   (match_operand:MODEF 2 "register_operand" "0")
diff --git a/gcc/testsuite/gcc.dg/pr102224.c b/gcc/testsuite/gcc.dg/pr102224.c
new file mode 100644
index 00000000000..9f09ba5ccbb
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr102224.c
@@ -0,0 +1,49 @@
+/* PR target/102224 */
+/* { dg-do run } */
+/* { dg-options "-O2" } */
+
+__attribute__((noipa)) float
+foo (float x)
+{
+  return x * __builtin_copysignf (1.0f, x);
+}
+
+__attribute__((noipa)) float
+bar (float x, float y)
+{
+  return x * __builtin_copysignf (1.0f, y);
+}
+
+__attribute__((noipa)) float
+baz (float z, float x)
+{
+  return x * __builtin_copysignf (1.0f, x);
+}
+
+__attribute__((noipa)) float
+qux (float z, float x, float y)
+{
+  return x * __builtin_copysignf (1.0f, y);
+}
+
+int
+main ()
+{
+  if (foo (1.0f) != 1.0f
+      || foo (-4.0f) != 4.0f)
+    __builtin_abort ();
+  if (bar (1.25f, 7.25f) != 1.25f
+      || bar (1.75f, -3.25f) != -1.75f
+      || bar (-2.25f, 7.5f) != -2.25f
+      || bar (-3.0f, -4.0f) != 3.0f)
+    __builtin_abort ();
+  if (baz (5.5f, 1.0f) != 1.0f
+      || baz (4.25f, -4.0f) != 4.0f)
+    __builtin_abort ();
+  if (qux (1.0f, 1.25f, 7.25f) != 1.25f
+      || qux (2.0f, 1.75f, -3.25f) != -1.75f
+      || qux (3.0f, -2.25f, 7.5f) != -2.25f
+      || qux (4.0f, -3.0f, -4.0f) != 3.0f)
+    __builtin_abort ();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx-pr102224.c b/gcc/testsuite/gcc.target/i386/avx-pr102224.c
new file mode 100644
index 00000000000..be6b88c05db
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx-pr102224.c
@@ -0,0 +1,23 @@
+/* PR target/102224 */
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx" } */
+/* { dg-require-effective-target avx } */
+
+#ifndef CHECK_H
+#define CHECK_H "avx-check.h"
+#endif
+#ifndef TEST
+#define TEST avx_test
+#endif
+
+#define main main1
+#include "../../gcc.dg/pr102224.c"
+#undef main
+
+#include CHECK_H
+
+static void
+TEST (void)
+{
+  main1 ();
+}


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2022-05-10  8:20 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-05-10  8:20 [gcc r10-10641] i386: Fix up @xorsign<mode>3_1 [PR102224] Jakub Jelinek

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).