public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [pushed] aarch64: Fix bogus cnot optimisation [PR114603]
@ 2024-04-05 13:51 Richard Sandiford
  2024-04-06 12:54 ` Richard Biener
  0 siblings, 1 reply; 3+ messages in thread
From: Richard Sandiford @ 2024-04-05 13:51 UTC (permalink / raw)
  To: gcc-patches

aarch64-sve.md had a pattern that combined:

	cmpeq	pb.T, pa/z, zc.T, #0
	mov	zd.T, pb/z, #1

into:

	cnot	zd.T, pa/m, zc.T

But this is only valid if pa.T is a ptrue.  In other cases, the
original would set inactive elements of zd.T to 0, whereas the
combined form would copy elements from zc.T.

This isn't a regression on a known testcase.  However, it's a nasty
wrong code bug that could conceivably trigger for autovec code (although
I've not been able to construct a reproducer so far).  That fix is also
quite localised to the buggy operation.  I'd therefore prefer to push
the fix now rather than wait for GCC 15.

Tested on aarch64-linux-gnu & pushed.  I'll backport to branches if
there is no fallout.

Richard

gcc/
	PR target/114603
	* config/aarch64/aarch64-sve.md (@aarch64_pred_cnot<mode>): Replace
	with...
	(@aarch64_ptrue_cnot<mode>): ...this, requiring operand 1 to be
	a ptrue.
	(*cnot<mode>): Require operand 1 to be a ptrue.
	* config/aarch64/aarch64-sve-builtins-base.cc (svcnot_impl::expand):
	Use aarch64_ptrue_cnot<mode> for _x operations that are predicated
	with a ptrue.  Represent other _x operations as fully-defined _m
	operations.

gcc/testsuite/
	PR target/114603
	* gcc.target/aarch64/sve/acle/general/cnot_1.c: New test.
---
 .../aarch64/aarch64-sve-builtins-base.cc      | 25 ++++++++++++-------
 gcc/config/aarch64/aarch64-sve.md             | 22 ++++++++--------
 .../aarch64/sve/acle/general/cnot_1.c         | 23 +++++++++++++++++
 3 files changed, 50 insertions(+), 20 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general/cnot_1.c

diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
index 257ca5bf6ad..5be2315a3c6 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
@@ -517,15 +517,22 @@ public:
   expand (function_expander &e) const override
   {
     machine_mode mode = e.vector_mode (0);
-    if (e.pred == PRED_x)
-      {
-	/* The pattern for CNOT includes an UNSPEC_PRED_Z, so needs
-	   a ptrue hint.  */
-	e.add_ptrue_hint (0, e.gp_mode (0));
-	return e.use_pred_x_insn (code_for_aarch64_pred_cnot (mode));
-      }
-
-    return e.use_cond_insn (code_for_cond_cnot (mode), 0);
+    machine_mode pred_mode = e.gp_mode (0);
+    /* The underlying _x pattern is effectively:
+
+	 dst = src == 0 ? 1 : 0
+
+       rather than an UNSPEC_PRED_X.  Using this form allows autovec
+       constructs to be matched by combine, but it means that the
+       predicate on the src == 0 comparison must be all-true.
+
+       For simplicity, represent other _x operations as fully-defined _m
+       operations rather than using a separate bespoke pattern.  */
+    if (e.pred == PRED_x
+	&& gen_lowpart (pred_mode, e.args[0]) == CONSTM1_RTX (pred_mode))
+      return e.use_pred_x_insn (code_for_aarch64_ptrue_cnot (mode));
+    return e.use_cond_insn (code_for_cond_cnot (mode),
+			    e.pred == PRED_x ? 1 : 0);
   }
 };
 
diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
index eca8623e587..0434358122d 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -3363,24 +3363,24 @@ (define_insn_and_split "trunc<SVE_HSDI:mode><SVE_PARTIAL_I:mode>2"
 ;; - CNOT
 ;; -------------------------------------------------------------------------
 
-;; Predicated logical inverse.
-(define_expand "@aarch64_pred_cnot<mode>"
+;; Logical inverse, predicated with a ptrue.
+(define_expand "@aarch64_ptrue_cnot<mode>"
   [(set (match_operand:SVE_FULL_I 0 "register_operand")
 	(unspec:SVE_FULL_I
 	  [(unspec:<VPRED>
 	     [(match_operand:<VPRED> 1 "register_operand")
-	      (match_operand:SI 2 "aarch64_sve_ptrue_flag")
+	      (const_int SVE_KNOWN_PTRUE)
 	      (eq:<VPRED>
-		(match_operand:SVE_FULL_I 3 "register_operand")
-		(match_dup 4))]
+		(match_operand:SVE_FULL_I 2 "register_operand")
+		(match_dup 3))]
 	     UNSPEC_PRED_Z)
-	   (match_dup 5)
-	   (match_dup 4)]
+	   (match_dup 4)
+	   (match_dup 3)]
 	  UNSPEC_SEL))]
   "TARGET_SVE"
   {
-    operands[4] = CONST0_RTX (<MODE>mode);
-    operands[5] = CONST1_RTX (<MODE>mode);
+    operands[3] = CONST0_RTX (<MODE>mode);
+    operands[4] = CONST1_RTX (<MODE>mode);
   }
 )
 
@@ -3389,7 +3389,7 @@ (define_insn "*cnot<mode>"
 	(unspec:SVE_I
 	  [(unspec:<VPRED>
 	     [(match_operand:<VPRED> 1 "register_operand")
-	      (match_operand:SI 5 "aarch64_sve_ptrue_flag")
+	      (const_int SVE_KNOWN_PTRUE)
 	      (eq:<VPRED>
 		(match_operand:SVE_I 2 "register_operand")
 		(match_operand:SVE_I 3 "aarch64_simd_imm_zero"))]
@@ -11001,4 +11001,4 @@ (define_insn "@aarch64_sve_set_neonq_<mode>"
                                   GET_MODE (operands[2]));
     return "sel\t%0.<Vetype>, %3, %2.<Vetype>, %1.<Vetype>";
   }
-)
\ No newline at end of file
+)
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/cnot_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/cnot_1.c
new file mode 100644
index 00000000000..b1a489f0cf0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/cnot_1.c
@@ -0,0 +1,23 @@
+/* { dg-options "-O2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include <arm_sve.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+** foo:
+**	cmpeq	(p[0-7])\.s, p0/z, z0\.s, #0
+**	mov	z0\.s, \1/z, #1
+**	ret
+*/
+svint32_t foo(svbool_t pg, svint32_t y)
+{
+  return svsel(svcmpeq(pg, y, 0), svdup_s32(1), svdup_s32(0));
+}
+
+#ifdef __cplusplus
+}
+#endif
-- 
2.25.1


^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2024-04-08  9:45 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-04-05 13:51 [pushed] aarch64: Fix bogus cnot optimisation [PR114603] Richard Sandiford
2024-04-06 12:54 ` Richard Biener
2024-04-08  9:45   ` Richard Sandiford

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).