public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH][committed] aarch64: PR target/99195 annotate vector compare patterns for vec-concat-zero
@ 2023-05-15  8:55 Kyrylo Tkachov
  0 siblings, 0 replies; only message in thread
From: Kyrylo Tkachov @ 2023-05-15  8:55 UTC (permalink / raw)
  To: gcc-patches

[-- Attachment #1: Type: text/plain, Size: 1258 bytes --]

Hi all,

This instalment of the series goes through the vector comparison patterns in the backend.
One wart is the set of int64x1_t comparisons, which this patch doesn't touch.
Those are a bit trickier because they have define_insn_and_split mechanisms for falling back to
GP reg comparisons after reload and I don't think a simple annotation will catch those cases correctly.
Those will need more custom thinking.
As noted, this patch doesn't touch those, and it is a decent, straightforward improvement on its own.

Bootstrapped and tested on aarch64-none-linux-gnu and aarch64_be-none-elf.
Pushing to trunk.

Thanks,
Kyrill

gcc/ChangeLog:

	PR target/99195
	* config/aarch64/aarch64-simd.md (aarch64_cm<optab><mode>): Rename to...
	(aarch64_cm<optab><mode><vczle><vczbe>): ... This.
	(aarch64_cmtst<mode>): Rename to...
	(aarch64_cmtst<mode><vczle><vczbe>): ... This.
	(*aarch64_cmtst_same_<mode>): Rename to...
	(*aarch64_cmtst_same_<mode><vczle><vczbe>): ... This.
	(*aarch64_cmtstdi): Rename to...
	(*aarch64_cmtstdi<vczle><vczbe>): ... This.
	(aarch64_fac<optab><mode>): Rename to...
	(aarch64_fac<optab><mode><vczle><vczbe>): ... This.

gcc/testsuite/ChangeLog:

	PR target/99195
	* gcc.target/aarch64/simd/pr99195_7.c: New test.

[-- Attachment #2: vcmpc.patch --]
[-- Type: application/octet-stream, Size: 5526 bytes --]

diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 53d97c1cbcfe81b87404f8fd7636538ed38ecf12..73dc97856d7a018ad267edcbc4be765ec23f898c 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -6641,7 +6641,7 @@ (define_expand "aarch64_<sur>q<r>shr<u>n2_n<mode>"
 ;; Note, we have constraints for Dz and Z as different expanders
 ;; have different ideas of what should be passed to this pattern.
 
-(define_insn "aarch64_cm<optab><mode>"
+(define_insn "aarch64_cm<optab><mode><vczle><vczbe>"
   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
 	(neg:<V_INT_EQUIV>
 	  (COMPARISONS:<V_INT_EQUIV>
@@ -6706,7 +6706,7 @@ (define_insn "*aarch64_cm<optab>di"
 
 ;; cm(hs|hi)
 
-(define_insn "aarch64_cm<optab><mode>"
+(define_insn "aarch64_cm<optab><mode><vczle><vczbe>"
   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
 	(neg:<V_INT_EQUIV>
 	  (UCOMPARISONS:<V_INT_EQUIV>
@@ -6773,7 +6773,7 @@ (define_insn "*aarch64_cm<optab>di"
 ;; which is rewritten by simplify_rtx as
 ;; plus (eq (and x y) 0) -1.
 
-(define_insn "aarch64_cmtst<mode>"
+(define_insn "aarch64_cmtst<mode><vczle><vczbe>"
   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
 	(plus:<V_INT_EQUIV>
 	  (eq:<V_INT_EQUIV>
@@ -6792,7 +6792,7 @@ (define_insn "aarch64_cmtst<mode>"
 ;; not (neq (eq x 0)) in which case you rewrite it to
 ;; a comparison against itself
 
-(define_insn "*aarch64_cmtst_same_<mode>"
+(define_insn "*aarch64_cmtst_same_<mode><vczle><vczbe>"
   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
 	(plus:<V_INT_EQUIV>
 	  (eq:<V_INT_EQUIV>
@@ -6843,7 +6843,7 @@ (define_insn_and_split "aarch64_cmtstdi"
   [(set_attr "type" "neon_tst,multiple")]
 )
 
-(define_insn "*aarch64_cmtstdi"
+(define_insn "*aarch64_cmtstdi<vczle><vczbe>"
   [(set (match_operand:DI 0 "register_operand" "=w")
 	(neg:DI
 	  (ne:DI
@@ -6858,7 +6858,7 @@ (define_insn "*aarch64_cmtstdi"
 
 ;; fcm(eq|ge|gt|le|lt)
 
-(define_insn "aarch64_cm<optab><mode>"
+(define_insn "aarch64_cm<optab><mode><vczle><vczbe>"
   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
 	(neg:<V_INT_EQUIV>
 	  (COMPARISONS:<V_INT_EQUIV>
@@ -6876,7 +6876,7 @@ (define_insn "aarch64_cm<optab><mode>"
 ;; Note we can also handle what would be fac(le|lt) by
 ;; generating fac(ge|gt).
 
-(define_insn "aarch64_fac<optab><mode>"
+(define_insn "aarch64_fac<optab><mode><vczle><vczbe>"
   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
 	(neg:<V_INT_EQUIV>
 	  (FAC_COMPARISONS:<V_INT_EQUIV>
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/pr99195_7.c b/gcc/testsuite/gcc.target/aarch64/simd/pr99195_7.c
new file mode 100644
index 0000000000000000000000000000000000000000..86bd729e2a77a970ca2f5887e9dd7c6fe1f35cd1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/simd/pr99195_7.c
@@ -0,0 +1,96 @@
+/* PR target/99195.  */
+/*  Check that we take advantage of 64-bit Advanced SIMD operations clearing
+    the top half of the vector register and no explicit zeroing instructions
+    are emitted.  */
+/* { dg-do compile } */
+/* { dg-options "-O" } */
+
+#include <arm_neon.h>
+
+#define MYOP(OT,IT,IMT,OP,IS,OS)			\
+OT							\
+foo_##OP##_##OS##_##IT##_##IS (IT a, IT b)		\
+{							\
+  IMT zeros = vcreate_##OS (0);				\
+  return vcombine_##OS (v##OP##_##IS (a, b), zeros);	\
+}
+
+#define FUNC(OT,IT,IMT,IS,OS)		\
+MYOP (OT, IT, IMT, ceq, IS, OS)		\
+MYOP (OT, IT, IMT, clt, IS, OS)		\
+MYOP (OT, IT, IMT, cge, IS, OS)		\
+MYOP (OT, IT, IMT, cle, IS, OS)		\
+MYOP (OT, IT, IMT, cgt, IS, OS)		\
+MYOP (OT, IT, IMT, tst, IS, OS)
+
+#define MYFUNC(PFX, T, S, N, DN)		\
+FUNC (uint##S##x##DN##_t, T##S##x##N##_t, uint##S##x##N##_t, PFX##S, u##S)
+
+MYFUNC (s, int, 8, 8, 16)
+MYFUNC (s, int, 16, 4, 8)
+MYFUNC (s, int, 32, 2, 4)
+MYFUNC (u, uint, 8, 8, 16)
+MYFUNC (u, uint, 16, 4, 8)
+MYFUNC (u, uint, 32, 2, 4)
+
+#undef FUNC
+#define FUNC(OT,IT,IMT,IS,OS)		\
+MYOP (OT, IT, IMT, ceq, IS, OS)		\
+MYOP (OT, IT, IMT, clt, IS, OS)		\
+MYOP (OT, IT, IMT, cge, IS, OS)		\
+MYOP (OT, IT, IMT, cle, IS, OS)		\
+MYOP (OT, IT, IMT, cgt, IS, OS)
+
+
+#pragma GCC push_options
+#pragma GCC target ("arch=armv8.2-a+fp16")
+MYFUNC (f, float, 16, 4, 8)
+#pragma GCC pop_options
+MYFUNC (f, float, 32, 2, 4)
+MYFUNC (f, float, 64, 1, 2)
+
+#undef FUNC
+#define FUNC(OT,IT,IMT,IS,OS)			\
+MYOP (OT, IT, IMT, cale, IS, OS)		\
+MYOP (OT, IT, IMT, cagt, IS, OS)		\
+MYOP (OT, IT, IMT, calt, IS, OS)		\
+MYOP (OT, IT, IMT, cage, IS, OS)		\
+
+#pragma GCC push_options
+#pragma GCC target ("arch=armv8.2-a+fp16")
+MYFUNC (f, float, 16, 4, 8)
+#pragma GCC pop_options
+MYFUNC (f, float, 32, 2, 4)
+MYFUNC (f, float, 64, 1, 2)
+
+#undef MYOP
+#define MYOP(OT,IT,IMT,OP,IS,OS)			\
+OT							\
+foo_##OP##_##OS##_##IT##_z (IT a)			\
+{							\
+  IMT zeros = vcreate_##OS (0);				\
+  return vcombine_##OS (v##OP##_##IS (a), zeros);	\
+}
+
+#undef FUNC
+#define FUNC(OT,IT,IMT,IS,OS)			\
+MYOP (OT, IT, IMT, cltz, IS, OS)		\
+MYOP (OT, IT, IMT, ceqz, IS, OS)		\
+MYOP (OT, IT, IMT, cgez, IS, OS)		\
+MYOP (OT, IT, IMT, cgtz, IS, OS)		\
+MYOP (OT, IT, IMT, clez, IS, OS)		\
+
+MYFUNC (s, int, 8, 8, 16)
+MYFUNC (s, int, 16, 4, 8)
+MYFUNC (s, int, 32, 2, 4)
+
+#pragma GCC push_options
+#pragma GCC target ("arch=armv8.2-a+fp16")
+MYFUNC (f, float, 16, 4, 8)
+#pragma GCC pop_options
+MYFUNC (f, float, 32, 2, 4)
+MYFUNC (f, float, 64, 1, 2)
+
+/* { dg-final { scan-assembler-not {\tfmov\t} } }  */
+/* { dg-final { scan-assembler-not {\tmov\t} } }  */
+

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2023-05-15  8:55 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-05-15  8:55 [PATCH][committed] aarch64: PR target/99195 annotate vector compare patterns for vec-concat-zero Kyrylo Tkachov

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).