[PATCH][committed] aarch64: PR target/99195 annotate simple saturating add/sub patterns for vec-concat-zero

public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed

* [PATCH][committed] aarch64: PR target/99195 annotate simple saturating add/sub patterns for vec-concat-zero
@ 2023-05-10 10:51 Kyrylo Tkachov
  0 siblings, 0 replies; only message in thread
From: Kyrylo Tkachov @ 2023-05-10 10:51 UTC (permalink / raw)
  To: gcc-patches

[-- Attachment #1: Type: text/plain, Size: 663 bytes --]

Hi all,

Moving onto the saturating instructions, this one goes through the simple add/sub ones.
Bootstrapped and tested on aarch64-none-linux-gnu and aarch64_be-none-elf.
Pushing to trunk.
Thanks,
Kyrill

gcc/ChangeLog:

	PR target/99195
	* config/aarch64/aarch64-simd.md (aarch64_<su_optab>q<addsub><mode>):
	Rename to...
	(aarch64_<su_optab>q<addsub><mode><vczle><vczbe>): ... This.
	(aarch64_<sur>qadd<mode>): Rename to...
	(aarch64_<sur>qadd<mode><vczle><vczbe>): ... This.

gcc/testsuite/ChangeLog:

	PR target/99195
	* gcc.target/aarch64/simd/pr99195_1.c: Add testing for qadd, qsub.
	* gcc.target/aarch64/simd/pr99195_6.c: New test.

[-- Attachment #2: qaddsub.patch --]
[-- Type: application/octet-stream, Size: 5261 bytes --]

diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 2bd019a19f3ef9bb258ec34da5671d9f5a468114..1fc20a278ac23a8e911b3a3af40927c87078ee1f 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -5270,7 +5270,7 @@ (define_insn "*aarch64_vgetfmulx<mode>"
 )
 ;; <su>q<addsub>
 
-(define_insn "aarch64_<su_optab>q<addsub><mode>"
+(define_insn "aarch64_<su_optab>q<addsub><mode><vczle><vczbe>"
   [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
 	(BINQOPS:VSDQ_I (match_operand:VSDQ_I 1 "register_operand" "w")
 			(match_operand:VSDQ_I 2 "register_operand" "w")))]
@@ -5281,7 +5281,7 @@ (define_insn "aarch64_<su_optab>q<addsub><mode>"
 
 ;; suqadd and usqadd
 
-(define_insn "aarch64_<sur>qadd<mode>"
+(define_insn "aarch64_<sur>qadd<mode><vczle><vczbe>"
   [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
 	(unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "0")
 			(match_operand:VSDQ_I 2 "register_operand" "w")]
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/pr99195_1.c b/gcc/testsuite/gcc.target/aarch64/simd/pr99195_1.c
index 5801598d429373d638d9294df85848c791fec30b..4e6b3412749c469cc759744f849b228c511fada8 100644
--- a/gcc/testsuite/gcc.target/aarch64/simd/pr99195_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/simd/pr99195_1.c
@@ -57,17 +57,17 @@ OPSIX (T, IS, OS, S, OP6, OP7, OP8, OP9, OP10, OP11)
 OPSEVEN (T, IS, OS, S, OP1, OP2, OP3, OP4, OP5, OP6, OP7)                \
 OPSEVEN (T, IS, OS, S, OP8, OP9, OP10, OP11, OP12, OP13, OP14)
 
-#define OPSEVENTEEN(T,IS,OS,S,OP1,OP2,OP3,OP4,OP5,OP6,OP7,OP8,OP9,OP10,OP11,OP12,OP13,OP14,OP15,OP16,OP17)        \
-OPSEVEN (T, IS, OS, S, OP1, OP2, OP3, OP4, OP5, OP6, OP7)                \
-OPTEN (T, IS, OS, S, OP8, OP9, OP10, OP11, OP12, OP13, OP14, OP15, OP16, OP17)
+#define OPNINETEEN(T,IS,OS,S,OP1,OP2,OP3,OP4,OP5,OP6,OP7,OP8,OP9,OP10,OP11,OP12,OP13,OP14,OP15,OP16,OP17,OP18,OP19)        \
+OPEIGHT (T, IS, OS, S, OP1, OP2, OP3, OP4, OP5, OP6, OP7, OP8)                \
+OPELEVEN (T, IS, OS, S, OP9, OP10, OP11, OP12, OP13, OP14, OP15, OP16, OP17, OP18, OP19)
 
-OPSEVENTEEN (int8, 8, 16, s8, padd, add, sub, mul, and, orr, eor, orn, bic, max, min, hadd, rhadd, hsub, abd, pmax, pmin)
-OPSEVENTEEN (int16, 4, 8, s16, padd, add, sub, mul, and, orr, eor, orn, bic, max, min, hadd, rhadd, hsub, abd, pmax, pmin)
-OPSEVENTEEN (int32, 2, 4, s32, padd, add, sub, mul, and, orr, eor, orn, bic, max, min, hadd, rhadd, hsub, abd, pmax, pmin)
+OPNINETEEN (int8, 8, 16, s8, padd, add, qadd, qsub, sub, mul, and, orr, eor, orn, bic, max, min, hadd, rhadd, hsub, abd, pmax, pmin)
+OPNINETEEN (int16, 4, 8, s16, padd, add, qadd, qsub, sub, mul, and, orr, eor, orn, bic, max, min, hadd, rhadd, hsub, abd, pmax, pmin)
+OPNINETEEN (int32, 2, 4, s32, padd, add, qadd, qsub, sub, mul, and, orr, eor, orn, bic, max, min, hadd, rhadd, hsub, abd, pmax, pmin)
 
-OPSEVENTEEN (uint8, 8, 16, u8, padd, add, sub, mul, and, orr, eor, orn, bic, max, min, hadd, rhadd, hsub, abd, pmax, pmin)
-OPSEVENTEEN (uint16, 4, 8, u16, padd, add, sub, mul, and, orr, eor, orn, bic, max, min, hadd, rhadd, hsub, abd, pmax, pmin)
-OPSEVENTEEN (uint32, 2, 4, u32, padd, add, sub, mul, and, orr, eor, orn, bic, max, min, hadd, rhadd, hsub, abd, pmax, pmin)
+OPNINETEEN (uint8, 8, 16, u8, padd, add, qadd, qsub, sub, mul, and, orr, eor, orn, bic, max, min, hadd, rhadd, hsub, abd, pmax, pmin)
+OPNINETEEN (uint16, 4, 8, u16, padd, add, qadd, qsub, sub, mul, and, orr, eor, orn, bic, max, min, hadd, rhadd, hsub, abd, pmax, pmin)
+OPNINETEEN (uint32, 2, 4, u32, padd, add, qadd, qsub, sub, mul, and, orr, eor, orn, bic, max, min, hadd, rhadd, hsub, abd, pmax, pmin)
 
 OPFOURTEEN (float32, 2, 4, f32, add, padd, sub, mul, div, max, maxnm, min, minnm, abd, pmax, pmin, pmaxnm, pminnm)
 
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/pr99195_6.c b/gcc/testsuite/gcc.target/aarch64/simd/pr99195_6.c
new file mode 100644
index 0000000000000000000000000000000000000000..52ad2709400a93db803edea2d2f9656ee5dd470a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/simd/pr99195_6.c
@@ -0,0 +1,30 @@
+/* PR target/99195.  */
+/*  Check that we take advantage of 64-bit Advanced SIMD operations clearing
+    the top half of the vector register and no explicit zeroing instructions
+    are emitted.  */
+/* { dg-do compile } */
+/* { dg-options "-O" } */
+
+#include <arm_neon.h>
+
+#define MYOP(OT,IT1,IT2,OP,OS)                         \
+OT                                              \
+foo_##OP##_##OS (IT1 a, IT2 b)                     \
+{                                               \
+  IT1 zeros = vcreate_##OS (0);                   \
+  return vcombine_##OS (v##OP##_##OS (a, b), zeros);      \
+}
+
+MYOP (int8x16_t, int8x8_t, uint8x8_t, uqadd, s8)
+MYOP (int16x8_t, int16x4_t, uint16x4_t, uqadd, s16)
+MYOP (int32x4_t, int32x2_t, uint32x2_t, uqadd, s32)
+MYOP (int64x2_t, int64x1_t, uint64x1_t, uqadd, s64)
+
+MYOP (uint8x16_t, uint8x8_t, int8x8_t, sqadd, u8)
+MYOP (uint16x8_t, uint16x4_t, int16x4_t, sqadd, u16)
+MYOP (uint32x4_t, uint32x2_t, int32x2_t, sqadd, u32)
+MYOP (uint64x2_t, uint64x1_t, int64x1_t, sqadd, u64)
+
+/* { dg-final { scan-assembler-not {\tfmov\t} } }  */
+/* { dg-final { scan-assembler-not {\tmov\t} } }  */
+

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2023-05-10 10:51 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-05-10 10:51 [PATCH][committed] aarch64: PR target/99195 annotate simple saturating add/sub patterns for vec-concat-zero Kyrylo Tkachov

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).