[PATCH][committed] aarch64: PR target/99195 annotate more simple binary ops for vec-concat with zero

public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed

* [PATCH][committed] aarch64: PR target/99195 annotate more simple binary ops for vec-concat with zero
@ 2023-05-04  8:45 Kyrylo Tkachov
  0 siblings, 0 replies; only message in thread
From: Kyrylo Tkachov @ 2023-05-04  8:45 UTC (permalink / raw)
  To: gcc-patches

[-- Attachment #1: Type: text/plain, Size: 916 bytes --]

Hi all,

More pattern annotations and tests to eliminate redundant vec-concat with zero instructions.
These cover the abd family of instructions (integer [su]abd and floating-point fabd), the
pairwise integer and floating-point max/min operations, and the pairwise floating-point add (faddp).

Bootstrapped and tested on aarch64-none-linux-gnu.
Pushing to trunk.
Thanks,
Kyrill

gcc/ChangeLog:

	PR target/99195
	* config/aarch64/aarch64-simd.md (aarch64_<su>abd<mode>): Rename to...
	(aarch64_<su>abd<mode><vczle><vczbe>): ... This.
	(fabd<mode>3): Rename to...
	(fabd<mode>3<vczle><vczbe>): ... This.
	(aarch64_<optab>p<mode>): Rename to...
	(aarch64_<optab>p<mode><vczle><vczbe>): ... This.
	(aarch64_faddp<mode>): Rename to...
	(aarch64_faddp<mode><vczle><vczbe>): ... This.

gcc/testsuite/ChangeLog:

	PR target/99195
	* gcc.target/aarch64/simd/pr99195_1.c: Add testing for more binary ops.
	* gcc.target/aarch64/simd/pr99195_2.c: Add testing for more binary ops.

[-- Attachment #2: vbin.patch --]
[-- Type: application/octet-stream, Size: 5614 bytes --]

diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index dafff637c00a9a03aa8188d8202af213c3e80f72..b19c700fc2fc46ce2f9af26777dbdcc46102befb 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -890,7 +890,7 @@ (define_insn "aarch64_abs<mode><vczle><vczbe>"
 ;; So (ABS:QI (minus:QI 64 -128)) == (ABS:QI (192 or -64 signed)) == 64.
 ;; Whereas SABD would return 192 (-64 signed) on the above example.
 ;; Use MINUS ([us]max (op1, op2), [us]min (op1, op2)) instead.
-(define_insn "aarch64_<su>abd<mode>"
+(define_insn "aarch64_<su>abd<mode><vczle><vczbe>"
   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
 	(minus:VDQ_BHSI
 	  (USMAX:VDQ_BHSI
@@ -1087,7 +1087,7 @@ (define_insn "aarch64_<su>aba<mode>"
   [(set_attr "type" "neon_arith_acc<q>")]
 )
 
-(define_insn "fabd<mode>3"
+(define_insn "fabd<mode>3<vczle><vczbe>"
   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
 	(abs:VHSDF_HSDF
 	  (minus:VHSDF_HSDF
@@ -1740,7 +1740,7 @@ (define_expand "<su><maxmin>v2di3"
 })
 
 ;; Pairwise Integer Max/Min operations.
-(define_insn "aarch64_<optab>p<mode>"
+(define_insn "aarch64_<optab>p<mode><vczle><vczbe>"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
        (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
 			 (match_operand:VDQ_BHSI 2 "register_operand" "w")]
@@ -1751,7 +1751,7 @@ (define_insn "aarch64_<optab>p<mode>"
 )
 
 ;; Pairwise FP Max/Min operations.
-(define_insn "aarch64_<optab>p<mode>"
+(define_insn "aarch64_<optab>p<mode><vczle><vczbe>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
 		      (match_operand:VHSDF 2 "register_operand" "w")]
@@ -3621,7 +3621,7 @@ (define_insn "<fmaxmin><mode>3<vczle><vczbe>"
 
 ;; 'across lanes' add.
 
-(define_insn "aarch64_faddp<mode>"
+(define_insn "aarch64_faddp<mode><vczle><vczbe>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
 		      (match_operand:VHSDF 2 "register_operand" "w")]
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/pr99195_1.c b/gcc/testsuite/gcc.target/aarch64/simd/pr99195_1.c
index 7354a0be4b2fc529e4c407e65c4b5926b11d7f9e..5801598d429373d638d9294df85848c791fec30b 100644
--- a/gcc/testsuite/gcc.target/aarch64/simd/pr99195_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/simd/pr99195_1.c
@@ -57,15 +57,19 @@ OPSIX (T, IS, OS, S, OP6, OP7, OP8, OP9, OP10, OP11)
 OPSEVEN (T, IS, OS, S, OP1, OP2, OP3, OP4, OP5, OP6, OP7)                \
 OPSEVEN (T, IS, OS, S, OP8, OP9, OP10, OP11, OP12, OP13, OP14)
 
-OPFOURTEEN (int8, 8, 16, s8, padd, add, sub, mul, and, orr, eor, orn, bic, max, min, hadd, rhadd, hsub)
-OPFOURTEEN (int16, 4, 8, s16, padd, add, sub, mul, and, orr, eor, orn, bic, max, min, hadd, rhadd, hsub)
-OPFOURTEEN (int32, 2, 4, s32, padd, add, sub, mul, and, orr, eor, orn, bic, max, min, hadd, rhadd, hsub)
+#define OPSEVENTEEN(T,IS,OS,S,OP1,OP2,OP3,OP4,OP5,OP6,OP7,OP8,OP9,OP10,OP11,OP12,OP13,OP14,OP15,OP16,OP17)        \
+OPSEVEN (T, IS, OS, S, OP1, OP2, OP3, OP4, OP5, OP6, OP7)                \
+OPTEN (T, IS, OS, S, OP8, OP9, OP10, OP11, OP12, OP13, OP14, OP15, OP16, OP17)
+
+OPSEVENTEEN (int8, 8, 16, s8, padd, add, sub, mul, and, orr, eor, orn, bic, max, min, hadd, rhadd, hsub, abd, pmax, pmin)
+OPSEVENTEEN (int16, 4, 8, s16, padd, add, sub, mul, and, orr, eor, orn, bic, max, min, hadd, rhadd, hsub, abd, pmax, pmin)
+OPSEVENTEEN (int32, 2, 4, s32, padd, add, sub, mul, and, orr, eor, orn, bic, max, min, hadd, rhadd, hsub, abd, pmax, pmin)
 
-OPFOURTEEN (uint8, 8, 16, u8, padd, add, sub, mul, and, orr, eor, orn, bic, max, min, hadd, rhadd, hsub)
-OPFOURTEEN (uint16, 4, 8, u16, padd, add, sub, mul, and, orr, eor, orn, bic, max, min, hadd, rhadd, hsub)
-OPFOURTEEN (uint32, 2, 4, u32, padd, add, sub, mul, and, orr, eor, orn, bic, max, min, hadd, rhadd, hsub)
+OPSEVENTEEN (uint8, 8, 16, u8, padd, add, sub, mul, and, orr, eor, orn, bic, max, min, hadd, rhadd, hsub, abd, pmax, pmin)
+OPSEVENTEEN (uint16, 4, 8, u16, padd, add, sub, mul, and, orr, eor, orn, bic, max, min, hadd, rhadd, hsub, abd, pmax, pmin)
+OPSEVENTEEN (uint32, 2, 4, u32, padd, add, sub, mul, and, orr, eor, orn, bic, max, min, hadd, rhadd, hsub, abd, pmax, pmin)
 
-OPEIGHT (float32, 2, 4, f32, add, sub, mul, div, max, maxnm, min, minnm)
+OPFOURTEEN (float32, 2, 4, f32, add, padd, sub, mul, div, max, maxnm, min, minnm, abd, pmax, pmin, pmaxnm, pminnm)
 
 #define UNARY(OT,IT,OP,S)			\
 OT                                              \
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/pr99195_2.c b/gcc/testsuite/gcc.target/aarch64/simd/pr99195_2.c
index 603c5ab1439f84df650c345d7a2a73dc8f67d94d..f11f49e0087190e0de706c43f663f09471a540af 100644
--- a/gcc/testsuite/gcc.target/aarch64/simd/pr99195_2.c
+++ b/gcc/testsuite/gcc.target/aarch64/simd/pr99195_2.c
@@ -53,7 +53,11 @@ OPFIVE (T, IS, OS, S, OP6, OP7, OP8, OP9, OP10)
 OPFIVE (T, IS, OS, S, OP1, OP2, OP3, OP4, OP5)                \
 OPSIX (T, IS, OS, S, OP6, OP7, OP8, OP9, OP10, OP11)
 
-OPEIGHT (float16, 4, 8, f16, add, sub, mul, div, max, maxnm, min, minnm)
+#define OPTHIRTEEN(T,IS,OS,S,OP1,OP2,OP3,OP4,OP5,OP6,OP7,OP8,OP9,OP10,OP11,OP12,OP13)        \
+OPSIX (T, IS, OS, S, OP1, OP2, OP3, OP4, OP5, OP6)                \
+OPSEVEN (T, IS, OS, S, OP7, OP8, OP9, OP10, OP11, OP12, OP13)
+
+OPTHIRTEEN (float16, 4, 8, f16, add, sub, mul, div, max, maxnm, min, minnm, abd, pmax, pmin, pmaxnm, pminnm)
 
 #define UNARY(OT,IT,OP,S)			\
 OT                                              \

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2023-05-04  8:45 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-05-04  8:45 [PATCH][committed] aarch64: PR target/99195 annotate more simple binary ops for vec-concat with zero Kyrylo Tkachov

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).