public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH][committed] aarch64: PR target/99195 Annotate dot-product patterns for vec-concat-zero
@ 2023-05-31 16:45 Kyrylo Tkachov
  0 siblings, 0 replies; only message in thread
From: Kyrylo Tkachov @ 2023-05-31 16:45 UTC (permalink / raw)
  To: gcc-patches

[-- Attachment #1: Type: text/plain, Size: 990 bytes --]

Hi all,

This straightforward patch annotates the dot-product instruction patterns, including the i8mm ones, with <vczle><vczbe>.
Tests included.
Nothing unexpected here.

Bootstrapped and tested on aarch64-none-linux-gnu and aarch64_be-none-elf.
Pushing to trunk.
Thanks,
Kyrill

gcc/ChangeLog:

	PR target/99195
	* config/aarch64/aarch64-simd.md (<sur>dot_prod<vsi2qi>): Rename to...
	(<sur>dot_prod<vsi2qi><vczle><vczbe>): ... This.
	(usdot_prod<vsi2qi>): Rename to...
	(usdot_prod<vsi2qi><vczle><vczbe>): ... This.
	(aarch64_<sur>dot_lane<vsi2qi>): Rename to...
	(aarch64_<sur>dot_lane<vsi2qi><vczle><vczbe>): ... This.
	(aarch64_<sur>dot_laneq<vsi2qi>): Rename to...
	(aarch64_<sur>dot_laneq<vsi2qi><vczle><vczbe>): ... This.
	(aarch64_<DOTPROD_I8MM:sur>dot_lane<VB:isquadop><VS:vsi2qi>): Rename to...
	(aarch64_<DOTPROD_I8MM:sur>dot_lane<VB:isquadop><VS:vsi2qi><vczle><vczbe>):
	... This.

gcc/testsuite/ChangeLog:

	PR target/99195
	* gcc.target/aarch64/simd/pr99195_11.c: New test.

[-- Attachment #2: dotprod.patch --]
[-- Type: application/octet-stream, Size: 4196 bytes --]

diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 1efae8d5e6834d251c9a44f04a87ec0ddb894b9b..4904a50658bdf148938a6a1ccb50f690fbd89194 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -665,7 +665,7 @@ (define_expand "cmul<conj_op><mode>3"
 ;; ...
 ;;
 ;; and so the vectorizer provides r, in which the result has to be accumulated.
-(define_insn "<sur>dot_prod<vsi2qi>"
+(define_insn "<sur>dot_prod<vsi2qi><vczle><vczbe>"
   [(set (match_operand:VS 0 "register_operand" "=w")
 	(plus:VS
 	  (unspec:VS [(match_operand:<VSI2QI> 1 "register_operand" "w")
@@ -679,7 +679,7 @@ (define_insn "<sur>dot_prod<vsi2qi>"
 
 ;; These instructions map to the __builtins for the Armv8.6-a I8MM usdot
 ;; (vector) Dot Product operation and the vectorized optab.
-(define_insn "usdot_prod<vsi2qi>"
+(define_insn "usdot_prod<vsi2qi><vczle><vczbe>"
   [(set (match_operand:VS 0 "register_operand" "=w")
 	(plus:VS
 	  (unspec:VS [(match_operand:<VSI2QI> 1 "register_operand" "w")
@@ -693,7 +693,7 @@ (define_insn "usdot_prod<vsi2qi>"
 
 ;; These instructions map to the __builtins for the Dot Product
 ;; indexed operations.
-(define_insn "aarch64_<sur>dot_lane<vsi2qi>"
+(define_insn "aarch64_<sur>dot_lane<vsi2qi><vczle><vczbe>"
   [(set (match_operand:VS 0 "register_operand" "=w")
 	(plus:VS
 	  (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
@@ -709,7 +709,7 @@ (define_insn "aarch64_<sur>dot_lane<vsi2qi>"
   [(set_attr "type" "neon_dot<q>")]
 )
 
-(define_insn "aarch64_<sur>dot_laneq<vsi2qi>"
+(define_insn "aarch64_<sur>dot_laneq<vsi2qi><vczle><vczbe>"
   [(set (match_operand:VS 0 "register_operand" "=w")
 	(plus:VS
 	  (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
@@ -727,7 +727,7 @@ (define_insn "aarch64_<sur>dot_laneq<vsi2qi>"
 
 ;; These instructions map to the __builtins for the armv8.6a I8MM usdot, sudot
 ;; (by element) Dot Product operations.
-(define_insn "aarch64_<DOTPROD_I8MM:sur>dot_lane<VB:isquadop><VS:vsi2qi>"
+(define_insn "aarch64_<DOTPROD_I8MM:sur>dot_lane<VB:isquadop><VS:vsi2qi><vczle><vczbe>"
   [(set (match_operand:VS 0 "register_operand" "=w")
 	(plus:VS
 	  (unspec:VS [(match_operand:<VS:VSI2QI> 2 "register_operand" "w")
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/pr99195_11.c b/gcc/testsuite/gcc.target/aarch64/simd/pr99195_11.c
new file mode 100644
index 0000000000000000000000000000000000000000..1ca8c6abe5ddabad153f1a08570791c5f588d177
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/simd/pr99195_11.c
@@ -0,0 +1,38 @@
+/* PR target/99195.  */
+/*  Check that we take advantage of 64-bit Advanced SIMD operations clearing
+    the top half of the vector register and no explicit zeroing instructions
+    are emitted.  */
+/* { dg-do compile } */
+/* { dg-options "-O -march=armv8.2-a+dotprod" } */
+
+#include <arm_neon.h>
+
+#define DOTPROD(OT,AT,IT1,IT2,OP,S)                         \
+OT                                              \
+foo_##OP##_##S (AT a, IT1 b, IT2 c)                 \
+{                                               \
+  AT zeros = vcreate_##S (0);                   \
+  return vcombine_##S (v##OP##_##S (a, b, c), zeros);      \
+}
+
+#define DOTPROD_IDX(OT,AT,IT1,IT2,OP,S)                         \
+OT                                              \
+foo_##OP##_##S (AT a, IT1 b, IT2 c)                 \
+{                                               \
+  AT zeros = vcreate_##S (0);                   \
+  return vcombine_##S (v##OP##_##S (a, b, c, 1), zeros);      \
+}
+
+DOTPROD (int32x4_t, int32x2_t, int8x8_t, int8x8_t, dot, s32)
+DOTPROD (uint32x4_t, uint32x2_t, uint8x8_t, uint8x8_t, dot, u32)
+DOTPROD_IDX (int32x4_t, int32x2_t, int8x8_t, int8x8_t, dot_lane, s32)
+DOTPROD_IDX (uint32x4_t, uint32x2_t, uint8x8_t, uint8x8_t, dot_lane, u32)
+
+#pragma GCC target ("+i8mm")
+DOTPROD (int32x4_t, int32x2_t, uint8x8_t, int8x8_t, usdot, s32)
+DOTPROD_IDX (int32x4_t, int32x2_t, uint8x8_t, int8x8_t, usdot_lane, s32)
+DOTPROD_IDX (int32x4_t, int32x2_t, int8x8_t, uint8x8_t, sudot_lane, s32)
+
+/* { dg-final { scan-assembler-not {\tfmov\t} } }  */
+/* { dg-final { scan-assembler-not {\tmov\t} } }  */
+

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2023-05-31 16:46 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-05-31 16:45 [PATCH][committed] aarch64: PR target/99195 Annotate dot-product patterns for vec-concat-zero Kyrylo Tkachov

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).