public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r14-1447] aarch64: PR target/99195 Annotate saturating mult patterns for vec-concat-zero
@ 2023-05-31 16:45 Kyrylo Tkachov
  0 siblings, 0 replies; only message in thread
From: Kyrylo Tkachov @ 2023-05-31 16:45 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:547d3bce0c02dbcbb6f62d9469a71eedf17bd688

commit r14-1447-g547d3bce0c02dbcbb6f62d9469a71eedf17bd688
Author: Kyrylo Tkachov <kyrylo.tkachov@arm.com>
Date:   Wed May 31 17:43:20 2023 +0100

    aarch64: PR target/99195 Annotate saturating mult patterns for vec-concat-zero
    
    This patch goes through the various alphabet-soup saturating multiplication patterns, including those in TARGET_RDMA,
    and annotates them with <vczle><vczbe>. Many other patterns are widening and always write the full 128-bit vectors,
    so this annotation doesn't apply to them. Nothing out of the ordinary in this patch.
    
    Bootstrapped and tested on aarch64-none-linux and aarch64_be-none-elf.
    
    gcc/ChangeLog:
    
            PR target/99195
            * config/aarch64/aarch64-simd.md (aarch64_sq<r>dmulh<mode>): Rename to...
            (aarch64_sq<r>dmulh<mode><vczle><vczbe>): ... This.
            (aarch64_sq<r>dmulh_n<mode>): Rename to...
            (aarch64_sq<r>dmulh_n<mode><vczle><vczbe>): ... This.
            (aarch64_sq<r>dmulh_lane<mode>): Rename to...
            (aarch64_sq<r>dmulh_lane<mode><vczle><vczbe>): ... This.
            (aarch64_sq<r>dmulh_laneq<mode>): Rename to...
            (aarch64_sq<r>dmulh_laneq<mode><vczle><vczbe>): ... This.
            (aarch64_sqrdml<SQRDMLH_AS:rdma_as>h<mode>): Rename to...
            (aarch64_sqrdml<SQRDMLH_AS:rdma_as>h<mode><vczle><vczbe>): ... This.
            (aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>): Rename to...
            (aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode><vczle><vczbe>): ... This.
            (aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>): Rename to...
            (aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode><vczle><vczbe>): ... This.
    
    gcc/testsuite/ChangeLog:
    
            PR target/99195
            * gcc.target/aarch64/simd/pr99195_1.c: Add tests for qdmulh, qrdmulh.
            * gcc.target/aarch64/simd/pr99195_10.c: New test.

Diff:
---
 gcc/config/aarch64/aarch64-simd.md                 | 18 ++++-----
 gcc/testsuite/gcc.target/aarch64/simd/pr99195_1.c  |  4 +-
 gcc/testsuite/gcc.target/aarch64/simd/pr99195_10.c | 43 ++++++++++++++++++++++
 3 files changed, 54 insertions(+), 11 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 2cd8b82df0f..1efae8d5e68 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -5510,7 +5510,7 @@
 
 ;; sq<r>dmulh.
 
-(define_insn "aarch64_sq<r>dmulh<mode>"
+(define_insn "aarch64_sq<r>dmulh<mode><vczle><vczbe>"
   [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
 	(unspec:VSDQ_HSI
 	  [(match_operand:VSDQ_HSI 1 "register_operand" "w")
@@ -5521,7 +5521,7 @@
   [(set_attr "type" "neon_sat_mul_<Vetype><q>")]
 )
 
-(define_insn "aarch64_sq<r>dmulh_n<mode>"
+(define_insn "aarch64_sq<r>dmulh_n<mode><vczle><vczbe>"
   [(set (match_operand:VDQHS 0 "register_operand" "=w")
 	(unspec:VDQHS
 	  [(match_operand:VDQHS 1 "register_operand" "w")
@@ -5535,7 +5535,7 @@
 
 ;; sq<r>dmulh_lane
 
-(define_insn "aarch64_sq<r>dmulh_lane<mode>"
+(define_insn "aarch64_sq<r>dmulh_lane<mode><vczle><vczbe>"
   [(set (match_operand:VDQHS 0 "register_operand" "=w")
         (unspec:VDQHS
 	  [(match_operand:VDQHS 1 "register_operand" "w")
@@ -5550,7 +5550,7 @@
   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
 )
 
-(define_insn "aarch64_sq<r>dmulh_laneq<mode>"
+(define_insn "aarch64_sq<r>dmulh_laneq<mode><vczle><vczbe>"
   [(set (match_operand:VDQHS 0 "register_operand" "=w")
         (unspec:VDQHS
 	  [(match_operand:VDQHS 1 "register_operand" "w")
@@ -5597,7 +5597,7 @@
 
 ;; sqrdml[as]h.
 
-(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h<mode>"
+(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h<mode><vczle><vczbe>"
   [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
 	(unspec:VSDQ_HSI
 	  [(match_operand:VSDQ_HSI 1 "register_operand" "0")
@@ -5611,7 +5611,7 @@
 
 ;; sqrdml[as]h_lane.
 
-(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
+(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode><vczle><vczbe>"
   [(set (match_operand:VDQHS 0 "register_operand" "=w")
 	(unspec:VDQHS
 	  [(match_operand:VDQHS 1 "register_operand" "0")
@@ -5629,7 +5629,7 @@
    [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
 )
 
-(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
+(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode><vczle><vczbe>"
   [(set (match_operand:SD_HSI 0 "register_operand" "=w")
 	(unspec:SD_HSI
 	  [(match_operand:SD_HSI 1 "register_operand" "0")
@@ -5649,7 +5649,7 @@
 
 ;; sqrdml[as]h_laneq.
 
-(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
+(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode><vczle><vczbe>"
   [(set (match_operand:VDQHS 0 "register_operand" "=w")
 	(unspec:VDQHS
 	  [(match_operand:VDQHS 1 "register_operand" "0")
@@ -5667,7 +5667,7 @@
    [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
 )
 
-(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
+(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode><vczle><vczbe>"
   [(set (match_operand:SD_HSI 0 "register_operand" "=w")
 	(unspec:SD_HSI
 	  [(match_operand:SD_HSI 1 "register_operand" "0")
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/pr99195_1.c b/gcc/testsuite/gcc.target/aarch64/simd/pr99195_1.c
index 8b6548a154f..765cb270b4c 100644
--- a/gcc/testsuite/gcc.target/aarch64/simd/pr99195_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/simd/pr99195_1.c
@@ -66,8 +66,8 @@ OPNINETEEN (int16, 4, 8, s16, padd, add, qadd, qsub, sub, mul, and, orr, eor, or
 OPNINETEEN (int32, 2, 4, s32, padd, add, qadd, qsub, sub, mul, and, orr, eor, orn, bic, max, min, hadd, rhadd, hsub, abd, pmax, pmin)
 
 OPSIX (int8, 8, 16, s8, zip1, zip2, uzp1, uzp2, shl, qshl)
-OPSIX (int16, 4, 8, s16, zip1, zip2, uzp1, uzp2, shl, qshl)
-OPSIX (int32, 2, 4, s32, zip1, zip2, uzp1, uzp2, shl, qshl)
+OPEIGHT (int16, 4, 8, s16, zip1, zip2, uzp1, uzp2, shl, qshl, qdmulh, qrdmulh)
+OPEIGHT (int32, 2, 4, s32, zip1, zip2, uzp1, uzp2, shl, qshl, qdmulh, qrdmulh)
 
 OPNINETEEN (uint8, 8, 16, u8, padd, add, qadd, qsub, sub, mul, and, orr, eor, orn, bic, max, min, hadd, rhadd, hsub, abd, pmax, pmin)
 OPNINETEEN (uint16, 4, 8, u16, padd, add, qadd, qsub, sub, mul, and, orr, eor, orn, bic, max, min, hadd, rhadd, hsub, abd, pmax, pmin)
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/pr99195_10.c b/gcc/testsuite/gcc.target/aarch64/simd/pr99195_10.c
new file mode 100644
index 00000000000..9db54009db9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/simd/pr99195_10.c
@@ -0,0 +1,43 @@
+/* PR target/99195.  */
+/*  Check that we take advantage of 64-bit Advanced SIMD operations clearing
+    the top half of the vector register and no explicit zeroing instructions
+    are emitted.  */
+/* { dg-do compile } */
+/* { dg-options "-O -march=armv8.1-a+rdma" } */
+
+#include <arm_neon.h>
+
+#define OPTWO(T,IS,OS,S,OP1,OP2)        \
+FUNC (T, IS, OS, OP1, S)                \
+FUNC (T, IS, OS, OP2, S)
+
+#define TERNARY(OT,IT,OP,S)                         \
+OT                                              \
+foo_##OP##_##S (IT a, IT b, IT c)                 \
+{                                               \
+  IT zeros = vcreate_##S (0);                   \
+  return vcombine_##S (v##OP##_##S (a, b, c), zeros);      \
+}
+
+#undef FUNC
+#define FUNC(T,IS,OS,OP,S) TERNARY (T##x##OS##_t, T##x##IS##_t, OP, S)
+
+OPTWO (int16, 4, 8, s16, qrdmlah, qrdmlsh)
+OPTWO (int32, 2, 4, s32, qrdmlah, qrdmlsh)
+
+#define TERNARY_IDX(OT,IT,OP,S)                         \
+OT                                              \
+foo_##OP##_##S (IT a, IT b, IT c)                 \
+{                                               \
+  IT zeros = vcreate_##S (0);                   \
+  return vcombine_##S (v##OP##_##S (a, b, c, 0), zeros);      \
+}
+
+#undef FUNC
+#define FUNC(T,IS,OS,OP,S) TERNARY_IDX (T##x##OS##_t, T##x##IS##_t, OP, S)
+OPTWO (int16, 4, 8, s16, qrdmlah_lane, qrdmlsh_lane)
+OPTWO (int32, 2, 4, s32, qrdmlah_lane, qrdmlsh_lane)
+
+/* { dg-final { scan-assembler-not {\tfmov\t} } }  */
+/* { dg-final { scan-assembler-not {\tmov\t} } }  */
+

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2023-05-31 16:45 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-05-31 16:45 [gcc r14-1447] aarch64: PR target/99195 Annotate saturating mult patterns for vec-concat-zero Kyrylo Tkachov

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).