From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <ktkachov@sourceware.org>
Received: by sourceware.org (Postfix, from userid 1816)
	id 4CCE6385697D; Wed, 24 May 2023 13:53:23 +0000 (GMT)
DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 4CCE6385697D
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gcc.gnu.org;
	s=default; t=1684936403;
	bh=IHBuLaWTNzicibiyi90z3q3rMGQIpFTCyR2CVCllbUg=;
	h=From:To:Subject:Date:From;
	b=PkDP4BMbdH+mbm3Qgk5fepmyAgdYRt8ZjJJRyIWD9UveMd6/p1MFdZwyQc5rNPxGu
	 Ws1TVeLDAeX/h3Y8QYa/P9bY+f0UFYX4DxmPXN58oTugWuT67VfMtUHU6CLrcxgvWx
	 gUgsg7nCX8IRBcOi47SUhtk5YaFzaSHg8eSSD768=
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Content-Type: text/plain; charset="utf-8"
From: Kyrylo Tkachov <ktkachov@gcc.gnu.org>
To: gcc-cvs@gcc.gnu.org
Subject: [gcc r14-1167] aarch64: PR target/99195 Annotate vector shift
 patterns for vec-concat-zero
X-Act-Checkin: gcc
X-Git-Author: Kyrylo Tkachov <kyrylo.tkachov@arm.com>
X-Git-Refname: refs/heads/master
X-Git-Oldrev: affee7dcfa1ee272d43ac7cb68cf423dbd956fd8
X-Git-Newrev: b30ab0dcf9db2ac6d81fb3743add1fbfa0d18f6e
Message-Id: <20230524135323.4CCE6385697D@sourceware.org>
Date: Wed, 24 May 2023 13:53:23 +0000 (GMT)
List-Id: <gcc-cvs.sourceware.org>

https://gcc.gnu.org/g:b30ab0dcf9db2ac6d81fb3743add1fbfa0d18f6e

commit r14-1167-gb30ab0dcf9db2ac6d81fb3743add1fbfa0d18f6e
Author: Kyrylo Tkachov <kyrylo.tkachov@arm.com>
Date:   Wed May 24 14:52:34 2023 +0100

    aarch64: PR target/99195 Annotate vector shift patterns for vec-concat-zero
    
    Continuing the series of straightforward annotations, this one handles the normal (not widening or narrowing) vector shifts.
    Tests included.
    
    Bootstrapped and tested on aarch64-none-linux-gnu and aarch64_be-none-elf.
    
    gcc/ChangeLog:
    
            PR target/99195
            * config/aarch64/aarch64-simd.md (aarch64_simd_lshr<mode>): Rename to...
            (aarch64_simd_lshr<mode><vczle><vczbe>): ... This.
            (aarch64_simd_ashr<mode>): Rename to...
            (aarch64_simd_ashr<mode><vczle><vczbe>): ... This.
            (aarch64_simd_imm_shl<mode>): Rename to...
            (aarch64_simd_imm_shl<mode><vczle><vczbe>): ... This.
            (aarch64_simd_reg_sshl<mode>): Rename to...
            (aarch64_simd_reg_sshl<mode><vczle><vczbe>): ... This.
            (aarch64_simd_reg_shl<mode>_unsigned): Rename to...
            (aarch64_simd_reg_shl<mode>_unsigned<vczle><vczbe>): ... This.
            (aarch64_simd_reg_shl<mode>_signed): Rename to...
            (aarch64_simd_reg_shl<mode>_signed<vczle><vczbe>): ... This.
            (vec_shr_<mode>): Rename to...
            (vec_shr_<mode><vczle><vczbe>): ... This.
            (aarch64_<sur>shl<mode>): Rename to...
            (aarch64_<sur>shl<mode><vczle><vczbe>): ... This.
            (aarch64_<sur>q<r>shl<mode>): Rename to...
            (aarch64_<sur>q<r>shl<mode><vczle><vczbe>): ... This.
    
    gcc/testsuite/ChangeLog:
    
            PR target/99195
            * gcc.target/aarch64/simd/pr99195_1.c: Add testing for shifts.
            * gcc.target/aarch64/simd/pr99195_6.c: Likewise.
            * gcc.target/aarch64/simd/pr99195_8.c: New test.

Diff:
---
 gcc/config/aarch64/aarch64-simd.md                | 18 +++++------
 gcc/testsuite/gcc.target/aarch64/simd/pr99195_1.c |  6 ++--
 gcc/testsuite/gcc.target/aarch64/simd/pr99195_6.c | 10 ++++++
 gcc/testsuite/gcc.target/aarch64/simd/pr99195_8.c | 39 +++++++++++++++++++++++
 4 files changed, 61 insertions(+), 12 deletions(-)
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index af95bbb29a7..0df97310fd9 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -1280,7 +1280,7 @@
   DONE;
 })
 
-(define_insn "aarch64_simd_lshr<mode>"
+(define_insn "aarch64_simd_lshr<mode><vczle><vczbe>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (lshiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
 		     (match_operand:VDQ_I  2 "aarch64_simd_rshift_imm" "Dr")))]
@@ -1289,7 +1289,7 @@
   [(set_attr "type" "neon_shift_imm<q>")]
 )
 
-(define_insn "aarch64_simd_ashr<mode>"
+(define_insn "aarch64_simd_ashr<mode><vczle><vczbe>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
        (ashiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,w")
 		     (match_operand:VDQ_I  2 "aarch64_simd_rshift_imm" "D1,Dr")))]
@@ -1312,7 +1312,7 @@
   [(set_attr "type" "neon_shift_acc<q>")]
 )
 
-(define_insn "aarch64_simd_imm_shl<mode>"
+(define_insn "aarch64_simd_imm_shl<mode><vczle><vczbe>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
 		   (match_operand:VDQ_I  2 "aarch64_simd_lshift_imm" "Dl")))]
@@ -1321,7 +1321,7 @@
   [(set_attr "type" "neon_shift_imm<q>")]
 )
 
-(define_insn "aarch64_simd_reg_sshl<mode>"
+(define_insn "aarch64_simd_reg_sshl<mode><vczle><vczbe>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
 		   (match_operand:VDQ_I 2 "register_operand" "w")))]
@@ -1330,7 +1330,7 @@
   [(set_attr "type" "neon_shift_reg<q>")]
 )
 
-(define_insn "aarch64_simd_reg_shl<mode>_unsigned"
+(define_insn "aarch64_simd_reg_shl<mode>_unsigned<vczle><vczbe>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
 		    (match_operand:VDQ_I 2 "register_operand" "w")]
@@ -1340,7 +1340,7 @@
   [(set_attr "type" "neon_shift_reg<q>")]
 )
 
-(define_insn "aarch64_simd_reg_shl<mode>_signed"
+(define_insn "aarch64_simd_reg_shl<mode>_signed<vczle><vczbe>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
 		    (match_operand:VDQ_I 2 "register_operand" "w")]
@@ -1522,7 +1522,7 @@
 )
 
 ;; For 64-bit modes we use ushl/r, as this does not require a SIMD zero.
-(define_insn "vec_shr_<mode>"
+(define_insn "vec_shr_<mode><vczle><vczbe>"
   [(set (match_operand:VD 0 "register_operand" "=w")
         (unspec:VD [(match_operand:VD 1 "register_operand" "w")
 		    (match_operand:SI 2 "immediate_operand" "i")]
@@ -6340,7 +6340,7 @@
 
 ;; vshl
 
-(define_insn "aarch64_<sur>shl<mode>"
+(define_insn "aarch64_<sur>shl<mode><vczle><vczbe>"
   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
         (unspec:VSDQ_I_DI
 	  [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
@@ -6354,7 +6354,7 @@
 
 ;; vqshl
 
-(define_insn "aarch64_<sur>q<r>shl<mode>"
+(define_insn "aarch64_<sur>q<r>shl<mode><vczle><vczbe>"
   [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
         (unspec:VSDQ_I
 	  [(match_operand:VSDQ_I 1 "register_operand" "w")
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/pr99195_1.c b/gcc/testsuite/gcc.target/aarch64/simd/pr99195_1.c
index fde501d28e3..8b6548a154f 100644
--- a/gcc/testsuite/gcc.target/aarch64/simd/pr99195_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/simd/pr99195_1.c
@@ -65,9 +65,9 @@ OPNINETEEN (int8, 8, 16, s8, padd, add, qadd, qsub, sub, mul, and, orr, eor, orn
 OPNINETEEN (int16, 4, 8, s16, padd, add, qadd, qsub, sub, mul, and, orr, eor, orn, bic, max, min, hadd, rhadd, hsub, abd, pmax, pmin)
 OPNINETEEN (int32, 2, 4, s32, padd, add, qadd, qsub, sub, mul, and, orr, eor, orn, bic, max, min, hadd, rhadd, hsub, abd, pmax, pmin)
 
-OPFOUR (int8, 8, 16, s8, zip1, zip2, uzp1, uzp2)
-OPFOUR (int16, 4, 8, s16, zip1, zip2, uzp1, uzp2)
-OPFOUR (int32, 2, 4, s32, zip1, zip2, uzp1, uzp2)
+OPSIX (int8, 8, 16, s8, zip1, zip2, uzp1, uzp2, shl, qshl)
+OPSIX (int16, 4, 8, s16, zip1, zip2, uzp1, uzp2, shl, qshl)
+OPSIX (int32, 2, 4, s32, zip1, zip2, uzp1, uzp2, shl, qshl)
 
 OPNINETEEN (uint8, 8, 16, u8, padd, add, qadd, qsub, sub, mul, and, orr, eor, orn, bic, max, min, hadd, rhadd, hsub, abd, pmax, pmin)
 OPNINETEEN (uint16, 4, 8, u16, padd, add, qadd, qsub, sub, mul, and, orr, eor, orn, bic, max, min, hadd, rhadd, hsub, abd, pmax, pmin)
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/pr99195_6.c b/gcc/testsuite/gcc.target/aarch64/simd/pr99195_6.c
index 52ad2709400..c86506e96d1 100644
--- a/gcc/testsuite/gcc.target/aarch64/simd/pr99195_6.c
+++ b/gcc/testsuite/gcc.target/aarch64/simd/pr99195_6.c
@@ -25,6 +25,16 @@ MYOP (uint16x8_t, uint16x4_t, int16x4_t, sqadd, u16)
 MYOP (uint32x4_t, uint32x2_t, int32x2_t, sqadd, u32)
 MYOP (uint64x2_t, uint64x1_t, int64x1_t, sqadd, u64)
 
+MYOP (uint8x16_t, uint8x8_t, int8x8_t, shl, u8)
+MYOP (uint16x8_t, uint16x4_t, int16x4_t, shl, u16)
+MYOP (uint32x4_t, uint32x2_t, int32x2_t, shl, u32)
+MYOP (uint64x2_t, uint64x1_t, int64x1_t, shl, u64)
+
+MYOP (uint8x16_t, uint8x8_t, int8x8_t, qshl, u8)
+MYOP (uint16x8_t, uint16x4_t, int16x4_t, qshl, u16)
+MYOP (uint32x4_t, uint32x2_t, int32x2_t, qshl, u32)
+MYOP (uint64x2_t, uint64x1_t, int64x1_t, qshl, u64)
+
 /* { dg-final { scan-assembler-not {\tfmov\t} } }  */
 /* { dg-final { scan-assembler-not {\tmov\t} } }  */
 
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/pr99195_8.c b/gcc/testsuite/gcc.target/aarch64/simd/pr99195_8.c
new file mode 100644
index 00000000000..29499e71df6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/simd/pr99195_8.c
@@ -0,0 +1,39 @@
+/* PR target/99195.  */
+/*  Check that we take advantage of 64-bit Advanced SIMD operations clearing
+    the top half of the vector register and no explicit zeroing instructions
+    are emitted.  */
+/* { dg-do compile } */
+/* { dg-options "-O" } */
+
+#include <arm_neon.h>
+
+#define MYOP(OT,IT1,OP,S,OS)					\
+OT								\
+foo_##OP##_##S##OS (IT1 a)					\
+{								\
+  IT1 zeros = vcreate_##S##OS (0);				\
+  return vcombine_##S##OS (v##OP##_##S##OS (a, 3), zeros);	\
+}								\
+OT								\
+foo_##OP##_##S##OS##_s (IT1 a)					\
+{								\
+  IT1 zeros = vcreate_##S##OS (0);				\
+  return vcombine_##S##OS (v##OP##_##S##OS (a, OS - 1), zeros);	\
+}
+
+MYOP (int8x16_t, int8x8_t, shr_n, s, 8)
+MYOP (int16x8_t, int16x4_t, shr_n, s, 16)
+MYOP (int32x4_t, int32x2_t, shr_n, s, 32)
+MYOP (uint8x16_t, uint8x8_t, shr_n, u, 8)
+MYOP (uint16x8_t, uint16x4_t, shr_n, u, 16)
+MYOP (uint32x4_t, uint32x2_t, shr_n, u, 32)
+MYOP (int8x16_t, int8x8_t, shl_n, s, 8)
+MYOP (int16x8_t, int16x4_t, shl_n, s, 16)
+MYOP (int32x4_t, int32x2_t, shl_n, s, 32)
+MYOP (uint8x16_t, uint8x8_t, shl_n, u, 8)
+MYOP (uint16x8_t, uint16x4_t, shl_n, u, 16)
+MYOP (uint32x4_t, uint32x2_t, shl_n, u, 32)
+
+/* { dg-final { scan-assembler-not {\tfmov\t} } }  */
+/* { dg-final { scan-assembler-not {\tmov\t} } }  */
+