[gcc(refs/users/clyon/heads/mve-autovec)] arm: Auto-vectorization for MVE: vshl

public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed

* [gcc(refs/users/clyon/heads/mve-autovec)] arm: Auto-vectorization for MVE: vshl
@ 2020-12-16 13:48 Christophe Lyon
  0 siblings, 0 replies; 5+ messages in thread
From: Christophe Lyon @ 2020-12-16 13:48 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:186d50728da65273faf4bb4277691761687a4a1d

commit 186d50728da65273faf4bb4277691761687a4a1d
Author: Christophe Lyon <christophe.lyon@linaro.org>
Date:   Mon Nov 16 14:58:19 2020 +0000

    arm: Auto-vectorization for MVE: vshl
    
    This patch enables MVE vshlq instructions for auto-vectorization.  A
    new MVE pattern is introduced that takes a vector of constants as
    second operand, all constants being equal.
    
    The existing mve_vshlq_n_<supf><mode> is kept, as it takes a single
    immediate as second operand, and is used by arm_mve.h.
    
    The vashl<mode>3 expander is added to vec-common.md.
    
    2020-12-03  Christophe Lyon  <christophe.lyon@linaro.org>
    
            gcc/
            * config/arm/iterators.md (VDQIW): Add TARGET_HAVE_MVE condition
            where relevant.
            * config/arm/mve.md (mve_vshlq_n_<mode>_imm): New entry.
            * config/arm/neon.md (vashl<mode>3): Rename into vashl<mode>3_neon.
            * config/arm/vec-common.md (vashl<mode>3): New expander.
    
            gcc/testsuite/
            * gcc.target/arm/simd/mve-vshl.c: Add tests for vshl.

Diff:
---
 gcc/config/arm/mve.md                        | 38 +++++++++++++++---
 gcc/config/arm/neon.md                       |  2 +-
 gcc/config/arm/vec-common.md                 |  7 ++++
 gcc/testsuite/gcc.target/arm/simd/mve-vshl.c | 59 ++++++++++++++++++++++++++++
 4 files changed, 100 insertions(+), 6 deletions(-)

diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
index b4c5a1e27c4..3e2340d6ebc 100644
--- a/gcc/config/arm/mve.md
+++ b/gcc/config/arm/mve.md
@@ -823,15 +823,24 @@
 ;;
 ;; [vshlq_s, vshlq_u])
 ;;
-(define_insn "mve_vshlq_<supf><mode>"
+(define_insn "mve_vshlq_s<mode>"
   [
    (set (match_operand:MVE_2 0 "s_register_operand" "=w")
-	(unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "w")
-		       (match_operand:MVE_2 2 "s_register_operand" "w")]
-	 VSHLQ))
+	(ashift:MVE_2 (match_operand:MVE_2 1 "s_register_operand" "w")
+		      (match_operand:MVE_2 2 "s_register_operand" "w")))
   ]
   "TARGET_HAVE_MVE"
-  "vshl.<supf>%#<V_sz_elem>\t%q0, %q1, %q2"
+  "vshl.s%#<V_sz_elem>\t%q0, %q1, %q2"
+  [(set_attr "type" "mve_move")
+])
+(define_insn "mve_vshlq_u<mode>"
+  [
+   (set (match_operand:MVE_2 0 "s_register_operand" "=w")
+	(ashift:MVE_2 (match_operand:MVE_2 1 "s_register_operand" "w")
+		      (match_operand:MVE_2 2 "s_register_operand" "w")))
+  ]
+  "TARGET_HAVE_MVE"
+  "vshl.u%#<V_sz_elem>\t%q0, %q1, %q2"
   [(set_attr "type" "mve_move")
 ])
 
@@ -1945,6 +1954,7 @@
 ;;
 ;; [vshlq_n_u, vshlq_n_s])
 ;;
+;; Version that takes an immediate as operand 2.
 (define_insn "mve_vshlq_n_<supf><mode>"
   [
    (set (match_operand:MVE_2 0 "s_register_operand" "=w")
@@ -1957,6 +1967,24 @@
   [(set_attr "type" "mve_move")
 ])
 
+;; Version with a vector of immediates as operand 2.
+;; We only emit signed ('s') versions, since it makes no difference.
+(define_insn "mve_vshlq_n_<mode>_imm"
+  [
+   (set (match_operand:MVE_2 0 "s_register_operand" "=w")
+	(ashift:MVE_2 (match_operand:MVE_2 1 "s_register_operand" "w")
+		      (match_operand:MVE_2 2 "imm_for_neon_lshift_operand" "i")))
+  ]
+  "TARGET_HAVE_MVE"
+{
+  return neon_output_shift_immediate ("vshl", 's', &operands[2],
+				     <MODE>mode,
+				     VALID_NEON_QREG_MODE (<MODE>mode),
+				     true);
+}
+  [(set_attr "type" "mve_move")
+])
+
 ;;
 ;; [vshlq_r_s, vshlq_r_u])
 ;;
diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
index d2e92baeb49..8927e752ee9 100644
--- a/gcc/config/arm/neon.md
+++ b/gcc/config/arm/neon.md
@@ -870,7 +870,7 @@
 ; generic vectorizer code.  It ends up creating a V2DI constructor with
 ; SImode elements.
 
-(define_insn "vashl<mode>3"
+(define_insn "vashl<mode>3_neon"
   [(set (match_operand:VDQIW 0 "s_register_operand" "=w,w")
 	(ashift:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w,w")
 		      (match_operand:VDQIW 2 "imm_lshift_or_reg_neon" "w,Dm")))]
diff --git a/gcc/config/arm/vec-common.md b/gcc/config/arm/vec-common.md
index 2d0932b95a1..779d6737e1b 100644
--- a/gcc/config/arm/vec-common.md
+++ b/gcc/config/arm/vec-common.md
@@ -205,3 +205,10 @@
 	(neg:VDQWH (match_operand:VDQWH 1 "s_register_operand" "")))]
   "ARM_HAVE_<MODE>_ARITH"
 )
+
+(define_expand "vashl<mode>3"
+  [(set (match_operand:VDQIW 0 "s_register_operand" "")
+	(ashift:VDQIW (match_operand:VDQIW 1 "s_register_operand" "")
+		      (match_operand:VDQIW 2 "imm_lshift_or_reg_neon" "")))]
+  "ARM_HAVE_<MODE>_ARITH"
+)
diff --git a/gcc/testsuite/gcc.target/arm/simd/mve-vshl.c b/gcc/testsuite/gcc.target/arm/simd/mve-vshl.c
new file mode 100644
index 00000000000..2aca281ebee
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/simd/mve-vshl.c
@@ -0,0 +1,59 @@
+/* { dg-do assemble } */
+/* { dg-require-effective-target arm_v8_1m_mve_ok } */
+/* { dg-add-options arm_v8_1m_mve } */
+/* { dg-additional-options "-O3" } */
+
+#include <stdint.h>
+
+#define FUNC(SIGN, TYPE, BITS, NB, OP, NAME)				\
+  void test_ ## NAME ##_ ## SIGN ## BITS ## x ## NB (TYPE##BITS##_t * __restrict__ dest, TYPE##BITS##_t *a, TYPE##BITS##_t *b) { \
+    int i;								\
+    for (i=0; i<NB; i++) {						\
+      dest[i] = a[i] OP b[i];						\
+    }									\
+}
+
+#define FUNC_IMM(SIGN, TYPE, BITS, NB, OP, NAME)				\
+  void test_ ## NAME ##_ ## SIGN ## BITS ## x ## NB (TYPE##BITS##_t * __restrict__ dest, TYPE##BITS##_t *a) { \
+    int i;								\
+    for (i=0; i<NB; i++) {						\
+      dest[i] = a[i] OP 5;						\
+    }									\
+}
+
+/* 64-bit vectors.  */
+FUNC(s, int, 32, 2, <<, vshl)
+FUNC(u, uint, 32, 2, <<, vshl)
+FUNC(s, int, 16, 4, <<, vshl)
+FUNC(u, uint, 16, 4, <<, vshl)
+FUNC(s, int, 8, 8, <<, vshl)
+FUNC(u, uint, 8, 8, <<, vshl)
+
+/* 128-bit vectors.  */
+FUNC(s, int, 32, 4, <<, vshl)
+FUNC(u, uint, 32, 4, <<, vshl)
+FUNC(s, int, 16, 8, <<, vshl)  /* FIXME: not vectorized */
+FUNC(u, uint, 16, 8, <<, vshl) /* FIXME: not vectorized */
+FUNC(s, int, 8, 16, <<, vshl)  /* FIXME: not vectorized */
+FUNC(u, uint, 8, 16, <<, vshl) /* FIXME: not vectorized */
+
+/* 64-bit vectors.  */
+FUNC_IMM(s, int, 32, 2, <<, vshlimm)
+FUNC_IMM(u, uint, 32, 2, <<, vshlimm)
+FUNC_IMM(s, int, 16, 4, <<, vshlimm)
+FUNC_IMM(u, uint, 16, 4, <<, vshlimm)
+FUNC_IMM(s, int, 8, 8, <<, vshlimm)
+FUNC_IMM(u, uint, 8, 8, <<, vshlimm)
+
+/* 128-bit vectors.  */
+FUNC_IMM(s, int, 32, 4, <<, vshlimm)
+FUNC_IMM(u, uint, 32, 4, <<, vshlimm)
+FUNC_IMM(s, int, 16, 8, <<, vshlimm)
+FUNC_IMM(u, uint, 16, 8, <<, vshlimm)
+FUNC_IMM(s, int, 8, 16, <<, vshlimm)
+FUNC_IMM(u, uint, 8, 16, <<, vshlimm)
+
+/* MVE has only 128-bit vectors, so none of the 64-bit functions above
+   are vectorized; nor are the 128-bit ones marked FIXME.  */
+/* We only emit vshl.s, which is equivalent to vshl.u anyway.  */
+/* { dg-final { scan-assembler-times {vshl.s[0-9]+\tq[0-9]+, q[0-9]+} 8 } } */


^ permalink raw reply	[flat|nested] 5+ messages in thread

* [gcc(refs/users/clyon/heads/mve-autovec)] arm: Auto-vectorization for MVE: vshl
@ 2020-11-24 22:31 Christophe Lyon
  0 siblings, 0 replies; 5+ messages in thread
From: Christophe Lyon @ 2020-11-24 22:31 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:c7b7b2038d3636d80ced7824528f8bbfc1602504

commit c7b7b2038d3636d80ced7824528f8bbfc1602504
Author: Christophe Lyon <christophe.lyon@linaro.org>
Date:   Mon Nov 16 14:58:19 2020 +0000

    arm: Auto-vectorization for MVE: vshl
    
    This patch enables MVE vshlq instructions for auto-vectorization.  A
    new MVE pattern is introduced that takes a vector of constants as
    second operand, all constants being equal.
    
    The existing mve_vshlq_n_<supf><mode> is kept, as it takes a single
    immediate as second operand, and is used by arm_mve.h.
    
    The vashl<mode>3 expander is added to vec-common.md.
    
    2020-11-12  Christophe Lyon  <christophe.lyon@linaro.org>
    
            gcc/
            * config/arm/mve.md (mve_vshlq_n_<mode>_imm): New entry.
            * config/arm/neon.md (vashl<mode>3): Rename into vashl<mode>3_neon.
            * config/arm/vec-common.md (vashl<mode>3): New expander.
    
            gcc/testsuite/
            * gcc.target/arm/simd/mve-vshl.c: Add tests for vshl.

Diff:
---
 gcc/config/arm/mve.md                        | 19 +++++++++++++++
 gcc/config/arm/neon.md                       |  2 +-
 gcc/config/arm/vec-common.md                 |  7 ++++++
 gcc/testsuite/gcc.target/arm/simd/mve-vshl.c | 35 ++++++++++++++++++++++++++++
 4 files changed, 62 insertions(+), 1 deletion(-)

diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
index a5f5d7527f4..ce822586c4e 100644
--- a/gcc/config/arm/mve.md
+++ b/gcc/config/arm/mve.md
@@ -1924,6 +1924,7 @@
 ;;
 ;; [vshlq_n_u, vshlq_n_s])
 ;;
+;; Version that takes an immediate as operand 2.
 (define_insn "mve_vshlq_n_<supf><mode>"
   [
    (set (match_operand:MVE_2 0 "s_register_operand" "=w")
@@ -1936,6 +1937,24 @@
   [(set_attr "type" "mve_move")
 ])
 
+;; Version with a vector of immediates as operand 2.
+;; We only emit signed ('s') versions, since it makes no difference.
+(define_insn "mve_vshlq_n_<mode>_imm"
+  [
+   (set (match_operand:MVE_2 0 "s_register_operand" "=w")
+	(ashift:MVE_2 (match_operand:MVE_2 1 "s_register_operand" "w")
+		      (match_operand:MVE_2 2 "imm_for_neon_lshift_operand" "i")))
+  ]
+  "TARGET_HAVE_MVE"
+{
+  return neon_output_shift_immediate ("vshl", 's', &operands[2],
+				     <MODE>mode,
+				     VALID_NEON_QREG_MODE (<MODE>mode),
+				     true);
+}
+  [(set_attr "type" "mve_move")
+])
+
 ;;
 ;; [vshlq_r_s, vshlq_r_u])
 ;;
diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
index e1263b00b39..cb7646ea752 100644
--- a/gcc/config/arm/neon.md
+++ b/gcc/config/arm/neon.md
@@ -870,7 +870,7 @@
 ; generic vectorizer code.  It ends up creating a V2DI constructor with
 ; SImode elements.
 
-(define_insn "vashl<mode>3"
+(define_insn "vashl<mode>3_neon"
   [(set (match_operand:VDQIW 0 "s_register_operand" "=w,w")
 	(ashift:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w,w")
 		      (match_operand:VDQIW 2 "imm_lshift_or_reg_neon" "w,Dm")))]
diff --git a/gcc/config/arm/vec-common.md b/gcc/config/arm/vec-common.md
index 7825caa4e43..907b6b292ee 100644
--- a/gcc/config/arm/vec-common.md
+++ b/gcc/config/arm/vec-common.md
@@ -216,3 +216,10 @@
 		      (match_operand:VNINOTM1 2 "s_register_operand" "")))]
   "TARGET_NEON"
 )
+
+(define_expand "vashl<mode>3"
+  [(set (match_operand:VDQIW 0 "s_register_operand" "")
+	(ashift:VDQIW (match_operand:VDQIW 1 "s_register_operand" "")
+		      (match_operand:VDQIW 2 "imm_lshift_or_reg_neon" "")))]
+  "TARGET_NEON || TARGET_HAVE_MVE"
+)
\ No newline at end of file
diff --git a/gcc/testsuite/gcc.target/arm/simd/mve-vshl.c b/gcc/testsuite/gcc.target/arm/simd/mve-vshl.c
new file mode 100644
index 00000000000..4ccc9a2e927
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/simd/mve-vshl.c
@@ -0,0 +1,35 @@
+/* { dg-do assemble } */
+/* { dg-require-effective-target arm_v8_1m_mve_ok } */
+/* { dg-add-options arm_v8_1m_mve } */
+/* { dg-additional-options "-O3" } */
+
+#include <stdint.h>
+
+#define FUNC(SIGN, TYPE, BITS, NB, OP, NAME)				\
+  void test_ ## NAME ##_ ## SIGN ## BITS ## x ## NB (TYPE##BITS##_t * __restrict__ dest, TYPE##BITS##_t *a) { \
+    int i;								\
+    for (i=0; i<NB; i++) {						\
+      dest[i] = a[i] OP 5;						\
+    }									\
+}
+
+/* 64-bit vectors.  */
+FUNC(s, int, 32, 2, <<, vshl)
+FUNC(u, uint, 32, 2, <<, vshl)
+FUNC(s, int, 16, 4, <<, vshl)
+FUNC(u, uint, 16, 4, <<, vshl)
+FUNC(s, int, 8, 8, <<, vshl)
+FUNC(u, uint, 8, 8, <<, vshl)
+
+/* 128-bit vectors.  */
+FUNC(s, int, 32, 4, <<, vshl)
+FUNC(u, uint, 32, 4, <<, vshl)
+FUNC(s, int, 16, 8, <<, vshl)
+FUNC(u, uint, 16, 8, <<, vshl)
+FUNC(s, int, 8, 16, <<, vshl)
+FUNC(u, uint, 8, 16, <<, vshl)
+
+/* MVE has only 128-bit vectors, so we can vectorize only half of the
+   functions above.  */
+/* We only emit vshl.s, which is equivalent to vshl.u anyway.  */
+/* { dg-final { scan-assembler-times {vshl.s[0-9]+\tq[0-9]+, q[0-9]+} 6 } } */


^ permalink raw reply	[flat|nested] 5+ messages in thread

* [gcc(refs/users/clyon/heads/mve-autovec)] arm: Auto-vectorization for MVE: vshl
@ 2020-11-23 10:42 Christophe Lyon
  0 siblings, 0 replies; 5+ messages in thread
From: Christophe Lyon @ 2020-11-23 10:42 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:488e0869f9610981ae68e2a70a329f80d6f3ad72

commit 488e0869f9610981ae68e2a70a329f80d6f3ad72
Author: Christophe Lyon <christophe.lyon@linaro.org>
Date:   Mon Nov 16 14:58:19 2020 +0000

    arm: Auto-vectorization for MVE: vshl
    
    This patch enables MVE vshlq instructions for auto-vectorization.  A
    new MVE pattern is introduced that takes a vector of constants as
    second operand, all constants being equal.
    
    The existing mve_vshlq_n_<supf><mode> is kept, as it takes a single
    immediate as second operand, and is used by arm_mve.h.
    
    The vashl<mode>3 expander is added to vec-common.md.
    
    2020-11-12  Christophe Lyon  <christophe.lyon@linaro.org>
    
            gcc/
            * config/arm/mve.md (mve_vshlq_n_<mode>_imm): New entry.
            * config/arm/neon.md (vashl<mode>3): Rename into vashl<mode>3_neon.
            * config/arm/vec-common.md (vashl<mode>3): New expander.
    
            gcc/testsuite/
            * gcc.target/arm/simd/mve-vshl.c: Add tests for vshl.

Diff:
---
 gcc/config/arm/mve.md                        | 19 ++++++
 gcc/config/arm/neon.md                       |  2 +-
 gcc/config/arm/vec-common.md                 |  7 ++
 gcc/testsuite/gcc.target/arm/simd/mve-vshl.c | 96 ++++++++++++++++++++++++++++
 4 files changed, 123 insertions(+), 1 deletion(-)

diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
index a5f5d7527f4..ce822586c4e 100644
--- a/gcc/config/arm/mve.md
+++ b/gcc/config/arm/mve.md
@@ -1924,6 +1924,7 @@
 ;;
 ;; [vshlq_n_u, vshlq_n_s])
 ;;
+;; Version that takes an immediate as operand 2.
 (define_insn "mve_vshlq_n_<supf><mode>"
   [
    (set (match_operand:MVE_2 0 "s_register_operand" "=w")
@@ -1936,6 +1937,24 @@
   [(set_attr "type" "mve_move")
 ])
 
+;; Version with a vector of immediates as operand 2.
+;; We only emit signed ('s') versions, since it makes no difference.
+(define_insn "mve_vshlq_n_<mode>_imm"
+  [
+   (set (match_operand:MVE_2 0 "s_register_operand" "=w")
+	(ashift:MVE_2 (match_operand:MVE_2 1 "s_register_operand" "w")
+		      (match_operand:MVE_2 2 "imm_for_neon_lshift_operand" "i")))
+  ]
+  "TARGET_HAVE_MVE"
+{
+  return neon_output_shift_immediate ("vshl", 's', &operands[2],
+				     <MODE>mode,
+				     VALID_NEON_QREG_MODE (<MODE>mode),
+				     true);
+}
+  [(set_attr "type" "mve_move")
+])
+
 ;;
 ;; [vshlq_r_s, vshlq_r_u])
 ;;
diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
index e1263b00b39..cb7646ea752 100644
--- a/gcc/config/arm/neon.md
+++ b/gcc/config/arm/neon.md
@@ -870,7 +870,7 @@
 ; generic vectorizer code.  It ends up creating a V2DI constructor with
 ; SImode elements.
 
-(define_insn "vashl<mode>3"
+(define_insn "vashl<mode>3_neon"
   [(set (match_operand:VDQIW 0 "s_register_operand" "=w,w")
 	(ashift:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w,w")
 		      (match_operand:VDQIW 2 "imm_lshift_or_reg_neon" "w,Dm")))]
diff --git a/gcc/config/arm/vec-common.md b/gcc/config/arm/vec-common.md
index 99ea1cc0b06..86496d06d7d 100644
--- a/gcc/config/arm/vec-common.md
+++ b/gcc/config/arm/vec-common.md
@@ -193,3 +193,10 @@
 		 (match_operand:VDQ 2 "s_register_operand" "")))]
   "TARGET_NEON || TARGET_HAVE_MVE"
 )
+
+(define_expand "vashl<mode>3"
+  [(set (match_operand:VDQIW 0 "s_register_operand" "")
+	(ashift:VDQIW (match_operand:VDQIW 1 "s_register_operand" "")
+		      (match_operand:VDQIW 2 "imm_lshift_or_reg_neon" "")))]
+  "TARGET_NEON || TARGET_HAVE_MVE"
+)
\ No newline at end of file
diff --git a/gcc/testsuite/gcc.target/arm/simd/mve-vshl.c b/gcc/testsuite/gcc.target/arm/simd/mve-vshl.c
new file mode 100644
index 00000000000..87865b77270
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/simd/mve-vshl.c
@@ -0,0 +1,96 @@
+/* { dg-do assemble } */
+/* { dg-require-effective-target arm_v8_1m_mve_ok } */
+/* { dg-add-options arm_v8_1m_mve } */
+/* { dg-additional-options "-O3" } */
+
+#include <stdint.h>
+
+/* 64-bit vectors.  */
+void test_vshl_i32x2 (int32_t * __restrict__ dest, int32_t * a) {
+  int i;
+  for (i=0; i<2; i++) {
+    dest[i] = a[i] << 2;
+  }
+}
+
+void test_vshl_u32x2 (uint32_t * __restrict__ dest, uint32_t * a) {
+  int i;
+  for (i=0; i<2; i++) {
+    dest[i] = a[i] << 3;
+  }
+}
+
+void test_vshl_i16x4 (int16_t * __restrict__ dest, int16_t * a) {
+  int i;
+  for (i=0; i<4; i++) {
+    dest[i] = a[i] << 4;
+  }
+}
+
+void test_vshl_u16x4 (uint16_t * __restrict__ dest, uint16_t * a) {
+  int i;
+  for (i=0; i<4; i++) {
+    dest[i] = a[i] << 5;
+  }
+}
+
+void test_vshl_i8x8 (int8_t * __restrict__ dest, int8_t * a) {
+  int i;
+  for (i=0; i<8; i++) {
+    dest[i] = a[i] << 6;
+  }
+}
+
+void test_vshl_u8x8 (uint8_t * __restrict__ dest, uint8_t * a) {
+  int i;
+  for (i=0; i<8; i++) {
+    dest[i] = a[i] << 7;
+  }
+}
+/* 128-bit vectors.  */
+void test_vshl_i32x4 (int32_t * __restrict__ dest, int32_t * a) {
+  int i;
+  for (i=0; i<4; i++) {
+    dest[i] = a[i] << 2;
+  }
+}
+
+void test_vshl_u32x4 (uint32_t * __restrict__ dest, uint32_t * a) {
+  int i;
+  for (i=0; i<4; i++) {
+    dest[i] = a[i] << 3;
+  }
+}
+
+void test_vshl_i16x8 (int16_t * __restrict__ dest, int16_t * a) {
+  int i;
+  for (i=0; i<8; i++) {
+    dest[i] = a[i] << 4;
+  }
+}
+
+void test_vshl_u16x8 (uint16_t * __restrict__ dest, uint16_t * a) {
+  int i;
+  for (i=0; i<8; i++) {
+    dest[i] = a[i] << 5;
+  }
+}
+
+void test_vshl_i8x16 (int8_t * __restrict__ dest, int8_t * a) {
+  int i;
+  for (i=0; i<16; i++) {
+    dest[i] = a[i] << 6;
+  }
+}
+
+void test_vshl_u8x16 (uint8_t * __restrict__ dest, uint8_t * a) {
+  int i;
+  for (i=0; i<16; i++) {
+    dest[i] = a[i] << 7;
+  }
+}
+
+/* MVE has only 128-bit vectors, so we can vectorize only half of the
+   functions above.  */
+/* We only emit vshl.s, which is equivalent to vshl.u anyway.  */
+/* { dg-final { scan-assembler-times {vshl.s[0-9]+\tq[0-9]+, q[0-9]+} 6 } } */


^ permalink raw reply	[flat|nested] 5+ messages in thread

* [gcc(refs/users/clyon/heads/mve-autovec)] arm: Auto-vectorization for MVE: vshl
@ 2020-11-20 15:05 Christophe Lyon
  0 siblings, 0 replies; 5+ messages in thread
From: Christophe Lyon @ 2020-11-20 15:05 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:e648e77bf4a5b3211bbf8b5f09eb50bfd87bf990

commit e648e77bf4a5b3211bbf8b5f09eb50bfd87bf990
Author: Christophe Lyon <christophe.lyon@linaro.org>
Date:   Mon Nov 16 14:58:19 2020 +0000

    arm: Auto-vectorization for MVE: vshl
    
    This patch enables MVE vshl instructions for auto-vectorization.  A
    new MVE pattern is introduced that takes a vector of constants as
    second operand, all constants being equal.
    
    The existing mve_vshlq_n_<supf><mode> is kept, as it takes a single
    immediate as second operand, and is used by arm_mve.h.
    
    The vashl<mode>3 expander is added to vec-common.md.
    
    2020-11-12  Christophe Lyon  <christophe.lyon@linaro.org>
    
            gcc/
            * config/arm/mve.md (mve_vshlq_n_<mode>_imm): New entry.
            * config/arm/neon.md (vashl<mode>3): Rename into vashl<mode>3_neon.
            * config/arm/vec-common.md (vashl<mode>3): New expander.
    
            gcc/testsuite/
            * gcc.target/arm/simd/mve-vshl.c: Add tests for vshl.

Diff:
---
 gcc/config/arm/mve.md                        | 18 ++++++
 gcc/config/arm/neon.md                       |  2 +-
 gcc/config/arm/vec-common.md                 |  7 ++
 gcc/testsuite/gcc.target/arm/simd/mve-vshl.c | 96 ++++++++++++++++++++++++++++
 4 files changed, 122 insertions(+), 1 deletion(-)

diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
index 4c6cc093822..ecc399b01f7 100644
--- a/gcc/config/arm/mve.md
+++ b/gcc/config/arm/mve.md
@@ -1939,6 +1939,24 @@
   [(set_attr "type" "mve_move")
 ])
 
+;; Version with a vector of immediates as operand 2.
+;; We only emit signed ('s') versions, since it makes no difference.
+(define_insn "mve_vshlq_n_<mode>_imm"
+  [
+   (set (match_operand:MVE_2 0 "s_register_operand" "=w")
+	(ashift:MVE_2 (match_operand:MVE_2 1 "s_register_operand" "w")
+		      (match_operand:MVE_2 2 "imm_for_neon_lshift_operand" "i")))
+  ]
+  "TARGET_HAVE_MVE"
+{
+  return neon_output_shift_immediate ("vshl", 's', &operands[2],
+				     <MODE>mode,
+				     VALID_NEON_QREG_MODE (<MODE>mode),
+				     true);
+}
+  [(set_attr "type" "mve_move")
+])
+
 ;;
 ;; [vshlq_r_s, vshlq_r_u])
 ;;
diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
index e1263b00b39..cb7646ea752 100644
--- a/gcc/config/arm/neon.md
+++ b/gcc/config/arm/neon.md
@@ -870,7 +870,7 @@
 ; generic vectorizer code.  It ends up creating a V2DI constructor with
 ; SImode elements.
 
-(define_insn "vashl<mode>3"
+(define_insn "vashl<mode>3_neon"
   [(set (match_operand:VDQIW 0 "s_register_operand" "=w,w")
 	(ashift:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w,w")
 		      (match_operand:VDQIW 2 "imm_lshift_or_reg_neon" "w,Dm")))]
diff --git a/gcc/config/arm/vec-common.md b/gcc/config/arm/vec-common.md
index 99ea1cc0b06..86496d06d7d 100644
--- a/gcc/config/arm/vec-common.md
+++ b/gcc/config/arm/vec-common.md
@@ -193,3 +193,10 @@
 		 (match_operand:VDQ 2 "s_register_operand" "")))]
   "TARGET_NEON || TARGET_HAVE_MVE"
 )
+
+(define_expand "vashl<mode>3"
+  [(set (match_operand:VDQIW 0 "s_register_operand" "")
+	(ashift:VDQIW (match_operand:VDQIW 1 "s_register_operand" "")
+		      (match_operand:VDQIW 2 "imm_lshift_or_reg_neon" "")))]
+  "TARGET_NEON || TARGET_HAVE_MVE"
+)
\ No newline at end of file
diff --git a/gcc/testsuite/gcc.target/arm/simd/mve-vshl.c b/gcc/testsuite/gcc.target/arm/simd/mve-vshl.c
new file mode 100644
index 00000000000..87865b77270
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/simd/mve-vshl.c
@@ -0,0 +1,96 @@
+/* { dg-do assemble } */
+/* { dg-require-effective-target arm_v8_1m_mve_ok } */
+/* { dg-add-options arm_v8_1m_mve } */
+/* { dg-additional-options "-O3" } */
+
+#include <stdint.h>
+
+/* 64-bit vectors.  */
+void test_vshl_i32x2 (int32_t * __restrict__ dest, int32_t * a) {
+  int i;
+  for (i=0; i<2; i++) {
+    dest[i] = a[i] << 2;
+  }
+}
+
+void test_vshl_u32x2 (uint32_t * __restrict__ dest, uint32_t * a) {
+  int i;
+  for (i=0; i<2; i++) {
+    dest[i] = a[i] << 3;
+  }
+}
+
+void test_vshl_i16x4 (int16_t * __restrict__ dest, int16_t * a) {
+  int i;
+  for (i=0; i<4; i++) {
+    dest[i] = a[i] << 4;
+  }
+}
+
+void test_vshl_u16x4 (uint16_t * __restrict__ dest, uint16_t * a) {
+  int i;
+  for (i=0; i<4; i++) {
+    dest[i] = a[i] << 5;
+  }
+}
+
+void test_vshl_i8x8 (int8_t * __restrict__ dest, int8_t * a) {
+  int i;
+  for (i=0; i<8; i++) {
+    dest[i] = a[i] << 6;
+  }
+}
+
+void test_vshl_u8x8 (uint8_t * __restrict__ dest, uint8_t * a) {
+  int i;
+  for (i=0; i<8; i++) {
+    dest[i] = a[i] << 7;
+  }
+}
+/* 128-bit vectors.  */
+void test_vshl_i32x4 (int32_t * __restrict__ dest, int32_t * a) {
+  int i;
+  for (i=0; i<4; i++) {
+    dest[i] = a[i] << 2;
+  }
+}
+
+void test_vshl_u32x4 (uint32_t * __restrict__ dest, uint32_t * a) {
+  int i;
+  for (i=0; i<4; i++) {
+    dest[i] = a[i] << 3;
+  }
+}
+
+void test_vshl_i16x8 (int16_t * __restrict__ dest, int16_t * a) {
+  int i;
+  for (i=0; i<8; i++) {
+    dest[i] = a[i] << 4;
+  }
+}
+
+void test_vshl_u16x8 (uint16_t * __restrict__ dest, uint16_t * a) {
+  int i;
+  for (i=0; i<8; i++) {
+    dest[i] = a[i] << 5;
+  }
+}
+
+void test_vshl_i8x16 (int8_t * __restrict__ dest, int8_t * a) {
+  int i;
+  for (i=0; i<16; i++) {
+    dest[i] = a[i] << 6;
+  }
+}
+
+void test_vshl_u8x16 (uint8_t * __restrict__ dest, uint8_t * a) {
+  int i;
+  for (i=0; i<16; i++) {
+    dest[i] = a[i] << 7;
+  }
+}
+
+/* MVE has only 128-bit vectors, so we can vectorize only half of the
+   functions above.  */
+/* We only emit vshl.s, which is equivalent to vshl.u anyway.  */
+/* { dg-final { scan-assembler-times {vshl.s[0-9]+\tq[0-9]+, q[0-9]+} 6 } } */


^ permalink raw reply	[flat|nested] 5+ messages in thread

* [gcc(refs/users/clyon/heads/mve-autovec)] arm: Auto-vectorization for MVE: vshl
@ 2020-11-16 15:04 Christophe Lyon
  0 siblings, 0 replies; 5+ messages in thread
From: Christophe Lyon @ 2020-11-16 15:04 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:9536498e0f2aeba5761ff4964c1851169023a35a

commit 9536498e0f2aeba5761ff4964c1851169023a35a
Author: Christophe Lyon <christophe.lyon@linaro.org>
Date:   Mon Nov 16 14:58:19 2020 +0000

    arm: Auto-vectorization for MVE: vshl
    
    This patch enables MVE vshl instructions for auto-vectorization.  New
    MVE patterns are introduced that take a vector of constants as second
    operand, all constants being equal.
    
    The existing mve_vshlq_n_<supf><mode> is kept, as it takes a single
    immediate as second operand, and is used by arm_mve.h.
    
    The vashl<mode>3 expander is added to vec-common.md.
    
    2020-11-12  Christophe Lyon  <christophe.lyon@linaro.org>
    
            gcc/
            * config/arm/mve.md (mve_vshlq_n_s<mode>_vec): New entry.
            (mve_vshlq_n_u<mode>_vec): Likewise.
            * config/arm/neon.md (vashl<mode>3): Rename into vashl<mode>3_neon.
            * config/arm/vec-common.md (vashl<mode>3): New expander.
    
            gcc/testsuite/
            * gcc.target/arm/simd/mve-vshl.c: Add tests for vshl.

Diff:
---
 gcc/config/arm/mve.md                        | 32 ++++++++++++++++++
 gcc/config/arm/neon.md                       |  2 +-
 gcc/config/arm/vec-common.md                 |  7 ++++
 gcc/testsuite/gcc.target/arm/simd/mve-vshl.c | 50 ++++++++++++++++++++++++++++
 4 files changed, 90 insertions(+), 1 deletion(-)

diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
index 4c6cc093822..1f8483d18d6 100644
--- a/gcc/config/arm/mve.md
+++ b/gcc/config/arm/mve.md
@@ -1939,6 +1939,38 @@
   [(set_attr "type" "mve_move")
 ])
 
+;; Versions with a vector of immediates as operand 2
+(define_insn "mve_vshlq_n_s<mode>_vec"
+  [
+   (set (match_operand:MVE_2 0 "s_register_operand" "=w")
+	(ashift:MVE_2 (match_operand:MVE_2 1 "s_register_operand" "w")
+		      (match_operand:MVE_2 2 "imm_for_neon_lshift_operand" "i")))
+  ]
+  "TARGET_HAVE_MVE"
+{
+  return neon_output_shift_immediate ("vshl", 'i', &operands[2],
+				     <MODE>mode,
+				     VALID_NEON_QREG_MODE (<MODE>mode),
+				     true);
+}
+  [(set_attr "type" "mve_move")
+])
+(define_insn "mve_vshlq_n_u<mode>_vec"
+  [
+   (set (match_operand:MVE_2 0 "s_register_operand" "=w")
+	(ashift:MVE_2 (match_operand:MVE_2 1 "s_register_operand" "w")
+		      (match_operand:MVE_2 2 "imm_for_neon_lshift_operand" "i")))
+  ]
+  "TARGET_HAVE_MVE"
+{
+  return neon_output_shift_immediate ("vshl", 'i', &operands[2],
+				     <MODE>mode,
+				     VALID_NEON_QREG_MODE (<MODE>mode),
+				     true);
+}
+  [(set_attr "type" "mve_move")
+])
+
 ;;
 ;; [vshlq_r_s, vshlq_r_u])
 ;;
diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
index e1263b00b39..cb7646ea752 100644
--- a/gcc/config/arm/neon.md
+++ b/gcc/config/arm/neon.md
@@ -870,7 +870,7 @@
 ; generic vectorizer code.  It ends up creating a V2DI constructor with
 ; SImode elements.
 
-(define_insn "vashl<mode>3"
+(define_insn "vashl<mode>3_neon"
   [(set (match_operand:VDQIW 0 "s_register_operand" "=w,w")
 	(ashift:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w,w")
 		      (match_operand:VDQIW 2 "imm_lshift_or_reg_neon" "w,Dm")))]
diff --git a/gcc/config/arm/vec-common.md b/gcc/config/arm/vec-common.md
index 99ea1cc0b06..86496d06d7d 100644
--- a/gcc/config/arm/vec-common.md
+++ b/gcc/config/arm/vec-common.md
@@ -193,3 +193,10 @@
 		 (match_operand:VDQ 2 "s_register_operand" "")))]
   "TARGET_NEON || TARGET_HAVE_MVE"
 )
+
+(define_expand "vashl<mode>3"
+  [(set (match_operand:VDQIW 0 "s_register_operand" "")
+	(ashift:VDQIW (match_operand:VDQIW 1 "s_register_operand" "")
+		      (match_operand:VDQIW 2 "imm_lshift_or_reg_neon" "")))]
+  "TARGET_NEON || TARGET_HAVE_MVE"
+)
\ No newline at end of file
diff --git a/gcc/testsuite/gcc.target/arm/simd/mve-vshl.c b/gcc/testsuite/gcc.target/arm/simd/mve-vshl.c
new file mode 100644
index 00000000000..63893010e7e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/simd/mve-vshl.c
@@ -0,0 +1,50 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_v8_1m_mve_ok } */
+/* { dg-add-options arm_v8_1m_mve } */
+/* { dg-additional-options "-O3" } */
+
+#include <stdint.h>
+
+void test_vshl_i32 (int32_t * __restrict__ dest, int32_t * a) {
+  int i;
+  for (i=0; i<4; i++) {
+    dest[i] = a[i] << 2;
+  }
+}
+
+void test_vshl_i32_u (uint32_t * __restrict__ dest, uint32_t * a) {
+  int i;
+  for (i=0; i<4; i++) {
+    dest[i] = a[i] << 3;
+  }
+}
+
+void test_vshl_i16 (int16_t * __restrict__ dest, int16_t * a) {
+  int i;
+  for (i=0; i<8; i++) {
+    dest[i] = a[i] << 4;
+  }
+}
+
+void test_vshl_i16_u (uint16_t * __restrict__ dest, uint16_t * a) {
+  int i;
+  for (i=0; i<8; i++) {
+    dest[i] = a[i] << 5;
+  }
+}
+
+void test_vshl_i8 (int8_t * __restrict__ dest, int8_t * a) {
+  int i;
+  for (i=0; i<16; i++) {
+    dest[i] = a[i] << 6;
+  }
+}
+
+void test_vshl_i8_u (uint8_t * __restrict__ dest, uint8_t * a) {
+  int i;
+  for (i=0; i<16; i++) {
+    dest[i] = a[i] << 7;
+  }
+}
+
+/* { dg-final { scan-assembler-times {vshl.i[0-9]+\tq[0-9]+, q[0-9]+} 6 } } */


^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2020-12-16 13:48 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-12-16 13:48 [gcc(refs/users/clyon/heads/mve-autovec)] arm: Auto-vectorization for MVE: vshl Christophe Lyon
  -- strict thread matches above, loose matches on Subject: below --
2020-11-24 22:31 Christophe Lyon
2020-11-23 10:42 Christophe Lyon
2020-11-20 15:05 Christophe Lyon
2020-11-16 15:04 Christophe Lyon

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).