[PATCH v3 1/4] arm: Auto-vectorization for MVE: veor

public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed

* [PATCH v3 1/4] arm: Auto-vectorization for MVE: veor
@ 2020-12-11 18:37 Christophe Lyon
  2020-12-11 18:37 ` [PATCH v3 2/4] arm: Auto-vectorization for MVE: vbic Christophe Lyon
                   ` (3 more replies)
  0 siblings, 4 replies; 8+ messages in thread
From: Christophe Lyon @ 2020-12-11 18:37 UTC (permalink / raw)
  To: gcc-patches

This patch enables MVE veorq instructions for auto-vectorization.  MVE
veorq insns in mve.md are modified to use xor instead of unspec
expression to support xor<mode>3.  The xor<mode>3 expander is added to
vec-common.md.

2020-12-11  Christophe Lyon  <christophe.lyon@linaro.org>

	gcc/
	* config/arm/iterators.md (supf): Remove VEORQ_S and VEORQ_U.
	(VEORQ): Remove.
	* config/arm/mve.md (mve_veorq_u<mode>): New entry for veor
	instruction using expression xor.
	(mve_veorq_s<mode>): New expander.
	(mve_veorq_f<mode>): Use 'xor' code instead of unspec.
	* config/arm/neon.md (xor<mode>3): Renamed into xor<mode>3_neon.
	* config/arm/unspecs.md (VEORQ_S, VEORQ_U, VEORQ_F): Remove.
	* config/arm/vec-common.md (xor<mode>3): New expander.

	gcc/testsuite/
	* gcc.target/arm/simd/mve-veor.c: Add tests for veor.
---
 gcc/config/arm/iterators.md                  |  3 +-
 gcc/config/arm/mve.md                        | 22 ++++++----
 gcc/config/arm/neon.md                       |  2 +-
 gcc/config/arm/unspecs.md                    |  3 --
 gcc/config/arm/vec-common.md                 |  7 ++++
 gcc/testsuite/gcc.target/arm/simd/mve-veor.c | 61 ++++++++++++++++++++++++++++
 6 files changed, 84 insertions(+), 14 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/arm/simd/mve-veor.c

diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
index 5fcb7af..0195275 100644
--- a/gcc/config/arm/iterators.md
+++ b/gcc/config/arm/iterators.md
@@ -1237,7 +1237,7 @@ (define_int_attr supf [(VCVTQ_TO_F_S "s") (VCVTQ_TO_F_U "u") (VREV16Q_S "s")
 		       (VCADDQ_ROT270_U "u") (VCADDQ_ROT90_S "s")
 		       (VCMPEQQ_S "s") (VCMPEQQ_U "u") (VCADDQ_ROT90_U "u")
 		       (VCMPEQQ_N_S "s") (VCMPEQQ_N_U "u") (VCMPNEQ_N_S "s")
-		       (VCMPNEQ_N_U "u") (VEORQ_S "s") (VEORQ_U "u")
+		       (VCMPNEQ_N_U "u")
 		       (VHADDQ_N_S "s") (VHADDQ_N_U "u") (VHADDQ_S "s")
 		       (VHADDQ_U "u") (VHSUBQ_N_S "s")	(VHSUBQ_N_U "u")
 		       (VHSUBQ_S "s") (VMAXQ_S "s") (VMAXQ_U "u") (VHSUBQ_U "u")
@@ -1507,7 +1507,6 @@ (define_int_iterator VCADDQ_ROT90 [VCADDQ_ROT90_U VCADDQ_ROT90_S])
 (define_int_iterator VCMPEQQ [VCMPEQQ_U VCMPEQQ_S])
 (define_int_iterator VCMPEQQ_N [VCMPEQQ_N_S VCMPEQQ_N_U])
 (define_int_iterator VCMPNEQ_N [VCMPNEQ_N_U VCMPNEQ_N_S])
-(define_int_iterator VEORQ [VEORQ_U VEORQ_S])
 (define_int_iterator VHADDQ [VHADDQ_S VHADDQ_U])
 (define_int_iterator VHADDQ_N [VHADDQ_N_U VHADDQ_N_S])
 (define_int_iterator VHSUBQ [VHSUBQ_S VHSUBQ_U])
diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
index 4b2e46a..10512ad 100644
--- a/gcc/config/arm/mve.md
+++ b/gcc/config/arm/mve.md
@@ -1207,17 +1207,24 @@ (define_insn "mve_vcmpneq_n_<supf><mode>"
 ;;
 ;; [veorq_u, veorq_s])
 ;;
-(define_insn "mve_veorq_<supf><mode>"
+(define_insn "mve_veorq_u<mode>"
   [
    (set (match_operand:MVE_2 0 "s_register_operand" "=w")
-	(unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "w")
-		       (match_operand:MVE_2 2 "s_register_operand" "w")]
-	 VEORQ))
+	(xor:MVE_2 (match_operand:MVE_2 1 "s_register_operand" "w")
+		   (match_operand:MVE_2 2 "s_register_operand" "w")))
   ]
   "TARGET_HAVE_MVE"
-  "veor %q0, %q1, %q2"
+  "veor\t%q0, %q1, %q2"
   [(set_attr "type" "mve_move")
 ])
+(define_expand "mve_veorq_s<mode>"
+  [
+   (set (match_operand:MVE_2 0 "s_register_operand")
+	(xor:MVE_2 (match_operand:MVE_2 1 "s_register_operand")
+		   (match_operand:MVE_2 2 "s_register_operand")))
+  ]
+  "TARGET_HAVE_MVE"
+)
 
 ;;
 ;; [vhaddq_n_u, vhaddq_n_s])
@@ -2404,9 +2411,8 @@ (define_insn "mve_vcvttq_f16_f32v8hf"
 (define_insn "mve_veorq_f<mode>"
   [
    (set (match_operand:MVE_0 0 "s_register_operand" "=w")
-	(unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "w")
-		       (match_operand:MVE_0 2 "s_register_operand" "w")]
-	 VEORQ_F))
+	(xor:MVE_0 (match_operand:MVE_0 1 "s_register_operand" "w")
+		   (match_operand:MVE_0 2 "s_register_operand" "w")))
   ]
   "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
   "veor %q0, %q1, %q2"
diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
index 669c34d..e1263b0 100644
--- a/gcc/config/arm/neon.md
+++ b/gcc/config/arm/neon.md
@@ -747,7 +747,7 @@ (define_insn "bic<mode>3_neon"
   [(set_attr "type" "neon_logic<q>")]
 )
 
-(define_insn "xor<mode>3"
+(define_insn "xor<mode>3_neon"
   [(set (match_operand:VDQ 0 "s_register_operand" "=w")
 	(xor:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
 		 (match_operand:VDQ 2 "s_register_operand" "w")))]
diff --git a/gcc/config/arm/unspecs.md b/gcc/config/arm/unspecs.md
index c2076c9..fe240e8 100644
--- a/gcc/config/arm/unspecs.md
+++ b/gcc/config/arm/unspecs.md
@@ -608,7 +608,6 @@ (define_c_enum "unspec" [
   VCMPEQQ_S
   VCMPEQQ_N_S
   VCMPNEQ_N_S
-  VEORQ_S
   VHADDQ_S
   VHADDQ_N_S
   VHSUBQ_S
@@ -653,7 +652,6 @@ (define_c_enum "unspec" [
   VCMPEQQ_U
   VCMPEQQ_N_U
   VCMPNEQ_N_U
-  VEORQ_U
   VHADDQ_U
   VHADDQ_N_U
   VHSUBQ_U
@@ -736,7 +734,6 @@ (define_c_enum "unspec" [
   VCMULQ_ROT180_F
   VCMULQ_ROT270_F
   VCMULQ_ROT90_F
-  VEORQ_F
   VMAXNMAQ_F
   VMAXNMAVQ_F
   VMAXNMQ_F
diff --git a/gcc/config/arm/vec-common.md b/gcc/config/arm/vec-common.md
index 8d9c89c..030ed82 100644
--- a/gcc/config/arm/vec-common.md
+++ b/gcc/config/arm/vec-common.md
@@ -186,3 +186,10 @@ (define_expand "ior<mode>3"
 		 (match_operand:VDQ 2 "neon_logic_op2" "")))]
   "ARM_HAVE_<MODE>_ARITH"
 )
+
+(define_expand "xor<mode>3"
+  [(set (match_operand:VDQ 0 "s_register_operand" "")
+	(xor:VDQ (match_operand:VDQ 1 "s_register_operand" "")
+		 (match_operand:VDQ 2 "s_register_operand" "")))]
+  "ARM_HAVE_<MODE>_ARITH"
+)
diff --git a/gcc/testsuite/gcc.target/arm/simd/mve-veor.c b/gcc/testsuite/gcc.target/arm/simd/mve-veor.c
new file mode 100644
index 0000000..321961f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/simd/mve-veor.c
@@ -0,0 +1,61 @@
+/* { dg-do assemble } */
+/* { dg-require-effective-target arm_v8_1m_mve_ok } */
+/* { dg-add-options arm_v8_1m_mve } */
+/* { dg-additional-options "-O3" } */
+
+#include <stdint.h>
+
+#define FUNC(SIGN, TYPE, BITS, NB, OP, NAME)				\
+  void test_ ## NAME ##_ ## SIGN ## BITS ## x ## NB (TYPE##BITS##_t * __restrict__ dest, TYPE##BITS##_t *a, TYPE##BITS##_t *b) { \
+    int i;								\
+    for (i=0; i<NB; i++) {						\
+      dest[i] = a[i] OP b[i];						\
+    }									\
+}
+
+#define FUNC_IMM(SIGN, TYPE, BITS, NB, OP, NAME)				\
+  void test_ ## NAME ##_ ## SIGN ## BITS ## x ## NB (TYPE##BITS##_t * __restrict__ dest, TYPE##BITS##_t *a) { \
+    int i;								\
+    for (i=0; i<NB; i++) {						\
+      dest[i] = a[i] OP 1;						\
+    }									\
+}
+
+/* 64-bit vectors.  */
+FUNC(s, int, 32, 2, ^, veor)
+FUNC(u, uint, 32, 2, ^, veor)
+FUNC(s, int, 16, 4, ^, veor)
+FUNC(u, uint, 16, 4, ^, veor)
+FUNC(s, int, 8, 8, ^, veor)
+FUNC(u, uint, 8, 8, ^, veor)
+
+/* 128-bit vectors.  */
+FUNC(s, int, 32, 4, ^, veor)
+FUNC(u, uint, 32, 4, ^, veor)
+FUNC(s, int, 16, 8, ^, veor)
+FUNC(u, uint, 16, 8, ^, veor)
+FUNC(s, int, 8, 16, ^, veor)
+FUNC(u, uint, 8, 16, ^, veor)
+
+/* 64-bit vectors.  */
+FUNC_IMM(s, int, 32, 2, ^, veorimm)
+FUNC_IMM(u, uint, 32, 2, ^, veorimm)
+FUNC_IMM(s, int, 16, 4, ^, veorimm)
+FUNC_IMM(u, uint, 16, 4, ^, veorimm)
+FUNC_IMM(s, int, 8, 8, ^, veorimm)
+FUNC_IMM(u, uint, 8, 8, ^, veorimm)
+
+/* 128-bit vectors.  */
+FUNC_IMM(s, int, 32, 4, ^, veorimm)
+FUNC_IMM(u, uint, 32, 4, ^, veorimm)
+FUNC_IMM(s, int, 16, 8, ^, veorimm)
+FUNC_IMM(u, uint, 16, 8, ^, veorimm)
+FUNC_IMM(s, int, 8, 16, ^, veorimm)
+FUNC_IMM(u, uint, 8, 16, ^, veorimm)
+
+/* MVE has only 128-bit vectors, so we can vectorize only half of the
+   functions above.  */
+/* Although float16 and float32 types are supported at assembly level,
+   we cannot test them with the '^' operator, so we check only the
+   integer variants.  */
+/* { dg-final { scan-assembler-times {veor\tq[0-9]+, q[0-9]+, q[0-9]+} 12 } } */
-- 
2.7.4


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH v3 2/4] arm: Auto-vectorization for MVE: vbic
  2020-12-11 18:37 [PATCH v3 1/4] arm: Auto-vectorization for MVE: veor Christophe Lyon
@ 2020-12-11 18:37 ` Christophe Lyon
  2020-12-14  9:48   ` Kyrylo Tkachov
  2020-12-11 18:37 ` [PATCH v3 3/4] arm: Auto-vectorization for MVE: vmvn Christophe Lyon
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 8+ messages in thread
From: Christophe Lyon @ 2020-12-11 18:37 UTC (permalink / raw)
  To: gcc-patches

This patch enables MVE vbic instructions for auto-vectorization.  MVE
vbicq insns in mve.md are modified to use 'and not' instead of unspec
expression.

2020-12-11  Christophe Lyon  <christophe.lyon@linaro.org>

	gcc/
	* config/arm/iterators.md (supf): Remove VBICQ_S and VBICQ_U.
	(VBICQ): Remove.
	* config/arm/mve.md (mve_vbicq_u<mode>): New entry for vbic
	instruction using expression and not.
	(mve_vbicq_s<mode>): New expander.
	(mve_vbicq_f<mode>): Replace use of unspec by 'and not'.
	* config/arm/unspecs.md (VBICQ_S, VBICQ_U, VBICQ_F): Remove.

	gcc/testsuite/
	* gcc.target/arm/simd/mve-vbic.c: Add tests for vbic.
---
 gcc/config/arm/iterators.md                  |  3 +-
 gcc/config/arm/mve.md                        | 23 ++++++----
 gcc/config/arm/unspecs.md                    |  3 --
 gcc/testsuite/gcc.target/arm/simd/mve-vbic.c | 65 ++++++++++++++++++++++++++++
 4 files changed, 81 insertions(+), 13 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/arm/simd/mve-vbic.c

diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
index 0195275..26351e0 100644
--- a/gcc/config/arm/iterators.md
+++ b/gcc/config/arm/iterators.md
@@ -1232,7 +1232,7 @@ (define_int_attr supf [(VCVTQ_TO_F_S "s") (VCVTQ_TO_F_U "u") (VREV16Q_S "s")
 		       (VADDLVQ_P_U "u") (VCMPNEQ_U "u") (VCMPNEQ_S "s")
 		       (VABDQ_M_S "s") (VABDQ_M_U "u") (VABDQ_S "s")
 		       (VABDQ_U "u") (VADDQ_N_S "s") (VADDQ_N_U "u")
-		       (VADDVQ_P_S "s")	(VADDVQ_P_U "u") (VBICQ_S "s") (VBICQ_U "u")
+		       (VADDVQ_P_S "s")	(VADDVQ_P_U "u")
 		       (VBRSRQ_N_S "s") (VBRSRQ_N_U "u") (VCADDQ_ROT270_S "s")
 		       (VCADDQ_ROT270_U "u") (VCADDQ_ROT90_S "s")
 		       (VCMPEQQ_S "s") (VCMPEQQ_U "u") (VCADDQ_ROT90_U "u")
@@ -1500,7 +1500,6 @@ (define_int_iterator VABDQ [VABDQ_S VABDQ_U])
 (define_int_iterator VADDQ_N [VADDQ_N_S VADDQ_N_U])
 (define_int_iterator VADDVAQ [VADDVAQ_S VADDVAQ_U])
 (define_int_iterator VADDVQ_P [VADDVQ_P_U VADDVQ_P_S])
-(define_int_iterator VBICQ [VBICQ_S VBICQ_U])
 (define_int_iterator VBRSRQ_N [VBRSRQ_N_U VBRSRQ_N_S])
 (define_int_iterator VCADDQ_ROT270 [VCADDQ_ROT270_S VCADDQ_ROT270_U])
 (define_int_iterator VCADDQ_ROT90 [VCADDQ_ROT90_U VCADDQ_ROT90_S])
diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
index 10512ad..0505537 100644
--- a/gcc/config/arm/mve.md
+++ b/gcc/config/arm/mve.md
@@ -922,18 +922,26 @@ (define_expand "mve_vandq_s<mode>"
 ;;
 ;; [vbicq_s, vbicq_u])
 ;;
-(define_insn "mve_vbicq_<supf><mode>"
+(define_insn "mve_vbicq_u<mode>"
   [
    (set (match_operand:MVE_2 0 "s_register_operand" "=w")
-	(unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "w")
-		       (match_operand:MVE_2 2 "s_register_operand" "w")]
-	 VBICQ))
+	(and:MVE_2 (not:MVE_2 (match_operand:MVE_2 2 "s_register_operand" "w"))
+			      (match_operand:MVE_2 1 "s_register_operand" "w")))
   ]
   "TARGET_HAVE_MVE"
-  "vbic %q0, %q1, %q2"
+  "vbic\t%q0, %q1, %q2"
   [(set_attr "type" "mve_move")
 ])
 
+(define_expand "mve_vbicq_s<mode>"
+  [
+   (set (match_operand:MVE_2 0 "s_register_operand")
+	(and:MVE_2 (not:MVE_2 (match_operand:MVE_2 2 "s_register_operand"))
+		   (match_operand:MVE_2 1 "s_register_operand")))
+  ]
+  "TARGET_HAVE_MVE"
+)
+
 ;;
 ;; [vbrsrq_n_u, vbrsrq_n_s])
 ;;
@@ -2066,9 +2074,8 @@ (define_insn "mve_vandq_f<mode>"
 (define_insn "mve_vbicq_f<mode>"
   [
    (set (match_operand:MVE_0 0 "s_register_operand" "=w")
-	(unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "w")
-		       (match_operand:MVE_0 2 "s_register_operand" "w")]
-	 VBICQ_F))
+	(and:MVE_0 (not:MVE_0 (match_operand:MVE_0 1 "s_register_operand" "w"))
+			      (match_operand:MVE_0 2 "s_register_operand" "w")))
   ]
   "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
   "vbic %q0, %q1, %q2"
diff --git a/gcc/config/arm/unspecs.md b/gcc/config/arm/unspecs.md
index fe240e8..8a4389a 100644
--- a/gcc/config/arm/unspecs.md
+++ b/gcc/config/arm/unspecs.md
@@ -601,7 +601,6 @@ (define_c_enum "unspec" [
   VADDQ_N_S
   VADDVAQ_S
   VADDVQ_P_S
-  VBICQ_S
   VBRSRQ_N_S
   VCADDQ_ROT270_S
   VCADDQ_ROT90_S
@@ -645,7 +644,6 @@ (define_c_enum "unspec" [
   VADDQ_N_U
   VADDVAQ_U
   VADDVQ_P_U
-  VBICQ_U
   VBRSRQ_N_U
   VCADDQ_ROT270_U
   VCADDQ_ROT90_U
@@ -715,7 +713,6 @@ (define_c_enum "unspec" [
   VABDQ_M_U
   VABDQ_F
   VADDQ_N_F
-  VBICQ_F
   VCADDQ_ROT270_F
   VCADDQ_ROT90_F
   VCMPEQQ_F
diff --git a/gcc/testsuite/gcc.target/arm/simd/mve-vbic.c b/gcc/testsuite/gcc.target/arm/simd/mve-vbic.c
new file mode 100644
index 0000000..c9a64c6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/simd/mve-vbic.c
@@ -0,0 +1,65 @@
+/* { dg-do assemble } */
+/* { dg-require-effective-target arm_v8_1m_mve_ok } */
+/* { dg-add-options arm_v8_1m_mve } */
+/* { dg-additional-options "-O3" } */
+
+#include <stdint.h>
+
+#define FUNC(SIGN, TYPE, BITS, NB, OP, NAME)				\
+  void test_ ## NAME ##_ ## SIGN ## BITS ## x ## NB (TYPE##BITS##_t * __restrict__ dest, TYPE##BITS##_t *a, TYPE##BITS##_t *b) { \
+    int i;								\
+    for (i=0; i<NB; i++) {						\
+      dest[i] = a[i] OP b[i];						\
+    }									\
+}
+
+#define FUNC_IMM(SIGN, TYPE, BITS, NB, OP, NAME)				\
+  void test_ ## NAME ##_ ## SIGN ## BITS ## x ## NB (TYPE##BITS##_t * __restrict__ dest, TYPE##BITS##_t *a) { \
+    int i;								\
+    for (i=0; i<NB; i++) {						\
+      dest[i] = a[i] OP 1;						\
+    }									\
+}
+
+/* 64-bit vectors.  */
+FUNC(s, int, 32, 2, & ~ , vbic)
+FUNC(u, uint, 32, 2, & ~ , vbic)
+FUNC(s, int, 16, 4, & ~ , vbic)
+FUNC(u, uint, 16, 4, & ~ , vbic)
+FUNC(s, int, 8, 8, & ~ , vbic)
+FUNC(u, uint, 8, 8, & ~ , vbic)
+
+/* 128-bit vectors.  */
+FUNC(s, int, 32, 4, & ~ , vbic)
+FUNC(u, uint, 32, 4, & ~ , vbic)
+FUNC(s, int, 16, 8, & ~ , vbic)
+FUNC(u, uint, 16, 8, & ~ , vbic)
+FUNC(s, int, 8, 16, & ~ , vbic)
+FUNC(u, uint, 8, 16, & ~ , vbic)
+
+/* 64-bit vectors.  */
+FUNC_IMM(s, int, 32, 2, & ~, vbicimm)
+FUNC_IMM(u, uint, 32, 2, & ~, vbicimm)
+FUNC_IMM(s, int, 16, 4, & ~, vbicimm)
+FUNC_IMM(u, uint, 16, 4, & ~, vbicimm)
+FUNC_IMM(s, int, 8, 8, & ~, vbicimm)
+FUNC_IMM(u, uint, 8, 8, & ~, vbicimm)
+
+/* 128-bit vectors.  */
+FUNC_IMM(s, int, 32, 4, & ~, vbicimm)
+FUNC_IMM(u, uint, 32, 4, & ~, vbicimm)
+FUNC_IMM(s, int, 16, 8, & ~, vbicimm)
+FUNC_IMM(u, uint, 16, 8, & ~, vbicimm)
+FUNC_IMM(s, int, 8, 16, & ~, vbicimm)
+FUNC_IMM(u, uint, 8, 16, & ~, vbicimm)
+
+/* MVE has only 128-bit vectors, so we can vectorize only half of the
+   functions above.  */
+/* We emit vand.i[16|32] qX, #XX for the first four versions of the
+   128-bit vector vbicimm tests.  */
+/* For some reason, we do not generate the immediate version for
+   int8x16 and uint8x16, we still use vldr to load the vector of
+   immediates.  */
+/* { dg-final { scan-assembler-times {vbic\tq[0-9]+, q[0-9]+, q[0-9]+} 6 } } */
+/* { dg-final { scan-assembler-times {vand.i[0-9]+\tq[0-9]+} 4 } } */
+/* { dg-final { scan-assembler-times {vand\tq[0-9]+, q[0-9]+, q[0-9]+} 2 } } */
-- 
2.7.4


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH v3 3/4] arm: Auto-vectorization for MVE: vmvn
  2020-12-11 18:37 [PATCH v3 1/4] arm: Auto-vectorization for MVE: veor Christophe Lyon
  2020-12-11 18:37 ` [PATCH v3 2/4] arm: Auto-vectorization for MVE: vbic Christophe Lyon
@ 2020-12-11 18:37 ` Christophe Lyon
  2020-12-14  9:54   ` Kyrylo Tkachov
  2020-12-11 18:37 ` [PATCH v3 4/4] arm: Auto-vectorization for MVE: vneg Christophe Lyon
  2020-12-14  9:31 ` [PATCH v3 1/4] arm: Auto-vectorization for MVE: veor Kyrylo Tkachov
  3 siblings, 1 reply; 8+ messages in thread
From: Christophe Lyon @ 2020-12-11 18:37 UTC (permalink / raw)
  To: gcc-patches

This patch enables MVE vmvnq instructions for auto-vectorization.  MVE
vmvnq insns in mve.md are modified to use 'not' instead of unspec
expression to support one_cmpl<mode>2.  The one_cmpl<mode>2 expander
is added to vec-common.md.

2020-12-11  Christophe Lyon  <christophe.lyon@linaro.org>

	gcc/
	* config/arm/iterators.md (VDQNOTM2): New mode iterator.
	(supf): Remove VMVNQ_S and VMVNQ_U.
	(VMVNQ): Remove.
	* config/arm/mve.md (mve_vmvnq_u<mode>): New entry for vmvn
	instruction using expression not.
	(mve_vmvnq_s<mode>): New expander.
	* config/arm/neon.md (one_cmpl<mode>2): Renamed into
	one_cmpl<mode>2_neon.
	* config/arm/unspecs.md (VMVNQ_S, VMVNQ_U): Remove.
	* config/arm/vec-common.md (one_cmpl<mode>2): New expander.

	gcc/testsuite/
	* gcc.target/arm/simd/mve-vmvn.c: Add tests for vmvn.
---
 gcc/config/arm/iterators.md                  |  3 +--
 gcc/config/arm/mve.md                        | 14 +++++++----
 gcc/config/arm/neon.md                       |  4 ++--
 gcc/config/arm/unspecs.md                    |  2 --
 gcc/config/arm/vec-common.md                 |  6 +++++
 gcc/testsuite/gcc.target/arm/simd/mve-vmvn.c | 35 ++++++++++++++++++++++++++++
 6 files changed, 54 insertions(+), 10 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/arm/simd/mve-vmvn.c

diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
index 26351e0..036a939 100644
--- a/gcc/config/arm/iterators.md
+++ b/gcc/config/arm/iterators.md
@@ -1216,7 +1216,7 @@ (define_int_attr mmla_sfx [(UNSPEC_MATMUL_S "s8") (UNSPEC_MATMUL_U "u8")
 (define_int_attr supf [(VCVTQ_TO_F_S "s") (VCVTQ_TO_F_U "u") (VREV16Q_S "s")
 		       (VREV16Q_U "u") (VMVNQ_N_S "s") (VMVNQ_N_U "u")
 		       (VCVTAQ_U "u") (VCVTAQ_S "s") (VREV64Q_S "s")
-		       (VREV64Q_U "u") (VMVNQ_S "s") (VMVNQ_U "u")
+		       (VREV64Q_U "u")
 		       (VDUPQ_N_U "u") (VDUPQ_N_S"s") (VADDVQ_S "s")
 		       (VADDVQ_U "u") (VADDVQ_S "s") (VADDVQ_U "u")
 		       (VMOVLTQ_U "u") (VMOVLTQ_S "s") (VMOVLBQ_S "s")
@@ -1476,7 +1476,6 @@ (define_int_iterator VREV64Q [VREV64Q_S VREV64Q_U])
 (define_int_iterator VCVTQ_FROM_F [VCVTQ_FROM_F_S VCVTQ_FROM_F_U])
 (define_int_iterator VREV16Q [VREV16Q_U VREV16Q_S])
 (define_int_iterator VCVTAQ [VCVTAQ_U VCVTAQ_S])
-(define_int_iterator VMVNQ [VMVNQ_U VMVNQ_S])
 (define_int_iterator VDUPQ_N [VDUPQ_N_U VDUPQ_N_S])
 (define_int_iterator VCLZQ [VCLZQ_U VCLZQ_S])
 (define_int_iterator VADDVQ [VADDVQ_U VADDVQ_S])
diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
index 0505537..86d7fc6 100644
--- a/gcc/config/arm/mve.md
+++ b/gcc/config/arm/mve.md
@@ -433,16 +433,22 @@ (define_insn "mve_vnegq_s<mode>"
 ;;
 ;; [vmvnq_u, vmvnq_s])
 ;;
-(define_insn "mve_vmvnq_<supf><mode>"
+(define_insn "mve_vmvnq_u<mode>"
   [
    (set (match_operand:MVE_2 0 "s_register_operand" "=w")
-	(unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "w")]
-	 VMVNQ))
+	(not:MVE_2 (match_operand:MVE_2 1 "s_register_operand" "w")))
   ]
   "TARGET_HAVE_MVE"
-  "vmvn %q0, %q1"
+  "vmvn\t%q0, %q1"
   [(set_attr "type" "mve_move")
 ])
+(define_expand "mve_vmvnq_s<mode>"
+  [
+   (set (match_operand:MVE_2 0 "s_register_operand")
+	(not:MVE_2 (match_operand:MVE_2 1 "s_register_operand")))
+  ]
+  "TARGET_HAVE_MVE"
+)
 
 ;;
 ;; [vdupq_n_u, vdupq_n_s])
diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
index e1263b0..f58d4f5 100644
--- a/gcc/config/arm/neon.md
+++ b/gcc/config/arm/neon.md
@@ -756,7 +756,7 @@ (define_insn "xor<mode>3_neon"
   [(set_attr "type" "neon_logic<q>")]
 )
 
-(define_insn "one_cmpl<mode>2"
+(define_insn "one_cmpl<mode>2_neon"
   [(set (match_operand:VDQ 0 "s_register_operand" "=w")
         (not:VDQ (match_operand:VDQ 1 "s_register_operand" "w")))]
   "TARGET_NEON"
@@ -3240,7 +3240,7 @@ (define_expand "neon_vmvn<mode>"
    (match_operand:VDQIW 1 "s_register_operand")]
   "TARGET_NEON"
 {
-  emit_insn (gen_one_cmpl<mode>2 (operands[0], operands[1]));
+  emit_insn (gen_one_cmpl<mode>2_neon (operands[0], operands[1]));
   DONE;
 })
 
diff --git a/gcc/config/arm/unspecs.md b/gcc/config/arm/unspecs.md
index 8a4389a..e581645 100644
--- a/gcc/config/arm/unspecs.md
+++ b/gcc/config/arm/unspecs.md
@@ -550,8 +550,6 @@ (define_c_enum "unspec" [
   VREV64Q_U
   VQABSQ_S
   VNEGQ_S
-  VMVNQ_S
-  VMVNQ_U
   VDUPQ_N_U
   VDUPQ_N_S
   VCLZQ_U
diff --git a/gcc/config/arm/vec-common.md b/gcc/config/arm/vec-common.md
index 030ed82..37ff518 100644
--- a/gcc/config/arm/vec-common.md
+++ b/gcc/config/arm/vec-common.md
@@ -193,3 +193,9 @@ (define_expand "xor<mode>3"
 		 (match_operand:VDQ 2 "s_register_operand" "")))]
   "ARM_HAVE_<MODE>_ARITH"
 )
+
+(define_expand "one_cmpl<mode>2"
+  [(set (match_operand:VDQ 0 "s_register_operand")
+	(not:VDQ (match_operand:VDQ 1 "s_register_operand")))]
+  "ARM_HAVE_<MODE>_ARITH"
+)
diff --git a/gcc/testsuite/gcc.target/arm/simd/mve-vmvn.c b/gcc/testsuite/gcc.target/arm/simd/mve-vmvn.c
new file mode 100644
index 0000000..73e897a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/simd/mve-vmvn.c
@@ -0,0 +1,35 @@
+/* { dg-do assemble } */
+/* { dg-require-effective-target arm_v8_1m_mve_ok } */
+/* { dg-add-options arm_v8_1m_mve } */
+/* { dg-additional-options "-O3" } */
+
+#include <stdint.h>
+
+#define FUNC(SIGN, TYPE, BITS, NB, OP, NAME)				\
+  void test_ ## NAME ##_ ## SIGN ## BITS ## x ## NB (TYPE##BITS##_t * __restrict__ dest, TYPE##BITS##_t *a) { \
+    int i;								\
+    for (i=0; i<NB; i++) {						\
+      dest[i] = OP a[i];						\
+    }									\
+}
+
+/* vmvnq supports only 16-bit and 32-bit elements.  */
+/* 64-bit vectors.  */
+FUNC(s, int, 32, 2, ~, vmvn)
+FUNC(u, uint, 32, 2, ~, vmvn)
+FUNC(s, int, 16, 4, ~, vmvn)
+FUNC(u, uint, 16, 4, ~, vmvn)
+FUNC(s, int, 8, 8, ~, vmvn)
+FUNC(u, uint, 8, 8, ~, vmvn)
+
+/* 128-bit vectors.  */
+FUNC(s, int, 32, 4, ~, vmvn)
+FUNC(u, uint, 32, 4, ~, vmvn)
+FUNC(s, int, 16, 8, ~, vmvn)
+FUNC(u, uint, 16, 8, ~, vmvn)
+FUNC(s, int, 8, 16, ~, vmvn)
+FUNC(u, uint, 8, 16, ~, vmvn)
+
+/* MVE has only 128-bit vectors, so we can vectorize only half of the
+   functions above.  */
+/* { dg-final { scan-assembler-times {vmvn\tq[0-9]+, q[0-9]+} 6 } } */
-- 
2.7.4


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH v3 4/4] arm: Auto-vectorization for MVE: vneg
  2020-12-11 18:37 [PATCH v3 1/4] arm: Auto-vectorization for MVE: veor Christophe Lyon
  2020-12-11 18:37 ` [PATCH v3 2/4] arm: Auto-vectorization for MVE: vbic Christophe Lyon
  2020-12-11 18:37 ` [PATCH v3 3/4] arm: Auto-vectorization for MVE: vmvn Christophe Lyon
@ 2020-12-11 18:37 ` Christophe Lyon
  2020-12-14 10:03   ` Kyrylo Tkachov
  2020-12-14  9:31 ` [PATCH v3 1/4] arm: Auto-vectorization for MVE: veor Kyrylo Tkachov
  3 siblings, 1 reply; 8+ messages in thread
From: Christophe Lyon @ 2020-12-11 18:37 UTC (permalink / raw)
  To: gcc-patches

This patch enables MVE vneg instructions for auto-vectorization.  MVE
vnegq insns in mve.md are modified to use 'neg' instead of unspec
expression.  The neg<mode>2 expander is added to vec-common.md.

Existing patterns in neon.md are prefixed with neon_.
It's not clear why we have different patterns for VDQW
and VH in neon.md, when VDQWH handles both, and patterns
with VDQ have provision for attributes for FP modes.

Another question is why <absneg_str><mode>2 always sets
neon_abs<q> type when it also handles neon_neg<q> cases.

2020-12-11  Christophe Lyon  <christophe.lyon@linaro.org>

	gcc/
	* config/arm/mve.md (mve_vnegq_f): Use 'neg' instead of unspec.
	(mve_vnegq_s): Likewise.
	* config/arm/neon.md (neg<mode>2): Rename into neon_neg<mode>2.
	(<absneg_str><mode>2): Rename into neon_<absneg_str><mode>2.
	(neon_v<absneg_str><mode>): Call gen_neon_<absneg_str><mode>2.
	(vashr<mode>3): Call gen_neon_neg<mode>2.
	(vlshr<mode>3): Call gen_neon_neg<mode>2.
	(neon_vneg<mode>): Call gen_neon_neg<mode>2.
	* config/arm/unspecs.md (VNEGQ_F, VNEGQ_S): Remove.
	* config/arm/vec-common.md (neg<mode>2): New expander.

	gcc/testsuite/
	* gcc.target/arm/simd/mve-vneg.c: Add tests for vneg.
---
 gcc/config/arm/mve.md                        |  6 ++--
 gcc/config/arm/neon.md                       | 12 +++----
 gcc/config/arm/unspecs.md                    |  2 --
 gcc/config/arm/vec-common.md                 |  6 ++++
 gcc/testsuite/gcc.target/arm/simd/mve-vneg.c | 49 ++++++++++++++++++++++++++++
 5 files changed, 63 insertions(+), 12 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/arm/simd/mve-vneg.c

diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
index 86d7fc6..b4c5a1e2 100644
--- a/gcc/config/arm/mve.md
+++ b/gcc/config/arm/mve.md
@@ -271,8 +271,7 @@ (define_insn "mve_vrev64q_f<mode>"
 (define_insn "mve_vnegq_f<mode>"
   [
    (set (match_operand:MVE_0 0 "s_register_operand" "=w")
-	(unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "w")]
-	 VNEGQ_F))
+	(neg:MVE_0 (match_operand:MVE_0 1 "s_register_operand" "w")))
   ]
   "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
   "vneg.f%#<V_sz_elem>  %q0, %q1"
@@ -422,8 +421,7 @@ (define_insn "mve_vqabsq_s<mode>"
 (define_insn "mve_vnegq_s<mode>"
   [
    (set (match_operand:MVE_2 0 "s_register_operand" "=w")
-	(unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "w")]
-	 VNEGQ_S))
+	(neg:MVE_2 (match_operand:MVE_2 1 "s_register_operand" "w")))
   ]
   "TARGET_HAVE_MVE"
   "vneg.s%#<V_sz_elem>  %q0, %q1"
diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
index f58d4f5..d2e92ba 100644
--- a/gcc/config/arm/neon.md
+++ b/gcc/config/arm/neon.md
@@ -775,7 +775,7 @@ (define_insn "abs<mode>2"
                     (const_string "neon_abs<q>")))]
 )
 
-(define_insn "neg<mode>2"
+(define_insn "neon_neg<mode>2"
   [(set (match_operand:VDQW 0 "s_register_operand" "=w")
 	(neg:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))]
   "TARGET_NEON"
@@ -786,7 +786,7 @@ (define_insn "neg<mode>2"
                     (const_string "neon_neg<q>")))]
 )
 
-(define_insn "<absneg_str><mode>2"
+(define_insn "neon_<absneg_str><mode>2"
   [(set (match_operand:VH 0 "s_register_operand" "=w")
     (ABSNEG:VH (match_operand:VH 1 "s_register_operand" "w")))]
  "TARGET_NEON_FP16INST"
@@ -800,7 +800,7 @@ (define_expand "neon_v<absneg_str><mode>"
    (ABSNEG:VH (match_operand:VH 1 "s_register_operand")))]
  "TARGET_NEON_FP16INST"
 {
-  emit_insn (gen_<absneg_str><mode>2 (operands[0], operands[1]));
+  emit_insn (gen_neon_<absneg_str><mode>2 (operands[0], operands[1]));
   DONE;
 })
 
@@ -952,7 +952,7 @@ (define_expand "vashr<mode>3"
   if (s_register_operand (operands[2], <MODE>mode))
     {
       rtx neg = gen_reg_rtx (<MODE>mode);
-      emit_insn (gen_neg<mode>2 (neg, operands[2]));
+      emit_insn (gen_neon_neg<mode>2 (neg, operands[2]));
       emit_insn (gen_ashl<mode>3_signed (operands[0], operands[1], neg));
     }
   else
@@ -969,7 +969,7 @@ (define_expand "vlshr<mode>3"
   if (s_register_operand (operands[2], <MODE>mode))
     {
       rtx neg = gen_reg_rtx (<MODE>mode);
-      emit_insn (gen_neg<mode>2 (neg, operands[2]));
+      emit_insn (gen_neon_neg<mode>2 (neg, operands[2]));
       emit_insn (gen_ashl<mode>3_unsigned (operands[0], operands[1], neg));
     }
   else
@@ -2953,7 +2953,7 @@ (define_expand "neon_vneg<mode>"
    (match_operand:VDQW 1 "s_register_operand")]
   "TARGET_NEON"
 {
-  emit_insn (gen_neg<mode>2 (operands[0], operands[1]));
+  emit_insn (gen_neon_neg<mode>2 (operands[0], operands[1]));
   DONE;
 })
 
diff --git a/gcc/config/arm/unspecs.md b/gcc/config/arm/unspecs.md
index e581645..ef64989 100644
--- a/gcc/config/arm/unspecs.md
+++ b/gcc/config/arm/unspecs.md
@@ -530,7 +530,6 @@ (define_c_enum "unspec" [
   VRNDMQ_F
   VRNDAQ_F
   VREV64Q_F
-  VNEGQ_F
   VDUPQ_N_F
   VABSQ_F
   VREV32Q_F
@@ -549,7 +548,6 @@ (define_c_enum "unspec" [
   VREV64Q_S
   VREV64Q_U
   VQABSQ_S
-  VNEGQ_S
   VDUPQ_N_U
   VDUPQ_N_S
   VCLZQ_U
diff --git a/gcc/config/arm/vec-common.md b/gcc/config/arm/vec-common.md
index 37ff518..2d0932b 100644
--- a/gcc/config/arm/vec-common.md
+++ b/gcc/config/arm/vec-common.md
@@ -199,3 +199,9 @@ (define_expand "one_cmpl<mode>2"
 	(not:VDQ (match_operand:VDQ 1 "s_register_operand")))]
   "ARM_HAVE_<MODE>_ARITH"
 )
+
+(define_expand "neg<mode>2"
+  [(set (match_operand:VDQWH 0 "s_register_operand" "")
+	(neg:VDQWH (match_operand:VDQWH 1 "s_register_operand" "")))]
+  "ARM_HAVE_<MODE>_ARITH"
+)
diff --git a/gcc/testsuite/gcc.target/arm/simd/mve-vneg.c b/gcc/testsuite/gcc.target/arm/simd/mve-vneg.c
new file mode 100644
index 0000000..afd0d60
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/simd/mve-vneg.c
@@ -0,0 +1,49 @@
+/* { dg-do assemble } */
+/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
+/* { dg-add-options arm_v8_1m_mve_fp } */
+/* { dg-additional-options "-O3" } */
+
+#include <stdint.h>
+#include <arm_mve.h>
+
+#define FUNC(SIGN, TYPE, BITS, NB, OP, NAME)				\
+  void test_ ## NAME ##_ ## SIGN ## BITS ## x ## NB (TYPE##BITS##_t * __restrict__ dest, TYPE##BITS##_t *a) { \
+    int i;								\
+    for (i=0; i<NB; i++) {						\
+      dest[i] = OP a[i];						\
+    }									\
+}
+
+#define FUNC_FLOAT(SIGN, TYPE, BITS, NB, OP, NAME)				\
+  void test_ ## NAME ##_ ## SIGN ## BITS ## x ## NB (TYPE * __restrict__ dest, TYPE *a) { \
+    int i;								\
+    for (i=0; i<NB; i++) {						\
+      dest[i] = OP a[i];						\
+    }									\
+}
+
+/* vmnvq supports only 16-bit and 32-bit elements.  */
+/* 64-bit vectors.  */
+FUNC(s, int, 32, 2, -, vneg)
+FUNC(u, uint, 32, 2, -, vneg)
+FUNC(s, int, 16, 4, -, vneg)
+FUNC(u, uint, 16, 4, -, vneg)
+FUNC(s, int, 8, 8, -, vneg)
+FUNC(u, uint, 8, 8, -, vneg)
+FUNC_FLOAT(f, float, 32, 2, -, vneg)
+FUNC(f, float, 16, 4, -, vneg)
+
+/* 128-bit vectors.  */
+FUNC(s, int, 32, 4, -, vneg)
+FUNC(u, uint, 32, 4, -, vneg)
+FUNC(s, int, 16, 8, -, vneg)
+FUNC(u, uint, 16, 8, -, vneg)
+FUNC(s, int, 8, 16, -, vneg)
+FUNC(u, uint, 8, 16, -, vneg)
+FUNC_FLOAT(f, float, 32, 4, -, vneg)
+FUNC(f, float, 16, 8, -, vneg)
+
+/* MVE has only 128-bit vectors, so we can vectorize only half of the
+   functions above.  */
+/* { dg-final { scan-assembler-times {vneg.s[0-9]+  q[0-9]+, q[0-9]+} 6 } } */
+/* { dg-final { scan-assembler-times {vneg.f[0-9]+  q[0-9]+, q[0-9]+} 2 } } */
-- 
2.7.4


^ permalink raw reply	[flat|nested] 8+ messages in thread

* RE: [PATCH v3 1/4] arm: Auto-vectorization for MVE: veor
  2020-12-11 18:37 [PATCH v3 1/4] arm: Auto-vectorization for MVE: veor Christophe Lyon
                   ` (2 preceding siblings ...)
  2020-12-11 18:37 ` [PATCH v3 4/4] arm: Auto-vectorization for MVE: vneg Christophe Lyon
@ 2020-12-14  9:31 ` Kyrylo Tkachov
  3 siblings, 0 replies; 8+ messages in thread
From: Kyrylo Tkachov @ 2020-12-14  9:31 UTC (permalink / raw)
  To: Christophe Lyon; +Cc: gcc-patches



> -----Original Message-----
> From: Gcc-patches <gcc-patches-bounces@gcc.gnu.org> On Behalf Of
> Christophe Lyon via Gcc-patches
> Sent: 11 December 2020 18:37
> To: gcc-patches@gcc.gnu.org
> Subject: [PATCH v3 1/4] arm: Auto-vectorization for MVE: veor
> 
> This patch enables MVE veorq instructions for auto-vectorization.  MVE
> veorq insns in mve.md are modified to use xor instead of unspec
> expression to support xor<mode>3.  The xor<mode>3 expander is added to
> vec-common.md
> 

Ok.
Thanks,
Kyrill

> 2020-12-11  Christophe Lyon  <christophe.lyon@linaro.org>
> 
> 	gcc/
> 	* config/arm/iterators.md (supf): Remove VEORQ_S and VEORQ_U.
> 	(VEORQ): Remove.
> 	* config/arm/mve.md (mve_veorq_u<mode>): New entry for veor
> 	instruction using expression xor.
> 	(mve_veorq_s<mode>): New expander.
> 	(mve_veorq_f<mode>): Use 'xor' code instead of unspec.
> 	* config/arm/neon.md (xor<mode>3): Renamed into
> xor<mode>3_neon.
> 	* config/arm/unspecs.md (VEORQ_S, VEORQ_U, VEORQ_F): Remove.
> 	* config/arm/vec-common.md (xor<mode>3): New expander.
> 
> 	gcc/testsuite/
> 	* gcc.target/arm/simd/mve-veor.c: Add tests for veor.
> ---
>  gcc/config/arm/iterators.md                  |  3 +-
>  gcc/config/arm/mve.md                        | 22 ++++++----
>  gcc/config/arm/neon.md                       |  2 +-
>  gcc/config/arm/unspecs.md                    |  3 --
>  gcc/config/arm/vec-common.md                 |  7 ++++
>  gcc/testsuite/gcc.target/arm/simd/mve-veor.c | 61
> ++++++++++++++++++++++++++++
>  6 files changed, 84 insertions(+), 14 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/arm/simd/mve-veor.c
> 
> diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
> index 5fcb7af..0195275 100644
> --- a/gcc/config/arm/iterators.md
> +++ b/gcc/config/arm/iterators.md
> @@ -1237,7 +1237,7 @@ (define_int_attr supf [(VCVTQ_TO_F_S "s")
> (VCVTQ_TO_F_U "u") (VREV16Q_S "s")
>  		       (VCADDQ_ROT270_U "u") (VCADDQ_ROT90_S "s")
>  		       (VCMPEQQ_S "s") (VCMPEQQ_U "u")
> (VCADDQ_ROT90_U "u")
>  		       (VCMPEQQ_N_S "s") (VCMPEQQ_N_U "u")
> (VCMPNEQ_N_S "s")
> -		       (VCMPNEQ_N_U "u") (VEORQ_S "s") (VEORQ_U "u")
> +		       (VCMPNEQ_N_U "u")
>  		       (VHADDQ_N_S "s") (VHADDQ_N_U "u") (VHADDQ_S "s")
>  		       (VHADDQ_U "u") (VHSUBQ_N_S "s")
> 	(VHSUBQ_N_U "u")
>  		       (VHSUBQ_S "s") (VMAXQ_S "s") (VMAXQ_U "u")
> (VHSUBQ_U "u")
> @@ -1507,7 +1507,6 @@ (define_int_iterator VCADDQ_ROT90
> [VCADDQ_ROT90_U VCADDQ_ROT90_S])
>  (define_int_iterator VCMPEQQ [VCMPEQQ_U VCMPEQQ_S])
>  (define_int_iterator VCMPEQQ_N [VCMPEQQ_N_S VCMPEQQ_N_U])
>  (define_int_iterator VCMPNEQ_N [VCMPNEQ_N_U VCMPNEQ_N_S])
> -(define_int_iterator VEORQ [VEORQ_U VEORQ_S])
>  (define_int_iterator VHADDQ [VHADDQ_S VHADDQ_U])
>  (define_int_iterator VHADDQ_N [VHADDQ_N_U VHADDQ_N_S])
>  (define_int_iterator VHSUBQ [VHSUBQ_S VHSUBQ_U])
> diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
> index 4b2e46a..10512ad 100644
> --- a/gcc/config/arm/mve.md
> +++ b/gcc/config/arm/mve.md
> @@ -1207,17 +1207,24 @@ (define_insn "mve_vcmpneq_n_<supf><mode>"
>  ;;
>  ;; [veorq_u, veorq_s])
>  ;;
> -(define_insn "mve_veorq_<supf><mode>"
> +(define_insn "mve_veorq_u<mode>"
>    [
>     (set (match_operand:MVE_2 0 "s_register_operand" "=w")
> -	(unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "w")
> -		       (match_operand:MVE_2 2 "s_register_operand" "w")]
> -	 VEORQ))
> +	(xor:MVE_2 (match_operand:MVE_2 1 "s_register_operand" "w")
> +		   (match_operand:MVE_2 2 "s_register_operand" "w")))
>    ]
>    "TARGET_HAVE_MVE"
> -  "veor %q0, %q1, %q2"
> +  "veor\t%q0, %q1, %q2"
>    [(set_attr "type" "mve_move")
>  ])
> +(define_expand "mve_veorq_s<mode>"
> +  [
> +   (set (match_operand:MVE_2 0 "s_register_operand")
> +	(xor:MVE_2 (match_operand:MVE_2 1 "s_register_operand")
> +		   (match_operand:MVE_2 2 "s_register_operand")))
> +  ]
> +  "TARGET_HAVE_MVE"
> +)
> 
>  ;;
>  ;; [vhaddq_n_u, vhaddq_n_s])
> @@ -2404,9 +2411,8 @@ (define_insn "mve_vcvttq_f16_f32v8hf"
>  (define_insn "mve_veorq_f<mode>"
>    [
>     (set (match_operand:MVE_0 0 "s_register_operand" "=w")
> -	(unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "w")
> -		       (match_operand:MVE_0 2 "s_register_operand" "w")]
> -	 VEORQ_F))
> +	(xor:MVE_0 (match_operand:MVE_0 1 "s_register_operand" "w")
> +		   (match_operand:MVE_0 2 "s_register_operand" "w")))
>    ]
>    "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
>    "veor %q0, %q1, %q2"
> diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
> index 669c34d..e1263b0 100644
> --- a/gcc/config/arm/neon.md
> +++ b/gcc/config/arm/neon.md
> @@ -747,7 +747,7 @@ (define_insn "bic<mode>3_neon"
>    [(set_attr "type" "neon_logic<q>")]
>  )
> 
> -(define_insn "xor<mode>3"
> +(define_insn "xor<mode>3_neon"
>    [(set (match_operand:VDQ 0 "s_register_operand" "=w")
>  	(xor:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
>  		 (match_operand:VDQ 2 "s_register_operand" "w")))]
> diff --git a/gcc/config/arm/unspecs.md b/gcc/config/arm/unspecs.md
> index c2076c9..fe240e8 100644
> --- a/gcc/config/arm/unspecs.md
> +++ b/gcc/config/arm/unspecs.md
> @@ -608,7 +608,6 @@ (define_c_enum "unspec" [
>    VCMPEQQ_S
>    VCMPEQQ_N_S
>    VCMPNEQ_N_S
> -  VEORQ_S
>    VHADDQ_S
>    VHADDQ_N_S
>    VHSUBQ_S
> @@ -653,7 +652,6 @@ (define_c_enum "unspec" [
>    VCMPEQQ_U
>    VCMPEQQ_N_U
>    VCMPNEQ_N_U
> -  VEORQ_U
>    VHADDQ_U
>    VHADDQ_N_U
>    VHSUBQ_U
> @@ -736,7 +734,6 @@ (define_c_enum "unspec" [
>    VCMULQ_ROT180_F
>    VCMULQ_ROT270_F
>    VCMULQ_ROT90_F
> -  VEORQ_F
>    VMAXNMAQ_F
>    VMAXNMAVQ_F
>    VMAXNMQ_F
> diff --git a/gcc/config/arm/vec-common.md b/gcc/config/arm/vec-
> common.md
> index 8d9c89c..030ed82 100644
> --- a/gcc/config/arm/vec-common.md
> +++ b/gcc/config/arm/vec-common.md
> @@ -186,3 +186,10 @@ (define_expand "ior<mode>3"
>  		 (match_operand:VDQ 2 "neon_logic_op2" "")))]
>    "ARM_HAVE_<MODE>_ARITH"
>  )
> +
> +(define_expand "xor<mode>3"
> +  [(set (match_operand:VDQ 0 "s_register_operand" "")
> +	(xor:VDQ (match_operand:VDQ 1 "s_register_operand" "")
> +		 (match_operand:VDQ 2 "s_register_operand" "")))]
> +  "ARM_HAVE_<MODE>_ARITH"
> +)
> diff --git a/gcc/testsuite/gcc.target/arm/simd/mve-veor.c
> b/gcc/testsuite/gcc.target/arm/simd/mve-veor.c
> new file mode 100644
> index 0000000..321961f
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/arm/simd/mve-veor.c
> @@ -0,0 +1,61 @@
> +/* { dg-do assemble } */
> +/* { dg-require-effective-target arm_v8_1m_mve_ok } */
> +/* { dg-add-options arm_v8_1m_mve } */
> +/* { dg-additional-options "-O3" } */
> +
> +#include <stdint.h>
> +
> +#define FUNC(SIGN, TYPE, BITS, NB, OP, NAME)
> 	\
> +  void test_ ## NAME ##_ ## SIGN ## BITS ## x ## NB (TYPE##BITS##_t *
> __restrict__ dest, TYPE##BITS##_t *a, TYPE##BITS##_t *b) { \
> +    int i;								\
> +    for (i=0; i<NB; i++) {						\
> +      dest[i] = a[i] OP b[i];						\
> +    }									\
> +}
> +
> +#define FUNC_IMM(SIGN, TYPE, BITS, NB, OP, NAME)
> 	\
> +  void test_ ## NAME ##_ ## SIGN ## BITS ## x ## NB (TYPE##BITS##_t *
> __restrict__ dest, TYPE##BITS##_t *a) { \
> +    int i;								\
> +    for (i=0; i<NB; i++) {						\
> +      dest[i] = a[i] OP 1;						\
> +    }									\
> +}
> +
> +/* 64-bit vectors.  */
> +FUNC(s, int, 32, 2, ^, veor)
> +FUNC(u, uint, 32, 2, ^, veor)
> +FUNC(s, int, 16, 4, ^, veor)
> +FUNC(u, uint, 16, 4, ^, veor)
> +FUNC(s, int, 8, 8, ^, veor)
> +FUNC(u, uint, 8, 8, ^, veor)
> +
> +/* 128-bit vectors.  */
> +FUNC(s, int, 32, 4, ^, veor)
> +FUNC(u, uint, 32, 4, ^, veor)
> +FUNC(s, int, 16, 8, ^, veor)
> +FUNC(u, uint, 16, 8, ^, veor)
> +FUNC(s, int, 8, 16, ^, veor)
> +FUNC(u, uint, 8, 16, ^, veor)
> +
> +/* 64-bit vectors.  */
> +FUNC_IMM(s, int, 32, 2, ^, veorimm)
> +FUNC_IMM(u, uint, 32, 2, ^, veorimm)
> +FUNC_IMM(s, int, 16, 4, ^, veorimm)
> +FUNC_IMM(u, uint, 16, 4, ^, veorimm)
> +FUNC_IMM(s, int, 8, 8, ^, veorimm)
> +FUNC_IMM(u, uint, 8, 8, ^, veorimm)
> +
> +/* 128-bit vectors.  */
> +FUNC_IMM(s, int, 32, 4, ^, veorimm)
> +FUNC_IMM(u, uint, 32, 4, ^, veorimm)
> +FUNC_IMM(s, int, 16, 8, ^, veorimm)
> +FUNC_IMM(u, uint, 16, 8, ^, veorimm)
> +FUNC_IMM(s, int, 8, 16, ^, veorimm)
> +FUNC_IMM(u, uint, 8, 16, ^, veorimm)
> +
> +/* MVE has only 128-bit vectors, so we can vectorize only half of the
> +   functions above.  */
> +/* Although float16 and float32 types are supported at assembly level,
> +   we cannot test them with the '^' operator, so we check only the
> +   integer variants.  */
> +/* { dg-final { scan-assembler-times {veor\tq[0-9]+, q[0-9]+, q[0-9]+} 12 } }
> */
> --
> 2.7.4


^ permalink raw reply	[flat|nested] 8+ messages in thread

* RE: [PATCH v3 2/4] arm: Auto-vectorization for MVE: vbic
  2020-12-11 18:37 ` [PATCH v3 2/4] arm: Auto-vectorization for MVE: vbic Christophe Lyon
@ 2020-12-14  9:48   ` Kyrylo Tkachov
  0 siblings, 0 replies; 8+ messages in thread
From: Kyrylo Tkachov @ 2020-12-14  9:48 UTC (permalink / raw)
  To: Christophe Lyon; +Cc: gcc-patches



> -----Original Message-----
> From: Gcc-patches <gcc-patches-bounces@gcc.gnu.org> On Behalf Of
> Christophe Lyon via Gcc-patches
> Sent: 11 December 2020 18:37
> To: gcc-patches@gcc.gnu.org
> Subject: [PATCH v3 2/4] arm: Auto-vectorization for MVE: vbic
> 
> This patch enables MVE vbic instructions for auto-vectorization.  MVE
> vbicq insns in mve.md are modified to use 'and not' instead of unspec
> expression.

Ok.
Thanks,
Kyrill

> 
> 2020-12-11  Christophe Lyon  <christophe.lyon@linaro.org>
> 
> 	gcc/
> 	* config/arm/iterators.md (supf): Remove VBICQ_S and VBICQ_U.
> 	(VBICQ): Remove.
> 	* config/arm/mve.md (mve_vbicq_u<mode>): New entry for vbic
> 	instruction using expression and not.
> 	(mve_vbicq_s<mode>): New expander.
> 	(mve_vbicq_f<mode>): Replace use of unspec by 'and not'.
> 	* config/arm/unspecs.md (VBICQ_S, VBICQ_U, VBICQ_F): Remove.
> 
> 	gcc/testsuite/
> 	* gcc.target/arm/simd/mve-vbic.c: Add tests for vbic.
> ---
>  gcc/config/arm/iterators.md                  |  3 +-
>  gcc/config/arm/mve.md                        | 23 ++++++----
>  gcc/config/arm/unspecs.md                    |  3 --
>  gcc/testsuite/gcc.target/arm/simd/mve-vbic.c | 65
> ++++++++++++++++++++++++++++
>  4 files changed, 81 insertions(+), 13 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/arm/simd/mve-vbic.c
> 
> diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
> index 0195275..26351e0 100644
> --- a/gcc/config/arm/iterators.md
> +++ b/gcc/config/arm/iterators.md
> @@ -1232,7 +1232,7 @@ (define_int_attr supf [(VCVTQ_TO_F_S "s")
> (VCVTQ_TO_F_U "u") (VREV16Q_S "s")
>  		       (VADDLVQ_P_U "u") (VCMPNEQ_U "u") (VCMPNEQ_S "s")
>  		       (VABDQ_M_S "s") (VABDQ_M_U "u") (VABDQ_S "s")
>  		       (VABDQ_U "u") (VADDQ_N_S "s") (VADDQ_N_U "u")
> -		       (VADDVQ_P_S "s")	(VADDVQ_P_U "u") (VBICQ_S "s")
> (VBICQ_U "u")
> +		       (VADDVQ_P_S "s")	(VADDVQ_P_U "u")
>  		       (VBRSRQ_N_S "s") (VBRSRQ_N_U "u")
> (VCADDQ_ROT270_S "s")
>  		       (VCADDQ_ROT270_U "u") (VCADDQ_ROT90_S "s")
>  		       (VCMPEQQ_S "s") (VCMPEQQ_U "u")
> (VCADDQ_ROT90_U "u")
> @@ -1500,7 +1500,6 @@ (define_int_iterator VABDQ [VABDQ_S VABDQ_U])
>  (define_int_iterator VADDQ_N [VADDQ_N_S VADDQ_N_U])
>  (define_int_iterator VADDVAQ [VADDVAQ_S VADDVAQ_U])
>  (define_int_iterator VADDVQ_P [VADDVQ_P_U VADDVQ_P_S])
> -(define_int_iterator VBICQ [VBICQ_S VBICQ_U])
>  (define_int_iterator VBRSRQ_N [VBRSRQ_N_U VBRSRQ_N_S])
>  (define_int_iterator VCADDQ_ROT270 [VCADDQ_ROT270_S
> VCADDQ_ROT270_U])
>  (define_int_iterator VCADDQ_ROT90 [VCADDQ_ROT90_U
> VCADDQ_ROT90_S])
> diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
> index 10512ad..0505537 100644
> --- a/gcc/config/arm/mve.md
> +++ b/gcc/config/arm/mve.md
> @@ -922,18 +922,26 @@ (define_expand "mve_vandq_s<mode>"
>  ;;
>  ;; [vbicq_s, vbicq_u])
>  ;;
> -(define_insn "mve_vbicq_<supf><mode>"
> +(define_insn "mve_vbicq_u<mode>"
>    [
>     (set (match_operand:MVE_2 0 "s_register_operand" "=w")
> -	(unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "w")
> -		       (match_operand:MVE_2 2 "s_register_operand" "w")]
> -	 VBICQ))
> +	(and:MVE_2 (not:MVE_2 (match_operand:MVE_2 2
> "s_register_operand" "w"))
> +			      (match_operand:MVE_2 1 "s_register_operand"
> "w")))
>    ]
>    "TARGET_HAVE_MVE"
> -  "vbic %q0, %q1, %q2"
> +  "vbic\t%q0, %q1, %q2"
>    [(set_attr "type" "mve_move")
>  ])
> 
> +(define_expand "mve_vbicq_s<mode>"
> +  [
> +   (set (match_operand:MVE_2 0 "s_register_operand")
> +	(and:MVE_2 (not:MVE_2 (match_operand:MVE_2 2
> "s_register_operand"))
> +		   (match_operand:MVE_2 1 "s_register_operand")))
> +  ]
> +  "TARGET_HAVE_MVE"
> +)
> +
>  ;;
>  ;; [vbrsrq_n_u, vbrsrq_n_s])
>  ;;
> @@ -2066,9 +2074,8 @@ (define_insn "mve_vandq_f<mode>"
>  (define_insn "mve_vbicq_f<mode>"
>    [
>     (set (match_operand:MVE_0 0 "s_register_operand" "=w")
> -	(unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "w")
> -		       (match_operand:MVE_0 2 "s_register_operand" "w")]
> -	 VBICQ_F))
> +	(and:MVE_0 (not:MVE_0 (match_operand:MVE_0 1
> "s_register_operand" "w"))
> +			      (match_operand:MVE_0 2 "s_register_operand"
> "w")))
>    ]
>    "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
>    "vbic %q0, %q1, %q2"
> diff --git a/gcc/config/arm/unspecs.md b/gcc/config/arm/unspecs.md
> index fe240e8..8a4389a 100644
> --- a/gcc/config/arm/unspecs.md
> +++ b/gcc/config/arm/unspecs.md
> @@ -601,7 +601,6 @@ (define_c_enum "unspec" [
>    VADDQ_N_S
>    VADDVAQ_S
>    VADDVQ_P_S
> -  VBICQ_S
>    VBRSRQ_N_S
>    VCADDQ_ROT270_S
>    VCADDQ_ROT90_S
> @@ -645,7 +644,6 @@ (define_c_enum "unspec" [
>    VADDQ_N_U
>    VADDVAQ_U
>    VADDVQ_P_U
> -  VBICQ_U
>    VBRSRQ_N_U
>    VCADDQ_ROT270_U
>    VCADDQ_ROT90_U
> @@ -715,7 +713,6 @@ (define_c_enum "unspec" [
>    VABDQ_M_U
>    VABDQ_F
>    VADDQ_N_F
> -  VBICQ_F
>    VCADDQ_ROT270_F
>    VCADDQ_ROT90_F
>    VCMPEQQ_F
> diff --git a/gcc/testsuite/gcc.target/arm/simd/mve-vbic.c
> b/gcc/testsuite/gcc.target/arm/simd/mve-vbic.c
> new file mode 100644
> index 0000000..c9a64c6
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/arm/simd/mve-vbic.c
> @@ -0,0 +1,65 @@
> +/* { dg-do assemble } */
> +/* { dg-require-effective-target arm_v8_1m_mve_ok } */
> +/* { dg-add-options arm_v8_1m_mve } */
> +/* { dg-additional-options "-O3" } */
> +
> +#include <stdint.h>
> +
> +#define FUNC(SIGN, TYPE, BITS, NB, OP, NAME)
> 	\
> +  void test_ ## NAME ##_ ## SIGN ## BITS ## x ## NB (TYPE##BITS##_t *
> __restrict__ dest, TYPE##BITS##_t *a, TYPE##BITS##_t *b) { \
> +    int i;								\
> +    for (i=0; i<NB; i++) {						\
> +      dest[i] = a[i] OP b[i];						\
> +    }									\
> +}
> +
> +#define FUNC_IMM(SIGN, TYPE, BITS, NB, OP, NAME)
> 	\
> +  void test_ ## NAME ##_ ## SIGN ## BITS ## x ## NB (TYPE##BITS##_t *
> __restrict__ dest, TYPE##BITS##_t *a) { \
> +    int i;								\
> +    for (i=0; i<NB; i++) {						\
> +      dest[i] = a[i] OP 1;						\
> +    }									\
> +}
> +
> +/* 64-bit vectors.  */
> +FUNC(s, int, 32, 2, & ~ , vbic)
> +FUNC(u, uint, 32, 2, & ~ , vbic)
> +FUNC(s, int, 16, 4, & ~ , vbic)
> +FUNC(u, uint, 16, 4, & ~ , vbic)
> +FUNC(s, int, 8, 8, & ~ , vbic)
> +FUNC(u, uint, 8, 8, & ~ , vbic)
> +
> +/* 128-bit vectors.  */
> +FUNC(s, int, 32, 4, & ~ , vbic)
> +FUNC(u, uint, 32, 4, & ~ , vbic)
> +FUNC(s, int, 16, 8, & ~ , vbic)
> +FUNC(u, uint, 16, 8, & ~ , vbic)
> +FUNC(s, int, 8, 16, & ~ , vbic)
> +FUNC(u, uint, 8, 16, & ~ , vbic)
> +
> +/* 64-bit vectors.  */
> +FUNC_IMM(s, int, 32, 2, & ~, vbicimm)
> +FUNC_IMM(u, uint, 32, 2, & ~, vbicimm)
> +FUNC_IMM(s, int, 16, 4, & ~, vbicimm)
> +FUNC_IMM(u, uint, 16, 4, & ~, vbicimm)
> +FUNC_IMM(s, int, 8, 8, & ~, vbicimm)
> +FUNC_IMM(u, uint, 8, 8, & ~, vbicimm)
> +
> +/* 128-bit vectors.  */
> +FUNC_IMM(s, int, 32, 4, & ~, vbicimm)
> +FUNC_IMM(u, uint, 32, 4, & ~, vbicimm)
> +FUNC_IMM(s, int, 16, 8, & ~, vbicimm)
> +FUNC_IMM(u, uint, 16, 8, & ~, vbicimm)
> +FUNC_IMM(s, int, 8, 16, & ~, vbicimm)
> +FUNC_IMM(u, uint, 8, 16, & ~, vbicimm)
> +
> +/* MVE has only 128-bit vectors, so we can vectorize only half of the
> +   functions above.  */
> +/* We emit vand.i[16|32] qX, #XX for the first four versions of the
> +   128-bit vector vbicimm tests.  */
> +/* For some reason, we do not generate the immediate version for
> +   int8x16 and uint8x16, we still use vldr to load the vector of
> +   immediates.  */
> +/* { dg-final { scan-assembler-times {vbic\tq[0-9]+, q[0-9]+, q[0-9]+} 6 } } */
> +/* { dg-final { scan-assembler-times {vand.i[0-9]+\tq[0-9]+} 4 } } */
> +/* { dg-final { scan-assembler-times {vand\tq[0-9]+, q[0-9]+, q[0-9]+} 2 } }
> */
> --
> 2.7.4


^ permalink raw reply	[flat|nested] 8+ messages in thread

* RE: [PATCH v3 3/4] arm: Auto-vectorization for MVE: vmvn
  2020-12-11 18:37 ` [PATCH v3 3/4] arm: Auto-vectorization for MVE: vmvn Christophe Lyon
@ 2020-12-14  9:54   ` Kyrylo Tkachov
  0 siblings, 0 replies; 8+ messages in thread
From: Kyrylo Tkachov @ 2020-12-14  9:54 UTC (permalink / raw)
  To: Christophe Lyon; +Cc: gcc-patches



> -----Original Message-----
> From: Gcc-patches <gcc-patches-bounces@gcc.gnu.org> On Behalf Of
> Christophe Lyon via Gcc-patches
> Sent: 11 December 2020 18:37
> To: gcc-patches@gcc.gnu.org
> Subject: [PATCH v3 3/4] arm: Auto-vectorization for MVE: vmvn
> 
> This patch enables MVE vmvnq instructions for auto-vectorization.  MVE
> vmvnq insns in mve.md are modified to use 'not' instead of unspec
> expression to support one_cmpl<mode>2.  The one_cmpl<mode>2 expander
> is added to vec-common.md.

Ok.
Thanks,
Kyrill

> 
> 2020-12-11  Christophe Lyon  <christophe.lyon@linaro.org>
> 
> 	gcc/
> 	* config/arm/iterators.md (VDQNOTM2): New mode iterator.
> 	(supf): Remove VMVNQ_S and VMVNQ_U.
> 	(VMVNQ): Remove.
> 	* config/arm/mve.md (mve_vmvnq_u<mode>): New entry for vmvn
> 	instruction using expression not.
> 	(mve_vmvnq_s<mode>): New expander.
> 	* config/arm/neon.md (one_cmpl<mode>2): Renamed into
> 	one_cmpl<mode>2_neon.
> 	* config/arm/unspecs.md (VMVNQ_S, VMVNQ_U): Remove.
> 	* config/arm/vec-common.md (one_cmpl<mode>2): New expander.
> 
> 	gcc/testsuite/
> 	* gcc.target/arm/simd/mve-vmvn.c: Add tests for vmvn.
> ---
>  gcc/config/arm/iterators.md                  |  3 +--
>  gcc/config/arm/mve.md                        | 14 +++++++----
>  gcc/config/arm/neon.md                       |  4 ++--
>  gcc/config/arm/unspecs.md                    |  2 --
>  gcc/config/arm/vec-common.md                 |  6 +++++
>  gcc/testsuite/gcc.target/arm/simd/mve-vmvn.c | 35
> ++++++++++++++++++++++++++++
>  6 files changed, 54 insertions(+), 10 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/arm/simd/mve-vmvn.c
> 
> diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
> index 26351e0..036a939 100644
> --- a/gcc/config/arm/iterators.md
> +++ b/gcc/config/arm/iterators.md
> @@ -1216,7 +1216,7 @@ (define_int_attr mmla_sfx [(UNSPEC_MATMUL_S
> "s8") (UNSPEC_MATMUL_U "u8")
>  (define_int_attr supf [(VCVTQ_TO_F_S "s") (VCVTQ_TO_F_U "u")
> (VREV16Q_S "s")
>  		       (VREV16Q_U "u") (VMVNQ_N_S "s") (VMVNQ_N_U "u")
>  		       (VCVTAQ_U "u") (VCVTAQ_S "s") (VREV64Q_S "s")
> -		       (VREV64Q_U "u") (VMVNQ_S "s") (VMVNQ_U "u")
> +		       (VREV64Q_U "u")
>  		       (VDUPQ_N_U "u") (VDUPQ_N_S"s") (VADDVQ_S "s")
>  		       (VADDVQ_U "u") (VADDVQ_S "s") (VADDVQ_U "u")
>  		       (VMOVLTQ_U "u") (VMOVLTQ_S "s") (VMOVLBQ_S "s")
> @@ -1476,7 +1476,6 @@ (define_int_iterator VREV64Q [VREV64Q_S
> VREV64Q_U])
>  (define_int_iterator VCVTQ_FROM_F [VCVTQ_FROM_F_S
> VCVTQ_FROM_F_U])
>  (define_int_iterator VREV16Q [VREV16Q_U VREV16Q_S])
>  (define_int_iterator VCVTAQ [VCVTAQ_U VCVTAQ_S])
> -(define_int_iterator VMVNQ [VMVNQ_U VMVNQ_S])
>  (define_int_iterator VDUPQ_N [VDUPQ_N_U VDUPQ_N_S])
>  (define_int_iterator VCLZQ [VCLZQ_U VCLZQ_S])
>  (define_int_iterator VADDVQ [VADDVQ_U VADDVQ_S])
> diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
> index 0505537..86d7fc6 100644
> --- a/gcc/config/arm/mve.md
> +++ b/gcc/config/arm/mve.md
> @@ -433,16 +433,22 @@ (define_insn "mve_vnegq_s<mode>"
>  ;;
>  ;; [vmvnq_u, vmvnq_s])
>  ;;
> -(define_insn "mve_vmvnq_<supf><mode>"
> +(define_insn "mve_vmvnq_u<mode>"
>    [
>     (set (match_operand:MVE_2 0 "s_register_operand" "=w")
> -	(unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand"
> "w")]
> -	 VMVNQ))
> +	(not:MVE_2 (match_operand:MVE_2 1 "s_register_operand" "w")))
>    ]
>    "TARGET_HAVE_MVE"
> -  "vmvn %q0, %q1"
> +  "vmvn\t%q0, %q1"
>    [(set_attr "type" "mve_move")
>  ])
> +(define_expand "mve_vmvnq_s<mode>"
> +  [
> +   (set (match_operand:MVE_2 0 "s_register_operand")
> +	(not:MVE_2 (match_operand:MVE_2 1 "s_register_operand")))
> +  ]
> +  "TARGET_HAVE_MVE"
> +)
> 
>  ;;
>  ;; [vdupq_n_u, vdupq_n_s])
> diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
> index e1263b0..f58d4f5 100644
> --- a/gcc/config/arm/neon.md
> +++ b/gcc/config/arm/neon.md
> @@ -756,7 +756,7 @@ (define_insn "xor<mode>3_neon"
>    [(set_attr "type" "neon_logic<q>")]
>  )
> 
> -(define_insn "one_cmpl<mode>2"
> +(define_insn "one_cmpl<mode>2_neon"
>    [(set (match_operand:VDQ 0 "s_register_operand" "=w")
>          (not:VDQ (match_operand:VDQ 1 "s_register_operand" "w")))]
>    "TARGET_NEON"
> @@ -3240,7 +3240,7 @@ (define_expand "neon_vmvn<mode>"
>     (match_operand:VDQIW 1 "s_register_operand")]
>    "TARGET_NEON"
>  {
> -  emit_insn (gen_one_cmpl<mode>2 (operands[0], operands[1]));
> +  emit_insn (gen_one_cmpl<mode>2_neon (operands[0], operands[1]));
>    DONE;
>  })
> 
> diff --git a/gcc/config/arm/unspecs.md b/gcc/config/arm/unspecs.md
> index 8a4389a..e581645 100644
> --- a/gcc/config/arm/unspecs.md
> +++ b/gcc/config/arm/unspecs.md
> @@ -550,8 +550,6 @@ (define_c_enum "unspec" [
>    VREV64Q_U
>    VQABSQ_S
>    VNEGQ_S
> -  VMVNQ_S
> -  VMVNQ_U
>    VDUPQ_N_U
>    VDUPQ_N_S
>    VCLZQ_U
> diff --git a/gcc/config/arm/vec-common.md b/gcc/config/arm/vec-
> common.md
> index 030ed82..37ff518 100644
> --- a/gcc/config/arm/vec-common.md
> +++ b/gcc/config/arm/vec-common.md
> @@ -193,3 +193,9 @@ (define_expand "xor<mode>3"
>  		 (match_operand:VDQ 2 "s_register_operand" "")))]
>    "ARM_HAVE_<MODE>_ARITH"
>  )
> +
> +(define_expand "one_cmpl<mode>2"
> +  [(set (match_operand:VDQ 0 "s_register_operand")
> +	(not:VDQ (match_operand:VDQ 1 "s_register_operand")))]
> +  "ARM_HAVE_<MODE>_ARITH"
> +)
> diff --git a/gcc/testsuite/gcc.target/arm/simd/mve-vmvn.c
> b/gcc/testsuite/gcc.target/arm/simd/mve-vmvn.c
> new file mode 100644
> index 0000000..73e897a
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/arm/simd/mve-vmvn.c
> @@ -0,0 +1,35 @@
> +/* { dg-do assemble } */
> +/* { dg-require-effective-target arm_v8_1m_mve_ok } */
> +/* { dg-add-options arm_v8_1m_mve } */
> +/* { dg-additional-options "-O3" } */
> +
> +#include <stdint.h>
> +
> +#define FUNC(SIGN, TYPE, BITS, NB, OP, NAME)
> 	\
> +  void test_ ## NAME ##_ ## SIGN ## BITS ## x ## NB (TYPE##BITS##_t *
> __restrict__ dest, TYPE##BITS##_t *a) { \
> +    int i;								\
> +    for (i=0; i<NB; i++) {						\
> +      dest[i] = OP a[i];						\
> +    }									\
> +}
> +
> +/* vmvnq supports only 16-bit and 32-bit elements.  */
> +/* 64-bit vectors.  */
> +FUNC(s, int, 32, 2, ~, vmvn)
> +FUNC(u, uint, 32, 2, ~, vmvn)
> +FUNC(s, int, 16, 4, ~, vmvn)
> +FUNC(u, uint, 16, 4, ~, vmvn)
> +FUNC(s, int, 8, 8, ~, vmvn)
> +FUNC(u, uint, 8, 8, ~, vmvn)
> +
> +/* 128-bit vectors.  */
> +FUNC(s, int, 32, 4, ~, vmvn)
> +FUNC(u, uint, 32, 4, ~, vmvn)
> +FUNC(s, int, 16, 8, ~, vmvn)
> +FUNC(u, uint, 16, 8, ~, vmvn)
> +FUNC(s, int, 8, 16, ~, vmvn)
> +FUNC(u, uint, 8, 16, ~, vmvn)
> +
> +/* MVE has only 128-bit vectors, so we can vectorize only half of the
> +   functions above.  */
> +/* { dg-final { scan-assembler-times {vmvn\tq[0-9]+, q[0-9]+} 6 } } */
> --
> 2.7.4


^ permalink raw reply	[flat|nested] 8+ messages in thread

* RE: [PATCH v3 4/4] arm: Auto-vectorization for MVE: vneg
  2020-12-11 18:37 ` [PATCH v3 4/4] arm: Auto-vectorization for MVE: vneg Christophe Lyon
@ 2020-12-14 10:03   ` Kyrylo Tkachov
  0 siblings, 0 replies; 8+ messages in thread
From: Kyrylo Tkachov @ 2020-12-14 10:03 UTC (permalink / raw)
  To: Christophe Lyon; +Cc: gcc-patches



> -----Original Message-----
> From: Gcc-patches <gcc-patches-bounces@gcc.gnu.org> On Behalf Of
> Christophe Lyon via Gcc-patches
> Sent: 11 December 2020 18:37
> To: gcc-patches@gcc.gnu.org
> Subject: [PATCH v3 4/4] arm: Auto-vectorization for MVE: vneg
> 
> This patch enables MVE vneg instructions for auto-vectorization.  MVE
> vnegq insns in mve.md are modified to use 'neg' instead of unspec
> expression.  The neg<mode>2 expander is added to vec-common.md.
> 
> Existing patterns in neon.md are prefixed with neon_.
> It's not clear why we have different patterns for VDQW
> and VH in neon.md, when VDQWH handles both, and patterns
> with VDQ have provision for attributes for FP modes.

Not sure, maybe a botched cleanup/refactoring in the past...
I guess you can try to clean it up and see what breaks?

> 
> Another question is why <absneg_str><mode>2 always sets
> neon_abs<q> type when it also handles neon_neg<q> cases.

Might be an oversight as well that can be fixed separately.

> 
> 2020-12-11  Christophe Lyon  <christophe.lyon@linaro.org>
> 
> 	gcc/
> 	* config/arm/mve.md (mve_vnegq_f): Use 'neg' instead of unspec.
> 	(mve_vnegq_s): Likewise.
> 	* config/arm/neon.md (neg<mode>2): Rename into
> neon_neg<mode>2.
> 	(<absneg_str><mode>2): Rename into neon_<absneg_str><mode>2.
> 	(neon_v<absneg_str><mode>): Call
> gen_neon_<absneg_str><mode>2.
> 	(vashr<mode>3): Call gen_neon_neg<mode>2.
> 	(vlshr<mode>3): Call gen_neon_neg<mode>2.
> 	(neon_vneg<mode>): Call gen_neon_neg<mode>2.
> 	* config/arm/unspecs.md (VNEGQ_F, VNEGQ_S): Remove.
> 	* config/arm/vec-common.md (neg<mode>2): New expander.
> 
> 	gcc/testsuite/
> 	* gcc.target/arm/simd/mve-vneg.c: Add tests for vneg.

Ok.
Thanks,
Kyrill


> ---
>  gcc/config/arm/mve.md                        |  6 ++--
>  gcc/config/arm/neon.md                       | 12 +++----
>  gcc/config/arm/unspecs.md                    |  2 --
>  gcc/config/arm/vec-common.md                 |  6 ++++
>  gcc/testsuite/gcc.target/arm/simd/mve-vneg.c | 49
> ++++++++++++++++++++++++++++
>  5 files changed, 63 insertions(+), 12 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/arm/simd/mve-vneg.c
> 
> diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
> index 86d7fc6..b4c5a1e2 100644
> --- a/gcc/config/arm/mve.md
> +++ b/gcc/config/arm/mve.md
> @@ -271,8 +271,7 @@ (define_insn "mve_vrev64q_f<mode>"
>  (define_insn "mve_vnegq_f<mode>"
>    [
>     (set (match_operand:MVE_0 0 "s_register_operand" "=w")
> -	(unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand"
> "w")]
> -	 VNEGQ_F))
> +	(neg:MVE_0 (match_operand:MVE_0 1 "s_register_operand" "w")))
>    ]
>    "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
>    "vneg.f%#<V_sz_elem>  %q0, %q1"
> @@ -422,8 +421,7 @@ (define_insn "mve_vqabsq_s<mode>"
>  (define_insn "mve_vnegq_s<mode>"
>    [
>     (set (match_operand:MVE_2 0 "s_register_operand" "=w")
> -	(unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand"
> "w")]
> -	 VNEGQ_S))
> +	(neg:MVE_2 (match_operand:MVE_2 1 "s_register_operand" "w")))
>    ]
>    "TARGET_HAVE_MVE"
>    "vneg.s%#<V_sz_elem>  %q0, %q1"
> diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
> index f58d4f5..d2e92ba 100644
> --- a/gcc/config/arm/neon.md
> +++ b/gcc/config/arm/neon.md
> @@ -775,7 +775,7 @@ (define_insn "abs<mode>2"
>                      (const_string "neon_abs<q>")))]
>  )
> 
> -(define_insn "neg<mode>2"
> +(define_insn "neon_neg<mode>2"
>    [(set (match_operand:VDQW 0 "s_register_operand" "=w")
>  	(neg:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))]
>    "TARGET_NEON"
> @@ -786,7 +786,7 @@ (define_insn "neg<mode>2"
>                      (const_string "neon_neg<q>")))]
>  )
> 
> -(define_insn "<absneg_str><mode>2"
> +(define_insn "neon_<absneg_str><mode>2"
>    [(set (match_operand:VH 0 "s_register_operand" "=w")
>      (ABSNEG:VH (match_operand:VH 1 "s_register_operand" "w")))]
>   "TARGET_NEON_FP16INST"
> @@ -800,7 +800,7 @@ (define_expand "neon_v<absneg_str><mode>"
>     (ABSNEG:VH (match_operand:VH 1 "s_register_operand")))]
>   "TARGET_NEON_FP16INST"
>  {
> -  emit_insn (gen_<absneg_str><mode>2 (operands[0], operands[1]));
> +  emit_insn (gen_neon_<absneg_str><mode>2 (operands[0], operands[1]));
>    DONE;
>  })
> 
> @@ -952,7 +952,7 @@ (define_expand "vashr<mode>3"
>    if (s_register_operand (operands[2], <MODE>mode))
>      {
>        rtx neg = gen_reg_rtx (<MODE>mode);
> -      emit_insn (gen_neg<mode>2 (neg, operands[2]));
> +      emit_insn (gen_neon_neg<mode>2 (neg, operands[2]));
>        emit_insn (gen_ashl<mode>3_signed (operands[0], operands[1], neg));
>      }
>    else
> @@ -969,7 +969,7 @@ (define_expand "vlshr<mode>3"
>    if (s_register_operand (operands[2], <MODE>mode))
>      {
>        rtx neg = gen_reg_rtx (<MODE>mode);
> -      emit_insn (gen_neg<mode>2 (neg, operands[2]));
> +      emit_insn (gen_neon_neg<mode>2 (neg, operands[2]));
>        emit_insn (gen_ashl<mode>3_unsigned (operands[0], operands[1], neg));
>      }
>    else
> @@ -2953,7 +2953,7 @@ (define_expand "neon_vneg<mode>"
>     (match_operand:VDQW 1 "s_register_operand")]
>    "TARGET_NEON"
>  {
> -  emit_insn (gen_neg<mode>2 (operands[0], operands[1]));
> +  emit_insn (gen_neon_neg<mode>2 (operands[0], operands[1]));
>    DONE;
>  })
> 
> diff --git a/gcc/config/arm/unspecs.md b/gcc/config/arm/unspecs.md
> index e581645..ef64989 100644
> --- a/gcc/config/arm/unspecs.md
> +++ b/gcc/config/arm/unspecs.md
> @@ -530,7 +530,6 @@ (define_c_enum "unspec" [
>    VRNDMQ_F
>    VRNDAQ_F
>    VREV64Q_F
> -  VNEGQ_F
>    VDUPQ_N_F
>    VABSQ_F
>    VREV32Q_F
> @@ -549,7 +548,6 @@ (define_c_enum "unspec" [
>    VREV64Q_S
>    VREV64Q_U
>    VQABSQ_S
> -  VNEGQ_S
>    VDUPQ_N_U
>    VDUPQ_N_S
>    VCLZQ_U
> diff --git a/gcc/config/arm/vec-common.md b/gcc/config/arm/vec-
> common.md
> index 37ff518..2d0932b 100644
> --- a/gcc/config/arm/vec-common.md
> +++ b/gcc/config/arm/vec-common.md
> @@ -199,3 +199,9 @@ (define_expand "one_cmpl<mode>2"
>  	(not:VDQ (match_operand:VDQ 1 "s_register_operand")))]
>    "ARM_HAVE_<MODE>_ARITH"
>  )
> +
> +(define_expand "neg<mode>2"
> +  [(set (match_operand:VDQWH 0 "s_register_operand" "")
> +	(neg:VDQWH (match_operand:VDQWH 1 "s_register_operand" "")))]
> +  "ARM_HAVE_<MODE>_ARITH"
> +)
> diff --git a/gcc/testsuite/gcc.target/arm/simd/mve-vneg.c
> b/gcc/testsuite/gcc.target/arm/simd/mve-vneg.c
> new file mode 100644
> index 0000000..afd0d60
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/arm/simd/mve-vneg.c
> @@ -0,0 +1,49 @@
> +/* { dg-do assemble } */
> +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
> +/* { dg-add-options arm_v8_1m_mve_fp } */
> +/* { dg-additional-options "-O3" } */
> +
> +#include <stdint.h>
> +#include <arm_mve.h>
> +
> +#define FUNC(SIGN, TYPE, BITS, NB, OP, NAME)
> 	\
> +  void test_ ## NAME ##_ ## SIGN ## BITS ## x ## NB (TYPE##BITS##_t *
> __restrict__ dest, TYPE##BITS##_t *a) { \
> +    int i;								\
> +    for (i=0; i<NB; i++) {						\
> +      dest[i] = OP a[i];						\
> +    }									\
> +}
> +
> +#define FUNC_FLOAT(SIGN, TYPE, BITS, NB, OP, NAME)
> 		\
> +  void test_ ## NAME ##_ ## SIGN ## BITS ## x ## NB (TYPE * __restrict__
> dest, TYPE *a) { \
> +    int i;								\
> +    for (i=0; i<NB; i++) {						\
> +      dest[i] = OP a[i];						\
> +    }									\
> +}
> +
> +/* vmvnq supports only 16-bit and 32-bit elements.  */
> +/* 64-bit vectors.  */
> +FUNC(s, int, 32, 2, -, vneg)
> +FUNC(u, uint, 32, 2, -, vneg)
> +FUNC(s, int, 16, 4, -, vneg)
> +FUNC(u, uint, 16, 4, -, vneg)
> +FUNC(s, int, 8, 8, -, vneg)
> +FUNC(u, uint, 8, 8, -, vneg)
> +FUNC_FLOAT(f, float, 32, 2, -, vneg)
> +FUNC(f, float, 16, 4, -, vneg)
> +
> +/* 128-bit vectors.  */
> +FUNC(s, int, 32, 4, -, vneg)
> +FUNC(u, uint, 32, 4, -, vneg)
> +FUNC(s, int, 16, 8, -, vneg)
> +FUNC(u, uint, 16, 8, -, vneg)
> +FUNC(s, int, 8, 16, -, vneg)
> +FUNC(u, uint, 8, 16, -, vneg)
> +FUNC_FLOAT(f, float, 32, 4, -, vneg)
> +FUNC(f, float, 16, 8, -, vneg)
> +
> +/* MVE has only 128-bit vectors, so we can vectorize only half of the
> +   functions above.  */
> +/* { dg-final { scan-assembler-times {vneg.s[0-9]+  q[0-9]+, q[0-9]+} 6 } } */
> +/* { dg-final { scan-assembler-times {vneg.f[0-9]+  q[0-9]+, q[0-9]+} 2 } } */
> --
> 2.7.4


^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2020-12-14 10:03 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-12-11 18:37 [PATCH v3 1/4] arm: Auto-vectorization for MVE: veor Christophe Lyon
2020-12-11 18:37 ` [PATCH v3 2/4] arm: Auto-vectorization for MVE: vbic Christophe Lyon
2020-12-14  9:48   ` Kyrylo Tkachov
2020-12-11 18:37 ` [PATCH v3 3/4] arm: Auto-vectorization for MVE: vmvn Christophe Lyon
2020-12-14  9:54   ` Kyrylo Tkachov
2020-12-11 18:37 ` [PATCH v3 4/4] arm: Auto-vectorization for MVE: vneg Christophe Lyon
2020-12-14 10:03   ` Kyrylo Tkachov
2020-12-14  9:31 ` [PATCH v3 1/4] arm: Auto-vectorization for MVE: veor Kyrylo Tkachov

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).