public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH 1/3] arm: Add movmisalign patterns for MVE (PR target/97875)
@ 2020-12-17 17:48 Christophe Lyon
  2020-12-17 17:48 ` [PATCH 2/3] arm: Auto-vectorization for MVE: vshl Christophe Lyon
                   ` (3 more replies)
  0 siblings, 4 replies; 16+ messages in thread
From: Christophe Lyon @ 2020-12-17 17:48 UTC (permalink / raw)
  To: gcc-patches

This patch adds new movmisalign<mode>_mve_load and store patterns for
MVE to help vectorization. They are very similar to their Neon
counterparts, but use different iterators and instructions.

Indeed MVE supports less vectors modes than Neon, so we use
the MVE_VLD_ST iterator where Neon uses VQX.

Since the supported modes are different from the ones valid for
arithmetic operators, we introduce two new sets of macros:

ARM_HAVE_NEON_<MODE>_LDST
  true if Neon has vector load/store instructions for <MODE>

ARM_HAVE_<MODE>_LDST
  true if any vector extension has vector load/store instructions for <MODE>

We move the movmisalign<mode> expander from neon.md to vec-commond.md, and
replace the TARGET_NEON enabler with ARM_HAVE_<MODE>_LDST.

The patch also updates the mve-vneg.c test to scan for the better code
generation when loading and storing the vectors involved: it checks
that no 'orr' instruction is generated to cope with misalignment at
runtime.
This test was chosen among the other mve tests, but any other should
be OK. Using a plain vector copy loop (dest[i] = a[i]) is not a good
test because the compiler chooses to use memcpy.

For instance we now generate:
test_vneg_s32x4:
	vldrw.32       q3, [r1]
	vneg.s32  q3, q3
	vstrw.32       q3, [r0]
	bx      lr

instead of:
test_vneg_s32x4:
	orr     r3, r1, r0
	lsls    r3, r3, #28
	bne     .L15
	vldrw.32	q3, [r1]
	vneg.s32  q3, q3
	vstrw.32	q3, [r0]
	bx      lr
	.L15:
	push    {r4, r5}
	ldrd    r2, r3, [r1, #8]
	ldrd    r5, r4, [r1]
	rsbs    r2, r2, #0
	rsbs    r5, r5, #0
	rsbs    r4, r4, #0
	rsbs    r3, r3, #0
	strd    r5, r4, [r0]
	pop     {r4, r5}
	strd    r2, r3, [r0, #8]
	bx      lr

2020-12-15  Christophe Lyon  <christophe.lyon@linaro.org>

	PR target/97875
	gcc/
	* config/arm/arm.h (ARM_HAVE_NEON_V8QI_LDST): New macro.
	(ARM_HAVE_NEON_V16QI_LDST, ARM_HAVE_NEON_V4HI_LDST): Likewise.
	(ARM_HAVE_NEON_V8HI_LDST, ARM_HAVE_NEON_V2SI_LDST): Likewise.
	(ARM_HAVE_NEON_V4SI_LDST, ARM_HAVE_NEON_V4HF_LDST): Likewise.
	(ARM_HAVE_NEON_V8HF_LDST, ARM_HAVE_NEON_V4BF_LDST): Likewise.
	(ARM_HAVE_NEON_V8BF_LDST, ARM_HAVE_NEON_V2SF_LDST): Likewise.
	(ARM_HAVE_NEON_V4SF_LDST, ARM_HAVE_NEON_DI_LDST): Likewise.
	(ARM_HAVE_NEON_V2DI_LDST): Likewise.
	(ARM_HAVE_V8QI_LDST, ARM_HAVE_V16QI_LDST): Likewise.
	(ARM_HAVE_V4HI_LDST, ARM_HAVE_V8HI_LDST): Likewise.
	(ARM_HAVE_V2SI_LDST, ARM_HAVE_V4SI_LDST, ARM_HAVE_V4HF_LDST): Likewise.
	(ARM_HAVE_V8HF_LDST, ARM_HAVE_V4BF_LDST, ARM_HAVE_V8BF_LDST): Likewise.
	(ARM_HAVE_V2SF_LDST, ARM_HAVE_V4SF_LDST, ARM_HAVE_DI_LDST): Likewise.
	(ARM_HAVE_V2DI_LDST): Likewise.
	* config/arm/mve.md (*movmisalign<mode>_mve_store): New pattern.
	(*movmisalign<mode>_mve_load): New pattern.
	* config/arm/neon.md (movmisalign<mode>): Move to ...
	* config/arm/vec-common.md: ... here.

	PR target/97875
	gcc/testsuite/
	* gcc.target/arm/simd/mve-vneg.c: Update test.
---
 gcc/config/arm/arm.h                         | 40 ++++++++++++++++++++++++++++
 gcc/config/arm/mve.md                        | 25 +++++++++++++++++
 gcc/config/arm/neon.md                       | 25 -----------------
 gcc/config/arm/vec-common.md                 | 24 +++++++++++++++++
 gcc/testsuite/gcc.target/arm/simd/mve-vneg.c |  3 +++
 5 files changed, 92 insertions(+), 25 deletions(-)

diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h
index 4a63d33..d44e0c6 100644
--- a/gcc/config/arm/arm.h
+++ b/gcc/config/arm/arm.h
@@ -1151,6 +1151,46 @@ extern const int arm_arch_cde_coproc_bits[];
 #define ARM_HAVE_V8HF_ARITH (ARM_HAVE_NEON_V8HF_ARITH || TARGET_HAVE_MVE_FLOAT)
 #define ARM_HAVE_V4SF_ARITH (ARM_HAVE_NEON_V4SF_ARITH || TARGET_HAVE_MVE_FLOAT)
 
+/* The conditions under which vector modes are supported by load/store
+   instructions using Neon.  */
+
+#define ARM_HAVE_NEON_V8QI_LDST TARGET_NEON
+#define ARM_HAVE_NEON_V16QI_LDST TARGET_NEON
+#define ARM_HAVE_NEON_V4HI_LDST TARGET_NEON
+#define ARM_HAVE_NEON_V8HI_LDST TARGET_NEON
+#define ARM_HAVE_NEON_V2SI_LDST TARGET_NEON
+#define ARM_HAVE_NEON_V4SI_LDST TARGET_NEON
+#define ARM_HAVE_NEON_V4HF_LDST TARGET_NEON_FP16INST
+#define ARM_HAVE_NEON_V8HF_LDST TARGET_NEON_FP16INST
+#define ARM_HAVE_NEON_V4BF_LDST TARGET_BF16_SIMD
+#define ARM_HAVE_NEON_V8BF_LDST TARGET_BF16_SIMD
+#define ARM_HAVE_NEON_V2SF_LDST TARGET_NEON
+#define ARM_HAVE_NEON_V4SF_LDST TARGET_NEON
+#define ARM_HAVE_NEON_DI_LDST TARGET_NEON
+#define ARM_HAVE_NEON_V2DI_LDST TARGET_NEON
+
+/* The conditions under which vector modes are supported by load/store
+   instructions by any vector extension.  */
+
+#define ARM_HAVE_V8QI_LDST (ARM_HAVE_NEON_V8QI_LDST || TARGET_REALLY_IWMMXT)
+#define ARM_HAVE_V4HI_LDST (ARM_HAVE_NEON_V4HI_LDST || TARGET_REALLY_IWMMXT)
+#define ARM_HAVE_V2SI_LDST (ARM_HAVE_NEON_V2SI_LDST || TARGET_REALLY_IWMMXT)
+
+#define ARM_HAVE_V16QI_LDST (ARM_HAVE_NEON_V16QI_LDST || TARGET_HAVE_MVE)
+#define ARM_HAVE_V8HI_LDST (ARM_HAVE_NEON_V8HI_LDST || TARGET_HAVE_MVE)
+#define ARM_HAVE_V4SI_LDST (ARM_HAVE_NEON_V4SI_LDST || TARGET_HAVE_MVE)
+#define ARM_HAVE_DI_LDST ARM_HAVE_NEON_DI_LDST
+#define ARM_HAVE_V2DI_LDST ARM_HAVE_NEON_V2DI_LDST
+
+#define ARM_HAVE_V4HF_LDST ARM_HAVE_NEON_V4HF_LDST
+#define ARM_HAVE_V2SF_LDST ARM_HAVE_NEON_V2SF_LDST
+
+#define ARM_HAVE_V4BF_LDST ARM_HAVE_NEON_V4BF_LDST
+#define ARM_HAVE_V8BF_LDST ARM_HAVE_NEON_V8BF_LDST
+
+#define ARM_HAVE_V8HF_LDST (ARM_HAVE_NEON_V8HF_LDST || TARGET_HAVE_MVE_FLOAT)
+#define ARM_HAVE_V4SF_LDST (ARM_HAVE_NEON_V4SF_LDST || TARGET_HAVE_MVE_FLOAT)
+
 /* The register numbers in sequence, for passing to arm_gen_load_multiple.  */
 extern int arm_regs_in_sequence[];
 
diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
index b4c5a1e2..673a83c 100644
--- a/gcc/config/arm/mve.md
+++ b/gcc/config/arm/mve.md
@@ -10937,3 +10937,28 @@ (define_insn "arm_vcx3q<a>_p_v16qi"
   [(set_attr "type" "coproc")
    (set_attr "length" "8")]
 )
+
+(define_insn "*movmisalign<mode>_mve_store"
+  [(set (match_operand:MVE_VLD_ST 0 "neon_permissive_struct_operand"	     "=Um")
+	(unspec:MVE_VLD_ST [(match_operand:MVE_VLD_ST 1 "s_register_operand" " w")]
+	 UNSPEC_MISALIGNED_ACCESS))]
+  "(TARGET_HAVE_MVE && VALID_MVE_SI_MODE (<MODE>mode))
+   || (TARGET_HAVE_MVE_FLOAT && VALID_MVE_SF_MODE (<MODE>mode))
+   && !BYTES_BIG_ENDIAN && unaligned_access"
+  "vstr<V_sz_elem1>.<V_sz_elem>\t%q1, %E0"
+  [(set_attr "type" "mve_store")
+   (set_attr "length" "4")]
+)
+
+
+(define_insn "*movmisalign<mode>_mve_load"
+  [(set (match_operand:MVE_VLD_ST 0 "s_register_operand"				 "=w")
+	(unspec:MVE_VLD_ST [(match_operand:MVE_VLD_ST 1 "neon_permissive_struct_operand" " Um")]
+	 UNSPEC_MISALIGNED_ACCESS))]
+  "(TARGET_HAVE_MVE && VALID_MVE_SI_MODE (<MODE>mode))
+   || (TARGET_HAVE_MVE_FLOAT && VALID_MVE_SF_MODE (<MODE>mode))
+   && !BYTES_BIG_ENDIAN && unaligned_access"
+  "vldr<V_sz_elem1>.<V_sz_elem>\t%q0, %E1"
+  [(set_attr "type" "mve_load")
+   (set_attr "length" "4")]
+)
diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
index d2e92ba..50220be 100644
--- a/gcc/config/arm/neon.md
+++ b/gcc/config/arm/neon.md
@@ -280,31 +280,6 @@ (define_split
   neon_disambiguate_copy (operands, dest, src, 4);
 })
 
-(define_expand "movmisalign<mode>"
-  [(set (match_operand:VDQX 0 "neon_perm_struct_or_reg_operand")
-	(unspec:VDQX [(match_operand:VDQX 1 "neon_perm_struct_or_reg_operand")]
-		     UNSPEC_MISALIGNED_ACCESS))]
-  "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
-{
-  rtx adjust_mem;
-  /* This pattern is not permitted to fail during expansion: if both arguments
-     are non-registers (e.g. memory := constant, which can be created by the
-     auto-vectorizer), force operand 1 into a register.  */
-  if (!s_register_operand (operands[0], <MODE>mode)
-      && !s_register_operand (operands[1], <MODE>mode))
-    operands[1] = force_reg (<MODE>mode, operands[1]);
-
-  if (s_register_operand (operands[0], <MODE>mode))
-    adjust_mem = operands[1];
-  else
-    adjust_mem = operands[0];
-
-  /* Legitimize address.  */
-  if (!neon_vector_mem_operand (adjust_mem, 2, true))
-    XEXP (adjust_mem, 0) = force_reg (Pmode, XEXP (adjust_mem, 0));
-
-})
-
 (define_insn "*movmisalign<mode>_neon_store"
   [(set (match_operand:VDX 0 "neon_permissive_struct_operand"	"=Um")
 	(unspec:VDX [(match_operand:VDX 1 "s_register_operand" " w")]
diff --git a/gcc/config/arm/vec-common.md b/gcc/config/arm/vec-common.md
index 2d0932b..f6a79e2 100644
--- a/gcc/config/arm/vec-common.md
+++ b/gcc/config/arm/vec-common.md
@@ -205,3 +205,27 @@ (define_expand "neg<mode>2"
 	(neg:VDQWH (match_operand:VDQWH 1 "s_register_operand" "")))]
   "ARM_HAVE_<MODE>_ARITH"
 )
+
+(define_expand "movmisalign<mode>"
+  [(set (match_operand:VDQX 0 "neon_perm_struct_or_reg_operand")
+	(unspec:VDQX [(match_operand:VDQX 1 "neon_perm_struct_or_reg_operand")]
+	 UNSPEC_MISALIGNED_ACCESS))]
+  "ARM_HAVE_<MODE>_LDST && !BYTES_BIG_ENDIAN && unaligned_access"
+{
+  rtx adjust_mem;
+  /* This pattern is not permitted to fail during expansion: if both arguments
+     are non-registers (e.g. memory := constant, which can be created by the
+     auto-vectorizer), force operand 1 into a register.  */
+  if (!s_register_operand (operands[0], <MODE>mode)
+      && !s_register_operand (operands[1], <MODE>mode))
+    operands[1] = force_reg (<MODE>mode, operands[1]);
+
+  if (s_register_operand (operands[0], <MODE>mode))
+    adjust_mem = operands[1];
+  else
+    adjust_mem = operands[0];
+
+  /* Legitimize address.  */
+  if (!neon_vector_mem_operand (adjust_mem, 2, true))
+    XEXP (adjust_mem, 0) = force_reg (Pmode, XEXP (adjust_mem, 0));
+})
diff --git a/gcc/testsuite/gcc.target/arm/simd/mve-vneg.c b/gcc/testsuite/gcc.target/arm/simd/mve-vneg.c
index afd0d60..7945a06 100644
--- a/gcc/testsuite/gcc.target/arm/simd/mve-vneg.c
+++ b/gcc/testsuite/gcc.target/arm/simd/mve-vneg.c
@@ -47,3 +47,6 @@ FUNC(f, float, 16, 8, -, vneg)
    functions above.  */
 /* { dg-final { scan-assembler-times {vneg.s[0-9]+  q[0-9]+, q[0-9]+} 6 } } */
 /* { dg-final { scan-assembler-times {vneg.f[0-9]+  q[0-9]+, q[0-9]+} 2 } } */
+/* { dg-final { scan-assembler-times {vldr[bhw].[0-9]+\tq[0-9]+} 8 } } */
+/* { dg-final { scan-assembler-times {vstr[bhw].[0-9]+\tq[0-9]+} 8 } } */
+/* { dg-final { scan-assembler-not {orr\tr[0-9]+, r[0-9]+, r[0-9]+} } } */
-- 
2.7.4


^ permalink raw reply	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2021-01-15 10:46 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-12-17 17:48 [PATCH 1/3] arm: Add movmisalign patterns for MVE (PR target/97875) Christophe Lyon
2020-12-17 17:48 ` [PATCH 2/3] arm: Auto-vectorization for MVE: vshl Christophe Lyon
2020-12-30 10:34   ` Christophe Lyon
2021-01-07 12:20     ` Christophe Lyon
2021-01-15  9:29       ` Christophe Lyon
2021-01-15  9:42   ` Kyrylo Tkachov
2021-01-15 10:45     ` Christophe Lyon
2020-12-17 17:48 ` [PATCH 3/3] arm: Auto-vectorization for MVE: vshr Christophe Lyon
2020-12-30 10:34   ` Christophe Lyon
2021-01-07 12:20     ` Christophe Lyon
2021-01-15  9:29       ` Christophe Lyon
2021-01-15  9:44   ` Kyrylo Tkachov
2020-12-30 10:33 ` [PATCH 1/3] arm: Add movmisalign patterns for MVE (PR target/97875) Christophe Lyon
2021-01-07 12:20   ` Christophe Lyon
2021-01-08  9:50 ` Kyrylo Tkachov
2021-01-08 11:04   ` Christophe Lyon

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).