public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] RISC-V: Add VECTOR_ALIGNMENT_REACHABLE && BUILTIN_VECTORIZATION_COST target hook to optimize RVV VLS auto-vectorization codegen
@ 2023-05-15  2:26 juzhe.zhong
  0 siblings, 0 replies; only message in thread
From: juzhe.zhong @ 2023-05-15  2:26 UTC (permalink / raw)
  To: gcc-patches
  Cc: kito.cheng, kito.cheng, palmer, palmer, jeffreyalaw, rdapp.gcc,
	Juzhe-Zhong

From: Juzhe-Zhong <juzhe.zhong@rivai.ai>

This patch optimizes the codegen of RVV VLS auto-vectorization by removing the
scalar prologue that was previously emitted to peel the loop for alignment.

void __attribute__((noinline, noclone))
f3 (int * __restrict dst, int * __restrict op1, int * __restrict op2, int count)
{
  for (int i = 0; i < count; ++i)
    dst[i] = op1[i] + op2[i];
}

Before this patch:
f3:
	ble	a3,zero,.L1
	srli	a5,a1,2
	negw	a5,a5
	andi	a4,a5,3
	sext.w	a3,a3
	beq	a4,zero,.L3
	lw	a7,0(a1)
	lw	a6,0(a2)
	andi	a5,a5,2
	addw	a6,a6,a7
	sw	a6,0(a0)
	beq	a5,zero,.L3
	lw	a7,4(a1)
	lw	a5,4(a2)
	li	a6,3
	addw	a5,a5,a7
	sw	a5,4(a0)
	bne	a4,a6,.L3
	lw	a6,8(a2)
	lw	a5,8(a1)
	addw	a5,a5,a6
	sw	a5,8(a0)
.L3:
	subw	a3,a3,a4
	slli	a6,a4,2
	slli	a5,a3,32
	srli	a5,a5,32
	add	a1,a1,a6
	add	a2,a2,a6
	add	a0,a0,a6
	li	a3,4
.L6:
	mv	a4,a5
	bleu	a5,a3,.L5
	li	a4,4
.L5:
	vsetvli	zero,a4,e32,m1,ta,ma
	vle32.v	v1,0(a1)
	vle32.v	v2,0(a2)
	vsetivli	zero,4,e32,m1,ta,ma
	sub	a5,a5,a4
	vadd.vv	v1,v1,v2
	vsetvli	zero,a4,e32,m1,ta,ma
	vse32.v	v1,0(a0)
	addi	a1,a1,16
	addi	a2,a2,16
	addi	a0,a0,16
	bne	a5,zero,.L6
.L1:
	ret

After this patch:
f3:
	ble	a3,zero,.L1
	li	a4,4
.L4:
	mv	a5,a3
	bleu	a3,a4,.L3
	li	a5,4
.L3:
	vsetvli	zero,a5,e32,m1,ta,ma
	vle32.v	v2,0(a1)
	vle32.v	v1,0(a2)
	vsetivli	zero,4,e32,m1,ta,ma
	sub	a3,a3,a5
	vadd.vv	v1,v1,v2
	vsetvli	zero,a5,e32,m1,ta,ma
	vse32.v	v1,0(a0)
	addi	a2,a2,16
	addi	a0,a0,16
	addi	a1,a1,16
	bne	a3,zero,.L4
.L1:
	ret

The TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE implementation is taken directly from ARM SVE.

The TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST implementation is the same as the GCN port's,
which vectorizes all cases by default. We will need to support an accurate vector cost model in the future.

gcc/ChangeLog:

        * config/riscv/riscv.cc (riscv_simd_vector_alignment_reachable): New function.
        (riscv_vectorization_cost): New function.
        (TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE): New target hook.
        (TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST): New target hook.

gcc/testsuite/ChangeLog:

        * gcc.target/riscv/rvv/autovec/align-2.c: New test.

---
 gcc/config/riscv/riscv.cc                     | 39 +++++++++++++++++++
 .../gcc.target/riscv/rvv/autovec/align-2.c    | 12 ++++++
 2 files changed, 51 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/align-2.c

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index a5776a550b2..54306327cb3 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -7517,6 +7517,39 @@ riscv_vectorize_preferred_vector_alignment (const_tree type)
   return TYPE_ALIGN (type);
 }
 
+/* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE.  */
+
+static bool
+riscv_simd_vector_alignment_reachable (const_tree type, bool is_packed)
+{
+  if (is_packed)
+    return false;
+
+  /* For fixed-length vectors, check that the vectorizer will aim for
+     full-vector alignment.  This isn't true for generic GCC vectors
+     that are wider than the ABI maximum of 128 bits.  */
+  poly_uint64 preferred_alignment
+    = riscv_vectorize_preferred_vector_alignment (type);
+  if (TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
+      && maybe_ne (wi::to_widest (TYPE_SIZE (type)), preferred_alignment))
+    return false;
+
+  /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned.  */
+  return true;
+}
+
+/* Implement TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST.  */
+
+int
+riscv_vectorization_cost (enum vect_cost_for_stmt ARG_UNUSED (type_of_cost),
+			  tree ARG_UNUSED (vectype), int ARG_UNUSED (misalign))
+{
+  /* TODO: Always vectorize. The vectorization COST model is not accurate,
+     we will need to support accurate vectorization COST model according
+     to '-mtune' in the future.  */
+  return 1;
+}
+
 /* Initialize the GCC target structure.  */
 #undef TARGET_ASM_ALIGNED_HI_OP
 #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
@@ -7792,6 +7825,12 @@ riscv_vectorize_preferred_vector_alignment (const_tree type)
 #undef TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT
 #define TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT \
   riscv_vectorize_preferred_vector_alignment
+#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
+#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
+  riscv_simd_vector_alignment_reachable
+#undef  TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
+#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
+  riscv_vectorization_cost
 
 struct gcc_target targetm = TARGET_INITIALIZER;
 
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/align-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/align-2.c
new file mode 100644
index 00000000000..812584e9d25
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/align-2.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3 --param riscv-autovec-preference=fixed-vlmax" } */
+
+void __attribute__((noinline, noclone))
+f (int * __restrict dst, int * __restrict op1, int * __restrict op2, int count)
+{
+  for (int i = 0; i < count; ++i)
+    dst[i] = op1[i] + op2[i];
+}
+
+/* { dg-final { scan-assembler-not "lw" } } */
+/* { dg-final { scan-assembler-not "sw" } } */
-- 
2.36.1


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2023-05-15  2:27 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-05-15  2:26 [PATCH] RISC-V: Add VECTOR_ALIGNMENT_REACHABLE && BUILTIN_VECTORIZATION_COST target hook to optimize RVV VLS auto-vectorization codegen juzhe.zhong

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).