public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] RISC-V: Count pointer type SSA into RVV regs liveness for dynamic LMUL cost model
@ 2023-12-29  1:21 Juzhe-Zhong
  2023-12-31 17:54 ` Jeff Law
  0 siblings, 1 reply; 2+ messages in thread
From: Juzhe-Zhong @ 2023-12-29  1:21 UTC (permalink / raw)
  To: gcc-patches; +Cc: kito.cheng, kito.cheng, jeffreyalaw, rdapp.gcc, Juzhe-Zhong

This patch fixes the following case, where an unexpectedly big LMUL is chosen, which causes register spills.

Before this patch, choosing LMUL = 4:

	addi	sp,sp,-160
	addiw	t1,a2,-1
	li	a5,7
	bleu	t1,a5,.L16
	vsetivli	zero,8,e64,m4,ta,ma
	vmv.v.x	v4,a0
	vs4r.v	v4,0(sp)                        ---> spill to the stack.
	vmv.v.x	v4,a1
	addi	a5,sp,64
	vs4r.v	v4,0(a5)                        ---> spill to the stack.

The root cause is the following code:

                  if (poly_int_tree_p (var)
                      || (is_gimple_val (var)
                         && !POINTER_TYPE_P (TREE_TYPE (var))))

We count the variable as consuming an RVV reg group only when it is not POINTER_TYPE.

It is right for load/store STMT for example:

_1 = (MEM)*addr -->  addr won't be allocated an RVV vector group.

However, we find it is not right for non-load/store STMT:

_3 = _1 == x_8(D);

_1 is a pointer type too, but here we do allocate an RVV register group for it.

So after this patch, we are choosing the perfect LMUL for the testcase in this patch:

	ble	a2,zero,.L17
	addiw	a7,a2,-1
	li	a5,3
	bleu	a7,a5,.L15
	srliw	a5,a7,2
	slli	a6,a5,1
	add	a6,a6,a5
	lui	a5,%hi(replacements)
	addi	t1,a5,%lo(replacements)
	slli	a6,a6,5
	lui	t4,%hi(.LANCHOR0)
	lui	t3,%hi(.LANCHOR0+8)
	lui	a3,%hi(.LANCHOR0+16)
	lui	a4,%hi(.LC1)
	vsetivli	zero,4,e16,mf2,ta,ma
	addi	t4,t4,%lo(.LANCHOR0)
	addi	t3,t3,%lo(.LANCHOR0+8)
	addi	a3,a3,%lo(.LANCHOR0+16)
	addi	a4,a4,%lo(.LC1)
	add	a6,t1,a6
	addi	a5,a5,%lo(replacements)
	vle16.v	v18,0(t4)
	vle16.v	v17,0(t3)
	vle16.v	v16,0(a3)
	vmsgeu.vi	v25,v18,4
	vadd.vi	v24,v18,-4
	vmsgeu.vi	v23,v17,4
	vadd.vi	v22,v17,-4
	vlm.v	v21,0(a4)
	vmsgeu.vi	v20,v16,4
	vadd.vi	v19,v16,-4
	vsetvli	zero,zero,e64,m2,ta,mu
	vmv.v.x	v12,a0
	vmv.v.x	v14,a1
.L4:
	vlseg3e64.v	v6,(a5)
	vmseq.vv	v2,v6,v12
	vmseq.vv	v0,v8,v12
	vmsne.vv	v1,v8,v12
	vmand.mm	v1,v1,v2
	vmerge.vvm	v2,v8,v14,v0
	vmv1r.v	v0,v1
	addi	a4,a5,24
	vmerge.vvm	v6,v6,v14,v0
	vmerge.vim	v2,v2,0,v0
	vrgatherei16.vv	v4,v6,v18
	vmv1r.v	v0,v25
	vrgatherei16.vv	v4,v2,v24,v0.t
	vs1r.v	v4,0(a5)
	addi	a3,a5,48
	vmv1r.v	v0,v21
	vmv2r.v	v4,v2
	vcompress.vm	v4,v6,v0
	vs1r.v	v4,0(a4)
	vmv1r.v	v0,v23
	addi	a4,a5,72
	vrgatherei16.vv	v4,v6,v17
	vrgatherei16.vv	v4,v2,v22,v0.t
	vs1r.v	v4,0(a3)
	vmv1r.v	v0,v20
	vrgatherei16.vv	v4,v6,v16
	addi	a5,a5,96
	vrgatherei16.vv	v4,v2,v19,v0.t
	vs1r.v	v4,0(a4)
	bne	a6,a5,.L4

No spills, and the "sp" register is not used.

Tested on both RV32 and RV64, no regression.

Ok for trunk ?

	PR target/113112

gcc/ChangeLog:

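	* config/riscv/riscv-vector-costs.cc (compute_local_live_ranges):
	Only exclude pointer type SSA names from the liveness count for
	load/store statements.
	(compute_nregs_for_mode): Return at least 1.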

gcc/testsuite/ChangeLog:

	* gcc.dg/vect/costmodel/riscv/rvv/pr113112-4.c: New test.

---
 gcc/config/riscv/riscv-vector-costs.cc        | 12 ++++++--
 .../vect/costmodel/riscv/rvv/pr113112-4.c     | 28 +++++++++++++++++++
 2 files changed, 37 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr113112-4.c

diff --git a/gcc/config/riscv/riscv-vector-costs.cc b/gcc/config/riscv/riscv-vector-costs.cc
index 0c485dc4f29..b41a79429d4 100644
--- a/gcc/config/riscv/riscv-vector-costs.cc
+++ b/gcc/config/riscv/riscv-vector-costs.cc
@@ -277,9 +277,12 @@ compute_local_live_ranges (
 	    {
 	      unsigned int point = program_point.point;
 	      gimple *stmt = program_point.stmt;
+	      stmt_vec_info stmt_info = program_point.stmt_info;
 	      tree lhs = gimple_get_lhs (stmt);
 	      if (lhs != NULL_TREE && is_gimple_reg (lhs)
-		  && !POINTER_TYPE_P (TREE_TYPE (lhs)))
+		  && (!POINTER_TYPE_P (TREE_TYPE (lhs))
+		      || STMT_VINFO_TYPE (vect_stmt_to_vectorize (stmt_info))
+			   != store_vec_info_type))
 		{
 		  biggest_mode = get_biggest_mode (biggest_mode,
 						   TYPE_MODE (TREE_TYPE (lhs)));
@@ -305,7 +308,10 @@ compute_local_live_ranges (
 		     the future.  */
 		  if (poly_int_tree_p (var)
 		      || (is_gimple_val (var)
-			  && !POINTER_TYPE_P (TREE_TYPE (var))))
+			  && (!POINTER_TYPE_P (TREE_TYPE (var))
+			      || STMT_VINFO_TYPE (
+				   vect_stmt_to_vectorize (stmt_info))
+				   != load_vec_info_type)))
 		    {
 		      biggest_mode
 			= get_biggest_mode (biggest_mode,
@@ -374,7 +380,7 @@ compute_nregs_for_mode (machine_mode mode, machine_mode biggest_mode, int lmul)
   unsigned int biggest_size = GET_MODE_SIZE (biggest_mode).to_constant ();
   gcc_assert (biggest_size >= mode_size);
   unsigned int ratio = biggest_size / mode_size;
-  return lmul / ratio;
+  return MAX (lmul / ratio, 1);
 }
 
 /* This function helps to determine whether current LMUL will cause
diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr113112-4.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr113112-4.c
new file mode 100644
index 00000000000..5c55a66ed77
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr113112-4.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -Ofast -ftree-vectorize --param riscv-autovec-lmul=dynamic --param riscv-autovec-preference=fixed-vlmax -fno-schedule-insns -fno-schedule-insns2" } */
+
+typedef struct rtx_def *rtx;
+struct replacement {
+    rtx *where;
+    rtx *subreg_loc;
+    int mode;
+};
+static struct replacement replacements[150];
+void move_replacements (rtx *x, rtx *y, int n_replacements)
+{
+  int i;
+  for (i = 0; i < n_replacements; i++)
+    if (replacements[i].subreg_loc == x)
+      replacements[i].subreg_loc = y;
+    else if (replacements[i].where == x) 
+      {
+	replacements[i].where = y;
+	replacements[i].subreg_loc = 0;
+      }
+}
+
+/* { dg-final { scan-assembler {e64,m2} } } */
+/* { dg-final { scan-assembler-not {e64,m4} } } */
+/* { dg-final { scan-assembler-not {jr} } } */
+/* { dg-final { scan-assembler {ret} } } */
+/* { dg-final { scan-assembler-not {sp} } } */
-- 
2.36.3


^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: [PATCH] RISC-V: Count pointer type SSA into RVV regs liveness for dynamic LMUL cost model
  2023-12-29  1:21 [PATCH] RISC-V: Count pointer type SSA into RVV regs liveness for dynamic LMUL cost model Juzhe-Zhong
@ 2023-12-31 17:54 ` Jeff Law
  0 siblings, 0 replies; 2+ messages in thread
From: Jeff Law @ 2023-12-31 17:54 UTC (permalink / raw)
  To: Juzhe-Zhong, gcc-patches; +Cc: kito.cheng, kito.cheng, rdapp.gcc



On 12/28/23 18:21, Juzhe-Zhong wrote:
> This patch fixes the following choosing unexpected big LMUL which cause register spillings.
> 
> Before this patch, choosing LMUL = 4:
> 
> 	addi	sp,sp,-160
> 	addiw	t1,a2,-1
> 	li	a5,7
> 	bleu	t1,a5,.L16
> 	vsetivli	zero,8,e64,m4,ta,ma
> 	vmv.v.x	v4,a0
> 	vs4r.v	v4,0(sp)                        ---> spill to the stack.
> 	vmv.v.x	v4,a1
> 	addi	a5,sp,64
> 	vs4r.v	v4,0(a5)                        ---> spill to the stack.
> 
> The root cause is the following codes:
> 
>                    if (poly_int_tree_p (var)
>                        || (is_gimple_val (var)
>                           && !POINTER_TYPE_P (TREE_TYPE (var))))
> 
> We count the variable as consuming a RVV reg group when it is not POINTER_TYPE.
> 
> It is right for load/store STMT for example:
> 
> _1 = (MEM)*addr -->  addr won't be allocated an RVV vector group.
> 
> However, we find it is not right for non-load/store STMT:
> 
> _3 = _1 == x_8(D);
> 
> _1 is pointer type too but we does allocate a RVV register group for it.
> 
> So after this patch, we are choosing the perfect LMUL for the testcase in this patch:
> 
> 	ble	a2,zero,.L17
> 	addiw	a7,a2,-1
> 	li	a5,3
> 	bleu	a7,a5,.L15
> 	srliw	a5,a7,2
> 	slli	a6,a5,1
> 	add	a6,a6,a5
> 	lui	a5,%hi(replacements)
> 	addi	t1,a5,%lo(replacements)
> 	slli	a6,a6,5
> 	lui	t4,%hi(.LANCHOR0)
> 	lui	t3,%hi(.LANCHOR0+8)
> 	lui	a3,%hi(.LANCHOR0+16)
> 	lui	a4,%hi(.LC1)
> 	vsetivli	zero,4,e16,mf2,ta,ma
> 	addi	t4,t4,%lo(.LANCHOR0)
> 	addi	t3,t3,%lo(.LANCHOR0+8)
> 	addi	a3,a3,%lo(.LANCHOR0+16)
> 	addi	a4,a4,%lo(.LC1)
> 	add	a6,t1,a6
> 	addi	a5,a5,%lo(replacements)
> 	vle16.v	v18,0(t4)
> 	vle16.v	v17,0(t3)
> 	vle16.v	v16,0(a3)
> 	vmsgeu.vi	v25,v18,4
> 	vadd.vi	v24,v18,-4
> 	vmsgeu.vi	v23,v17,4
> 	vadd.vi	v22,v17,-4
> 	vlm.v	v21,0(a4)
> 	vmsgeu.vi	v20,v16,4
> 	vadd.vi	v19,v16,-4
> 	vsetvli	zero,zero,e64,m2,ta,mu
> 	vmv.v.x	v12,a0
> 	vmv.v.x	v14,a1
> .L4:
> 	vlseg3e64.v	v6,(a5)
> 	vmseq.vv	v2,v6,v12
> 	vmseq.vv	v0,v8,v12
> 	vmsne.vv	v1,v8,v12
> 	vmand.mm	v1,v1,v2
> 	vmerge.vvm	v2,v8,v14,v0
> 	vmv1r.v	v0,v1
> 	addi	a4,a5,24
> 	vmerge.vvm	v6,v6,v14,v0
> 	vmerge.vim	v2,v2,0,v0
> 	vrgatherei16.vv	v4,v6,v18
> 	vmv1r.v	v0,v25
> 	vrgatherei16.vv	v4,v2,v24,v0.t
> 	vs1r.v	v4,0(a5)
> 	addi	a3,a5,48
> 	vmv1r.v	v0,v21
> 	vmv2r.v	v4,v2
> 	vcompress.vm	v4,v6,v0
> 	vs1r.v	v4,0(a4)
> 	vmv1r.v	v0,v23
> 	addi	a4,a5,72
> 	vrgatherei16.vv	v4,v6,v17
> 	vrgatherei16.vv	v4,v2,v22,v0.t
> 	vs1r.v	v4,0(a3)
> 	vmv1r.v	v0,v20
> 	vrgatherei16.vv	v4,v6,v16
> 	addi	a5,a5,96
> 	vrgatherei16.vv	v4,v2,v19,v0.t
> 	vs1r.v	v4,0(a4)
> 	bne	a6,a5,.L4
> 
> No spillings, no "sp" register used.
> 
> Tested on both RV32 and RV64, no regression.
> 
> Ok for trunk ?
> 
> 	PR target/113112
> 
> gcc/ChangeLog:
> 
> 	* config/riscv/riscv-vector-costs.cc (compute_nregs_for_mode): Fix pointer type liveness count.
> 
> gcc/testsuite/ChangeLog:
> 
> 	* gcc.dg/vect/costmodel/riscv/rvv/pr113112-4.c: New test.
> 
> ---
>   gcc/config/riscv/riscv-vector-costs.cc        | 12 ++++++--
>   .../vect/costmodel/riscv/rvv/pr113112-4.c     | 28 +++++++++++++++++++
>   2 files changed, 37 insertions(+), 3 deletions(-)
>   create mode 100644 gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr113112-4.c
> 
> diff --git a/gcc/config/riscv/riscv-vector-costs.cc b/gcc/config/riscv/riscv-vector-costs.cc
> index 0c485dc4f29..b41a79429d4 100644
> --- a/gcc/config/riscv/riscv-vector-costs.cc
> +++ b/gcc/config/riscv/riscv-vector-costs.cc
> @@ -277,9 +277,12 @@ compute_local_live_ranges (
>   	    {
>   	      unsigned int point = program_point.point;
>   	      gimple *stmt = program_point.stmt;
> +	      stmt_vec_info stmt_info = program_point.stmt_info;
>   	      tree lhs = gimple_get_lhs (stmt);
>   	      if (lhs != NULL_TREE && is_gimple_reg (lhs)
> -		  && !POINTER_TYPE_P (TREE_TYPE (lhs)))
> +		  && (!POINTER_TYPE_P (TREE_TYPE (lhs))
> +		      || STMT_VINFO_TYPE (vect_stmt_to_vectorize (stmt_info))
> +			   != store_vec_info_type))
>   		{
>   		  biggest_mode = get_biggest_mode (biggest_mode,
>   						   TYPE_MODE (TREE_TYPE (lhs)));
> @@ -305,7 +308,10 @@ compute_local_live_ranges (
>   		     the future.  */
>   		  if (poly_int_tree_p (var)
>   		      || (is_gimple_val (var)
> -			  && !POINTER_TYPE_P (TREE_TYPE (var))))
> +			  && (!POINTER_TYPE_P (TREE_TYPE (var))
> +			      || STMT_VINFO_TYPE (
> +				   vect_stmt_to_vectorize (stmt_info))
> +				   != load_vec_info_type)))
>   		    {
>   		      biggest_mode
>   			= get_biggest_mode (biggest_mode,
Just a nit.  Why not compute vect_stmt_to_vectorize (stmt_info) into a 
local to improve the bad line break?  Or perhaps even compute 
STMT_VINFO_TYPE (...) into a local?

OK with or without a change for that nit.

jeff

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2023-12-31 17:54 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-12-29  1:21 [PATCH] RISC-V: Count pointer type SSA into RVV regs liveness for dynamic LMUL cost model Juzhe-Zhong
2023-12-31 17:54 ` Jeff Law

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).