public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] RISC-V: Teach liveness estimation be aware of .vi variant
@ 2024-01-04  8:27 Juzhe-Zhong
  2024-01-04  8:45 ` Kito Cheng
  0 siblings, 1 reply; 2+ messages in thread
From: Juzhe-Zhong @ 2024-01-04  8:27 UTC (permalink / raw)
  To: gcc-patches; +Cc: kito.cheng, kito.cheng, jeffreyalaw, rdapp.gcc, Juzhe-Zhong

Consider the following case:

void
f (int *restrict a, int *restrict b, int *restrict c, int *restrict d, int n)
{
  for (int i = 0; i < n; i++)
    {
      int tmp = b[i] + 15;
      int tmp2 = tmp + b[i];
      c[i] = tmp2 + b[i];
      d[i] = tmp + tmp2 + b[i];
    }
}

The current dynamic LMUL cost model chooses LMUL = 4 because it counts the "15" as
consuming one vector register group, which is not accurate.

Teach the dynamic LMUL cost model to be aware of the potential transformation into
vi variant instructions, so that it can choose LMUL = 8 based on a more accurate cost model.

After this patch:

f:
	ble	a4,zero,.L5
.L3:
	vsetvli	a5,a4,e32,m8,ta,ma
	slli	a0,a5,2
	vle32.v	v16,0(a1)
	vadd.vi	v24,v16,15
	vadd.vv	v8,v24,v16
	vadd.vv	v0,v8,v16
	vse32.v	v0,0(a2)
	vadd.vv	v8,v8,v24
	vadd.vv	v8,v8,v16
	vse32.v	v8,0(a3)
	add	a1,a1,a0
	add	a2,a2,a0
	add	a3,a3,a0
	sub	a4,a4,a5
	bne	a4,zero,.L3
.L5:
	ret

Tested on both RV32 and RV64 with no regressions. OK for trunk?

gcc/ChangeLog:

	* config/riscv/riscv-vector-costs.cc (variable_vectorized_p): Teach vi variant.

gcc/testsuite/ChangeLog:

	* gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-13.c: New test.

---
 gcc/config/riscv/riscv-vector-costs.cc        | 30 ++++++--
 .../costmodel/riscv/rvv/dynamic-lmul8-13.c    | 74 +++++++++++++++++++
 2 files changed, 97 insertions(+), 7 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-13.c

diff --git a/gcc/config/riscv/riscv-vector-costs.cc b/gcc/config/riscv/riscv-vector-costs.cc
index 21f8a81c89c..7f083b04edd 100644
--- a/gcc/config/riscv/riscv-vector-costs.cc
+++ b/gcc/config/riscv/riscv-vector-costs.cc
@@ -255,6 +255,29 @@ variable_vectorized_p (stmt_vec_info stmt_info, tree var, bool lhs_p)
 	    return false;
 	}
     }
+  else if (is_gimple_assign (stmt))
+    {
+      tree_code tcode = gimple_assign_rhs_code (stmt);
+      /* vi variant doesn't need to allocate such statement.
+	 E.g. tmp_15 = _4 + 1; will be transformed into vadd.vi
+	 so the INTEGER_CST '1' doesn't need a vector register.  */
+      switch (tcode)
+	{
+	case PLUS_EXPR:
+	case BIT_IOR_EXPR:
+	case BIT_XOR_EXPR:
+	case BIT_AND_EXPR:
+	  return TREE_CODE (var) != INTEGER_CST
+		 || !IN_RANGE (tree_to_shwi (var), -16, 15);
+	case MINUS_EXPR:
+	  return TREE_CODE (var) != INTEGER_CST
+		 || !IN_RANGE (tree_to_shwi (var), -16, 15)
+		 || gimple_assign_rhs1 (stmt) != var;
+	default:
+	  break;
+	}
+    }
+
   if (lhs_p)
     return is_gimple_reg (var)
 	   && (!POINTER_TYPE_P (TREE_TYPE (var))
@@ -331,13 +354,6 @@ compute_local_live_ranges (
 	      for (i = 0; i < gimple_num_args (stmt); i++)
 		{
 		  tree var = gimple_arg (stmt, i);
-		  /* Both IMM and REG are included since a VECTOR_CST may be
-		     potentially held in a vector register.  However, it's not
-		     accurate, since a PLUS_EXPR can be vectorized into vadd.vi
-		     if IMM is -16 ~ 15.
-
-		     TODO: We may elide the cases that the unnecessary IMM in
-		     the future.  */
 		  if (variable_vectorized_p (program_point.stmt_info, var,
 					     false))
 		    {
diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-13.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-13.c
new file mode 100644
index 00000000000..baef4e39014
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-13.c
@@ -0,0 +1,74 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize --param riscv-autovec-lmul=dynamic -fdump-tree-vect-details" } */
+
+void
+f (int *restrict a, int *restrict b, int *restrict c, int *restrict d, int n)
+{
+  for (int i = 0; i < n; i++)
+    {
+      int tmp = b[i] + 15;
+      int tmp2 = tmp + b[i];
+      c[i] = tmp2 + b[i];
+      d[i] = tmp + tmp2 + b[i];
+    }
+}
+
+void
+f2 (int *restrict a, int *restrict b, int *restrict c, int *restrict d, int n)
+{
+  for (int i = 0; i < n; i++)
+    {
+      int tmp = 15 - b[i];
+      int tmp2 = tmp * b[i];
+      c[i] = tmp2 * b[i];
+      d[i] = tmp * tmp2 * b[i];
+    }
+}
+
+void
+f3 (int *restrict a, int *restrict b, int *restrict c, int *restrict d, int n)
+{
+  for (int i = 0; i < n; i++)
+    {
+      int tmp = b[i] & 15;
+      int tmp2 = tmp * b[i];
+      c[i] = tmp2 * b[i];
+      d[i] = tmp * tmp2 * b[i];
+    }
+}
+
+void
+f4 (int *restrict a, int *restrict b, int *restrict c, int *restrict d, int n)
+{
+  for (int i = 0; i < n; i++)
+    {
+      int tmp = b[i] | 15;
+      int tmp2 = tmp * b[i];
+      c[i] = tmp2 * b[i];
+      d[i] = tmp * tmp2 * b[i];
+    }
+}
+
+void
+f5 (int *restrict a, int *restrict b, int *restrict c, int *restrict d, int n)
+{
+  for (int i = 0; i < n; i++)
+    {
+      int tmp = b[i] ^ 15;
+      int tmp2 = tmp * b[i];
+      c[i] = tmp2 * b[i];
+      d[i] = tmp * tmp2 * b[i];
+    }
+}
+
+/* { dg-final { scan-assembler-times {e32,m8} 5 } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
+/* { dg-final { scan-assembler-not {jr} } } */
+/* { dg-final { scan-assembler-not {e32,m4} } } */
+/* { dg-final { scan-assembler-not {e32,m2} } } */
+/* { dg-final { scan-assembler-not {e32,m1} } } */
+/* { dg-final { scan-assembler-times {ret} 5 } } */
+/* { dg-final { scan-tree-dump-not "Preferring smaller LMUL loop because it has unexpected spills" "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 8" 5 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 4" 5 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 2" 5 "vect" } } */
-- 
2.36.3



^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: [PATCH] RISC-V: Teach liveness estimation be aware of .vi variant
  2024-01-04  8:27 [PATCH] RISC-V: Teach liveness estimation be aware of .vi variant Juzhe-Zhong
@ 2024-01-04  8:45 ` Kito Cheng
  0 siblings, 0 replies; 2+ messages in thread
From: Kito Cheng @ 2024-01-04  8:45 UTC (permalink / raw)
  To: Juzhe-Zhong; +Cc: gcc-patches, kito.cheng, jeffreyalaw, rdapp.gcc

Nice catch, LGTM :)

On Thu, Jan 4, 2024 at 4:28 PM Juzhe-Zhong <juzhe.zhong@rivai.ai> wrote:
>
> Consider this following case:
>
> void
> f (int *restrict a, int *restrict b, int *restrict c, int *restrict d, int n)
> {
>   for (int i = 0; i < n; i++)
>     {
>       int tmp = b[i] + 15;
>       int tmp2 = tmp + b[i];
>       c[i] = tmp2 + b[i];
>       d[i] = tmp + tmp2 + b[i];
>     }
> }
>
> Current dynamic LMUL cost model choose LMUL = 4 because we count the "15" as
> consuming 1 vector register group which is not accurate.
>
> We teach the dynamic LMUL cost model be aware of the potential vi variant instructions
> transformation, so that we can choose LMUL = 8 according to more accurate cost model.
>
> After this patch:
>
> f:
>         ble     a4,zero,.L5
> .L3:
>         vsetvli a5,a4,e32,m8,ta,ma
>         slli    a0,a5,2
>         vle32.v v16,0(a1)
>         vadd.vi v24,v16,15
>         vadd.vv v8,v24,v16
>         vadd.vv v0,v8,v16
>         vse32.v v0,0(a2)
>         vadd.vv v8,v8,v24
>         vadd.vv v8,v8,v16
>         vse32.v v8,0(a3)
>         add     a1,a1,a0
>         add     a2,a2,a0
>         add     a3,a3,a0
>         sub     a4,a4,a5
>         bne     a4,zero,.L3
> .L5:
>         ret
>
> Tested on both RV32 and RV64 no regression. Ok for trunk ?
>
> gcc/ChangeLog:
>
>         * config/riscv/riscv-vector-costs.cc (variable_vectorized_p): Teach vi variant.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-13.c: New test.
>
> ---
>  gcc/config/riscv/riscv-vector-costs.cc        | 30 ++++++--
>  .../costmodel/riscv/rvv/dynamic-lmul8-13.c    | 74 +++++++++++++++++++
>  2 files changed, 97 insertions(+), 7 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-13.c
>
> diff --git a/gcc/config/riscv/riscv-vector-costs.cc b/gcc/config/riscv/riscv-vector-costs.cc
> index 21f8a81c89c..7f083b04edd 100644
> --- a/gcc/config/riscv/riscv-vector-costs.cc
> +++ b/gcc/config/riscv/riscv-vector-costs.cc
> @@ -255,6 +255,29 @@ variable_vectorized_p (stmt_vec_info stmt_info, tree var, bool lhs_p)
>             return false;
>         }
>      }
> +  else if (is_gimple_assign (stmt))
> +    {
> +      tree_code tcode = gimple_assign_rhs_code (stmt);
> +      /* vi variant doesn't need to allocate such statement.
> +        E.g. tmp_15 = _4 + 1; will be transformed into vadd.vi
> +        so the INTEGER_CST '1' doesn't need vector a register.  */
> +      switch (tcode)
> +       {
> +       case PLUS_EXPR:
> +       case BIT_IOR_EXPR:
> +       case BIT_XOR_EXPR:
> +       case BIT_AND_EXPR:
> +         return TREE_CODE (var) != INTEGER_CST
> +                || !IN_RANGE (tree_to_shwi (var), -16, 15);
> +       case MINUS_EXPR:
> +         return TREE_CODE (var) != INTEGER_CST
> +                || !IN_RANGE (tree_to_shwi (var), -16, 15)
> +                || gimple_assign_rhs1 (stmt) != var;
> +       default:
> +         break;
> +       }
> +    }
> +
>    if (lhs_p)
>      return is_gimple_reg (var)
>            && (!POINTER_TYPE_P (TREE_TYPE (var))
> @@ -331,13 +354,6 @@ compute_local_live_ranges (
>               for (i = 0; i < gimple_num_args (stmt); i++)
>                 {
>                   tree var = gimple_arg (stmt, i);
> -                 /* Both IMM and REG are included since a VECTOR_CST may be
> -                    potentially held in a vector register.  However, it's not
> -                    accurate, since a PLUS_EXPR can be vectorized into vadd.vi
> -                    if IMM is -16 ~ 15.
> -
> -                    TODO: We may elide the cases that the unnecessary IMM in
> -                    the future.  */
>                   if (variable_vectorized_p (program_point.stmt_info, var,
>                                              false))
>                     {
> diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-13.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-13.c
> new file mode 100644
> index 00000000000..baef4e39014
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-13.c
> @@ -0,0 +1,74 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize --param riscv-autovec-lmul=dynamic -fdump-tree-vect-details" } */
> +
> +void
> +f (int *restrict a, int *restrict b, int *restrict c, int *restrict d, int n)
> +{
> +  for (int i = 0; i < n; i++)
> +    {
> +      int tmp = b[i] + 15;
> +      int tmp2 = tmp + b[i];
> +      c[i] = tmp2 + b[i];
> +      d[i] = tmp + tmp2 + b[i];
> +    }
> +}
> +
> +void
> +f2 (int *restrict a, int *restrict b, int *restrict c, int *restrict d, int n)
> +{
> +  for (int i = 0; i < n; i++)
> +    {
> +      int tmp = 15 - b[i];
> +      int tmp2 = tmp * b[i];
> +      c[i] = tmp2 * b[i];
> +      d[i] = tmp * tmp2 * b[i];
> +    }
> +}
> +
> +void
> +f3 (int *restrict a, int *restrict b, int *restrict c, int *restrict d, int n)
> +{
> +  for (int i = 0; i < n; i++)
> +    {
> +      int tmp = b[i] & 15;
> +      int tmp2 = tmp * b[i];
> +      c[i] = tmp2 * b[i];
> +      d[i] = tmp * tmp2 * b[i];
> +    }
> +}
> +
> +void
> +f4 (int *restrict a, int *restrict b, int *restrict c, int *restrict d, int n)
> +{
> +  for (int i = 0; i < n; i++)
> +    {
> +      int tmp = b[i] | 15;
> +      int tmp2 = tmp * b[i];
> +      c[i] = tmp2 * b[i];
> +      d[i] = tmp * tmp2 * b[i];
> +    }
> +}
> +
> +void
> +f5 (int *restrict a, int *restrict b, int *restrict c, int *restrict d, int n)
> +{
> +  for (int i = 0; i < n; i++)
> +    {
> +      int tmp = b[i] ^ 15;
> +      int tmp2 = tmp * b[i];
> +      c[i] = tmp2 * b[i];
> +      d[i] = tmp * tmp2 * b[i];
> +    }
> +}
> +
> +/* { dg-final { scan-assembler-times {e32,m8} 5 } } */
> +/* { dg-final { scan-assembler-not {csrr} } } */
> +/* { dg-final { scan-assembler-not {jr} } } */
> +/* { dg-final { scan-assembler-not {e32,m4} } } */
> +/* { dg-final { scan-assembler-not {e32,m2} } } */
> +/* { dg-final { scan-assembler-not {e32,m1} } } */
> +/* { dg-final { scan-assembler-times {ret} 5 } } */
> +/* { dg-final { scan-tree-dump-not "Preferring smaller LMUL loop because it has unexpected spills" "vect" } } */
> +/* { dg-final { scan-tree-dump-times "Maximum lmul = 8" 5 "vect" } } */
> +/* { dg-final { scan-tree-dump-times "Maximum lmul = 4" 5 "vect" } } */
> +/* { dg-final { scan-tree-dump-times "Maximum lmul = 2" 5 "vect" } } */
> --
> 2.36.3
>
>

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2024-01-04  8:45 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-01-04  8:27 [PATCH] RISC-V: Teach liveness estimation be aware of .vi variant Juzhe-Zhong
2024-01-04  8:45 ` Kito Cheng

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).