[PATCH v1] RISC-V: Bugfix for the const vector in single steps

public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed

* [PATCH v1] RISC-V: Bugfix for the const vector in single steps
@ 2023-12-20  2:39 pan2.li
  2023-12-20  2:50 ` juzhe.zhong
                   ` (2 more replies)
  0 siblings, 3 replies; 10+ messages in thread
From: pan2.li @ 2023-12-20  2:39 UTC (permalink / raw)
  To: gcc-patches; +Cc: juzhe.zhong, pan2.li, yanzhang.wang, kito.cheng

From: Pan Li <pan2.li@intel.com>

To generate a const vector with a single step, we currently use code
gen similar to the below, given npatterns = 4.

v1= {3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8... }

v2 (diff) = {3 - 0, 2 - 1, 1 - 2, 0 - 3, 7 - 4, 6 - 5, 5 - 6, 4 - 7...}
          = {3, 1, -1, -3, 3, 1, -1, -3 ...}

v1 = v2 + vid.

But this requires that the diff repeats every npatterns elements, like
{3, 1, -1, -3} as above, so it cannot handle a single-step vector such as:

{ -4, 4, -4 + 1, 4 + 1, -4 + 2, 4 + 2, -4 + 3, 4 + 3, ... }

This patch restricts the above code gen to the case where the diff
repeats, and implements a separate code gen for the general case.

gcc/ChangeLog:

	* config/riscv/riscv-v.cc (expand_const_vector): Add restriction
	for the vid-diff code gen and implement general one.

gcc/testsuite/ChangeLog:

	* gcc.target/riscv/rvv/autovec/bug-7.c: New test.

Signed-off-by: Pan Li <pan2.li@intel.com>
---
 gcc/config/riscv/riscv-v.cc                   | 73 +++++++++++++++----
 .../gcc.target/riscv/rvv/autovec/bug-7.c      | 61 ++++++++++++++++
 2 files changed, 119 insertions(+), 15 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/bug-7.c

diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 486f5deb296..946588b7b1f 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -1257,24 +1257,67 @@ expand_const_vector (rtx target, rtx src)
 	  else
 	    {
 	      /* Generate the variable-length vector following this rule:
-		 { a, b, a, b, a + step, b + step, a + step*2, b + step*2, ...}
-		   E.g. { 3, 2, 1, 0, 7, 6, 5, 4, ... } */
-	      /* Step 2: Generate diff = TARGET - VID:
-		 { 3-0, 2-1, 1-2, 0-3, 7-4, 6-5, 5-6, 4-7, ... }*/
+		{ a, b, a + step, b + step, a + step*2, b + step*2, ... }  */
 	      rvv_builder v (builder.mode (), builder.npatterns (), 1);
-	      for (unsigned int i = 0; i < v.npatterns (); ++i)
+	      poly_int64 ele_0 = rtx_to_poly_int64 (builder.elt (0));
+	      poly_int64 ele_n
+		= rtx_to_poly_int64 (builder.elt (v.npatterns ()));
+
+	      if (known_eq (ele_0 - 0, ele_n - v.npatterns ()))
+		{
+		  /* Case 1: For example as below:
+		     {3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8... }
+		     We have 3 - 0 = 3 equals 7 - 4 = 3, the sequence is
+		     repeated as below after minus vid.
+		     {3, 1, -1, -3, 3, 1, -1, -3...}
+		     Then we can simplify the diff code gen to at most
+		     npatterns().  */
+
+		  /* Step 1: Generate diff = TARGET - VID.  */
+		  for (unsigned int i = 0; i < v.npatterns (); ++i)
+		    {
+		     poly_int64 diff = rtx_to_poly_int64 (builder.elt (i)) - i;
+		     v.quick_push (gen_int_mode (diff, v.inner_mode ()));
+		    }
+
+		  /* Step 2: Generate result = VID + diff.  */
+		  rtx vec = v.build ();
+		  rtx add_ops[] = {target, vid, vec};
+		  emit_vlmax_insn (code_for_pred (PLUS, builder.mode ()),
+				   BINARY_OP, add_ops);
+		}
+	      else
 		{
-		  /* Calculate the diff between the target sequence and
-		     vid sequence.  The elt (i) can be either const_int or
-		     const_poly_int. */
-		  poly_int64 diff = rtx_to_poly_int64 (builder.elt (i)) - i;
-		  v.quick_push (gen_int_mode (diff, v.inner_mode ()));
+		  /* Case 2: For example as below:
+		     { -4, 4, -4 + 1, 4 + 1, -4 + 2, 4 + 2, -4 + 3, 4 + 3, ... }
+		   */
+
+		  /* Step 1: Generate { a, b, a, b, ... }  */
+		  for (unsigned int i = 0; i < v.npatterns (); ++i)
+		    v.quick_push (builder.elt (i));
+		  rtx new_base = v.build ();
+
+		  /* Step 2: Generate tmp = VID >> LOG2 (NPATTERNS).  */
+		  rtx shift_count
+		    = gen_int_mode (exact_log2 (builder.npatterns ()),
+				    builder.inner_mode ());
+		  rtx tmp = expand_simple_binop (builder.mode (), LSHIFTRT,
+						 vid, shift_count, NULL_RTX,
+						 false, OPTAB_DIRECT);
+
+		  /* Step 3: Generate tmp2 = tmp * step.  */
+		  rtx tmp2 = gen_reg_rtx (builder.mode ());
+		  rtx step
+		    = simplify_binary_operation (MINUS, builder.inner_mode (),
+						 builder.elt (v.npatterns()),
+						 builder.elt (0));
+		  expand_vec_series (tmp2, const0_rtx, step, tmp);
+
+		  /* Step 4: Generate target = tmp2 + new_base.  */
+		  rtx add_ops[] = {target, tmp2, new_base};
+		  emit_vlmax_insn (code_for_pred (PLUS, builder.mode ()),
+				   BINARY_OP, add_ops);
 		}
-	      /* Step 2: Generate result = VID + diff.  */
-	      rtx vec = v.build ();
-	      rtx add_ops[] = {target, vid, vec};
-	      emit_vlmax_insn (code_for_pred (PLUS, builder.mode ()),
-				BINARY_OP, add_ops);
 	    }
 	}
       else if (builder.interleaved_stepped_npatterns_p ())
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/bug-7.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/bug-7.c
new file mode 100644
index 00000000000..9acac391f65
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/bug-7.c
@@ -0,0 +1,61 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99 -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math" } */
+
+#define N 4
+struct C { int l, r; };
+struct C a[N], b[N], c[N];
+struct C a1[N], b1[N], c1[N];
+
+void __attribute__((noinline))
+init_data_vec (struct C * __restrict a, struct C * __restrict b,
+	       struct C * __restrict c)
+{
+  int i;
+
+  for (i = 0; i < N; ++i)
+    {
+      a[i].l = N - i;
+      a[i].r = i - N;
+
+      b[i].l = i - N;
+      b[i].r = i + N;
+
+      c[i].l = -1 - i;
+      c[i].r = 2 * N - 1 - i;
+    }
+}
+
+int
+main ()
+{
+  int i;
+
+  init_data_vec (a, b, c);
+
+#pragma GCC novector
+  for (i = 0; i < N; ++i)
+    {
+      a1[i].l = N - i;
+      a1[i].r = i - N;
+
+      b1[i].l = i - N;
+      b1[i].r = i + N;
+
+      c1[i].l = -1 - i;
+      c1[i].r = 2 * N - 1 - i;
+    }
+
+  for (i = 0; i < N; i++)
+    {
+      if (a[i].l != a1[i].l || a[i].r != a1[i].r)
+	__builtin_abort ();
+
+      if (b[i].l != b1[i].l || b[i].r != b1[i].r)
+	__builtin_abort ();
+
+      if (c[i].l != c1[i].l || c[i].r != c1[i].r)
+	__builtin_abort ();
+    }
+
+  return 0;
+}
-- 
2.34.1


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v1] RISC-V: Bugfix for the const vector in single steps
  2023-12-20  2:39 [PATCH v1] RISC-V: Bugfix for the const vector in single steps pan2.li
@ 2023-12-20  2:50 ` juzhe.zhong
  2023-12-20  4:02   ` Jeff Law
  2023-12-20  6:56 ` [PATCH v2] " pan2.li
  2023-12-20  9:35 ` [PATCH v3] " pan2.li
  2 siblings, 1 reply; 10+ messages in thread
From: juzhe.zhong @ 2023-12-20  2:50 UTC (permalink / raw)
  To: pan2.li, gcc-patches; +Cc: pan2.li, yanzhang.wang, kito.cheng

[-- Attachment #1: Type: text/plain, Size: 6420 bytes --]


+       if (known_eq (ele_0 - 0, ele_n - v.npatterns ()))


->

for (i = 0; i < v.npatterns (); )
  check each nelt of npatterns is equal to vid.


juzhe.zhong@rivai.ai
 
From: pan2.li
Date: 2023-12-20 10:39
To: gcc-patches
CC: juzhe.zhong; pan2.li; yanzhang.wang; kito.cheng
Subject: [PATCH v1] RISC-V: Bugfix for the const vector in single steps
From: Pan Li <pan2.li@intel.com>
 
For generating the const vector with single step, we have code
gen similar as below.  We have npatterns = 4.
 
v1= {3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8... }
 
v2 (diff) = {3 - 0, 2 - 1, 1 - 2, 0 - 3, 7 - 4, 6 - 5, 5 - 6, 4 - 7...}
          = {3, 1, -1, 3, 3, 1, -1, 3 ...}
 
v1 = vd + vid.
 
But this requires the diff is npattern size repeated like {3, 1, -1, 3}
as above. And it cannot take care of single step as below:
 
{ -4, 4, -4 + 1, 4 + 1, -4 + 2, 4 + 2, -4 + 3, 4 + 3, ...
 
This patch would like to add the restriction to above code gen and
implement one for the general case.
 
gcc/ChangeLog:
 
* config/riscv/riscv-v.cc (expand_const_vector): Add restriction
for the vid-diff code gen and implement general one.
 
gcc/testsuite/ChangeLog:
 
* gcc.target/riscv/rvv/autovec/bug-7.c: New test.
 
Signed-off-by: Pan Li <pan2.li@intel.com>
---
gcc/config/riscv/riscv-v.cc                   | 73 +++++++++++++++----
.../gcc.target/riscv/rvv/autovec/bug-7.c      | 61 ++++++++++++++++
2 files changed, 119 insertions(+), 15 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/bug-7.c
 
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 486f5deb296..946588b7b1f 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -1257,24 +1257,67 @@ expand_const_vector (rtx target, rtx src)
  else
    {
      /* Generate the variable-length vector following this rule:
- { a, b, a, b, a + step, b + step, a + step*2, b + step*2, ...}
-    E.g. { 3, 2, 1, 0, 7, 6, 5, 4, ... } */
-       /* Step 2: Generate diff = TARGET - VID:
- { 3-0, 2-1, 1-2, 0-3, 7-4, 6-5, 5-6, 4-7, ... }*/
+ { a, b, a + step, b + step, a + step*2, b + step*2, ... }  */
      rvv_builder v (builder.mode (), builder.npatterns (), 1);
-       for (unsigned int i = 0; i < v.npatterns (); ++i)
+       poly_int64 ele_0 = rtx_to_poly_int64 (builder.elt (0));
+       poly_int64 ele_n
+ = rtx_to_poly_int64 (builder.elt (v.npatterns ()));
+
+       if (known_eq (ele_0 - 0, ele_n - v.npatterns ()))
+ {
+   /* Case 1: For example as below:
+      {3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8... }
+      We have 3 - 0 = 3 equals 7 - 4 = 3, the sequence is
+      repeated as below after minus vid.
+      {3, 1, -1, -3, 3, 1, -1, -3...}
+      Then we can simplify the diff code gen to at most
+      npatterns().  */
+
+   /* Step 1: Generate diff = TARGET - VID.  */
+   for (unsigned int i = 0; i < v.npatterns (); ++i)
+     {
+      poly_int64 diff = rtx_to_poly_int64 (builder.elt (i)) - i;
+      v.quick_push (gen_int_mode (diff, v.inner_mode ()));
+     }
+
+   /* Step 2: Generate result = VID + diff.  */
+   rtx vec = v.build ();
+   rtx add_ops[] = {target, vid, vec};
+   emit_vlmax_insn (code_for_pred (PLUS, builder.mode ()),
+    BINARY_OP, add_ops);
+ }
+       else
{
-   /* Calculate the diff between the target sequence and
-      vid sequence.  The elt (i) can be either const_int or
-      const_poly_int. */
-   poly_int64 diff = rtx_to_poly_int64 (builder.elt (i)) - i;
-   v.quick_push (gen_int_mode (diff, v.inner_mode ()));
+   /* Case 2: For example as below:
+      { -4, 4, -4 + 1, 4 + 1, -4 + 2, 4 + 2, -4 + 3, 4 + 3, ... }
+    */
+
+   /* Step 1: Generate { a, b, a, b, ... }  */
+   for (unsigned int i = 0; i < v.npatterns (); ++i)
+     v.quick_push (builder.elt (i));
+   rtx new_base = v.build ();
+
+   /* Step 2: Generate tmp = VID >> LOG2 (NPATTERNS).  */
+   rtx shift_count
+     = gen_int_mode (exact_log2 (builder.npatterns ()),
+     builder.inner_mode ());
+   rtx tmp = expand_simple_binop (builder.mode (), LSHIFTRT,
+ vid, shift_count, NULL_RTX,
+ false, OPTAB_DIRECT);
+
+   /* Step 3: Generate tmp2 = tmp * step.  */
+   rtx tmp2 = gen_reg_rtx (builder.mode ());
+   rtx step
+     = simplify_binary_operation (MINUS, builder.inner_mode (),
+ builder.elt (v.npatterns()),
+ builder.elt (0));
+   expand_vec_series (tmp2, const0_rtx, step, tmp);
+
+   /* Step 4: Generate target = tmp2 + new_base.  */
+   rtx add_ops[] = {target, tmp2, new_base};
+   emit_vlmax_insn (code_for_pred (PLUS, builder.mode ()),
+    BINARY_OP, add_ops);
}
-       /* Step 2: Generate result = VID + diff.  */
-       rtx vec = v.build ();
-       rtx add_ops[] = {target, vid, vec};
-       emit_vlmax_insn (code_for_pred (PLUS, builder.mode ()),
- BINARY_OP, add_ops);
    }
}
       else if (builder.interleaved_stepped_npatterns_p ())
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/bug-7.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/bug-7.c
new file mode 100644
index 00000000000..9acac391f65
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/bug-7.c
@@ -0,0 +1,61 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99 -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math" } */
+
+#define N 4
+struct C { int l, r; };
+struct C a[N], b[N], c[N];
+struct C a1[N], b1[N], c1[N];
+
+void __attribute__((noinline))
+init_data_vec (struct C * __restrict a, struct C * __restrict b,
+        struct C * __restrict c)
+{
+  int i;
+
+  for (i = 0; i < N; ++i)
+    {
+      a[i].l = N - i;
+      a[i].r = i - N;
+
+      b[i].l = i - N;
+      b[i].r = i + N;
+
+      c[i].l = -1 - i;
+      c[i].r = 2 * N - 1 - i;
+    }
+}
+
+int
+main ()
+{
+  int i;
+
+  init_data_vec (a, b, c);
+
+#pragma GCC novector
+  for (i = 0; i < N; ++i)
+    {
+      a1[i].l = N - i;
+      a1[i].r = i - N;
+
+      b1[i].l = i - N;
+      b1[i].r = i + N;
+
+      c1[i].l = -1 - i;
+      c1[i].r = 2 * N - 1 - i;
+    }
+
+  for (i = 0; i < N; i++)
+    {
+      if (a[i].l != a1[i].l || a[i].r != a1[i].r)
+ __builtin_abort ();
+
+      if (b[i].l != b1[i].l || b[i].r != b1[i].r)
+ __builtin_abort ();
+
+      if (c[i].l != c1[i].l || c[i].r != c1[i].r)
+ __builtin_abort ();
+    }
+
+  return 0;
+}
-- 
2.34.1
 
 

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v1] RISC-V: Bugfix for the const vector in single steps
  2023-12-20  2:50 ` juzhe.zhong
@ 2023-12-20  4:02   ` Jeff Law
  2023-12-20  4:29     ` Li, Pan2
  0 siblings, 1 reply; 10+ messages in thread
From: Jeff Law @ 2023-12-20  4:02 UTC (permalink / raw)
  To: juzhe.zhong, pan2.li, gcc-patches; +Cc: yanzhang.wang, kito.cheng



On 12/19/23 19:50, juzhe.zhong@rivai.ai wrote:
> 
> +       if (known_eq (ele_0 - 0, ele_n - v.npatterns ()))
> 
> 
> ->
> 
> for (i = 0; i < v.npatterns (); )
>    check each nelt of npatterns is equal to vid.
Pan -- please indicate what testing was performed.  The standard is to 
test with and without the patch to verify there are no regressions.  You 
don't have to test every multilib or anything like that.  Just pick a 
configuration and test it.

No patch should be committed to the tree without this basic information. 
  We've been lax on that policy, but that really needs to change.

jeff

^ permalink raw reply	[flat|nested] 10+ messages in thread

* RE: [PATCH v1] RISC-V: Bugfix for the const vector in single steps
  2023-12-20  4:02   ` Jeff Law
@ 2023-12-20  4:29     ` Li, Pan2
  0 siblings, 0 replies; 10+ messages in thread
From: Li, Pan2 @ 2023-12-20  4:29 UTC (permalink / raw)
  To: Jeff Law, juzhe.zhong, gcc-patches; +Cc: Wang, Yanzhang, kito.cheng

Oh, I see. Thanks Jeff for the suggestion, I will refine the commit log in V2.

Pan

-----Original Message-----
From: Jeff Law <jeffreyalaw@gmail.com> 
Sent: Wednesday, December 20, 2023 12:03 PM
To: juzhe.zhong@rivai.ai; Li, Pan2 <pan2.li@intel.com>; gcc-patches <gcc-patches@gcc.gnu.org>
Cc: Wang, Yanzhang <yanzhang.wang@intel.com>; kito.cheng <kito.cheng@gmail.com>
Subject: Re: [PATCH v1] RISC-V: Bugfix for the const vector in single steps



On 12/19/23 19:50, juzhe.zhong@rivai.ai wrote:
> 
> +       if (known_eq (ele_0 - 0, ele_n - v.npatterns ()))
> 
> 
> ->
> 
> for (i = 0; i < v.npatterns (); )
>    check each nelt of npatterns is equal to vid.
Pan -- please indicate what testing was performed.  The standard is to 
test with and without the patch to verify there are no regressions.  You 
don't have to test every multilib or anything like that.  Just pick a 
configuration and test it.

No patch should be committed to the tree without this basic information. 
  We've been lax on that policy, but that really needs to change.

jeff

^ permalink raw reply	[flat|nested] 10+ messages in thread

* [PATCH v2] RISC-V: Bugfix for the const vector in single steps
  2023-12-20  2:39 [PATCH v1] RISC-V: Bugfix for the const vector in single steps pan2.li
  2023-12-20  2:50 ` juzhe.zhong
@ 2023-12-20  6:56 ` pan2.li
  2023-12-20  6:58   ` juzhe.zhong
  2023-12-20  9:35 ` [PATCH v3] " pan2.li
  2 siblings, 1 reply; 10+ messages in thread
From: pan2.li @ 2023-12-20  6:56 UTC (permalink / raw)
  To: gcc-patches; +Cc: juzhe.zhong, pan2.li, yanzhang.wang, kito.cheng, jeffreyalaw

From: Pan Li <pan2.li@intel.com>

This patch would like to fix the below execution failure.

FAIL: gcc.dg/vect/pr92420.c -flto -ffat-lto-objects execution test

There will be a single-step const vector like { -4, 4, -3, 5, -2, 6, -1, 7, ...}.
To generate such a single-step const vector, we currently emit vid + diff.
For example, given npatterns = 4:

v1= {3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8... }
v2 (diff) = {3 - 0, 2 - 1, 1 - 2, 0 - 3, 7 - 4, 6 - 5, 5 - 6, 4 - 7...}
          = {3, 1, -1, -3, 3, 1, -1, -3 ...}
v1 = v2 + vid.

Unfortunately, that does not work for { -4, 4, -3, 5, -2, 6, -1, 7, ...}
because it has an implicit requirement on the diff: the diff sequence
must repeat every npatterns elements, like the v2 (diff) above.

The diff between { -4, 4, -3, 5, -2, 6, -1, 7, ...} and vid does not
repeat every npatterns elements, so we generate wrong code here. We
implement a new code gen for sequences like { -4, 4, -3, 5, -2, 6, -1, 7, ...}.

The tests below pass with this patch.

* The RV64 regression test with rv64gcv configuration.
* The run test gcc.dg/vect/pr92420.c for below configurations.

riscv-sim/-march=rv64gcv/-mabi=lp64d/-mcmodel=medlow
riscv-sim/-march=rv64gcv/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m1
riscv-sim/-march=rv64gcv/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m1/--param=riscv-autovec-preference=fixed-vlmax
riscv-sim/-march=rv64gcv/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m2
riscv-sim/-march=rv64gcv/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m2/--param=riscv-autovec-preference=fixed-vlmax
riscv-sim/-march=rv64gcv/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m4
riscv-sim/-march=rv64gcv/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m4/--param=riscv-autovec-preference=fixed-vlmax
riscv-sim/-march=rv64gcv/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m8
riscv-sim/-march=rv64gcv/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m8/--param=riscv-autovec-preference=fixed-vlmax
riscv-sim/-march=rv64gcv_zvl256b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m1
riscv-sim/-march=rv64gcv_zvl256b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m1/--param=riscv-autovec-preference=fixed-vlmax
riscv-sim/-march=rv64gcv_zvl256b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m2
riscv-sim/-march=rv64gcv_zvl256b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m2/--param=riscv-autovec-preference=fixed-vlmax
riscv-sim/-march=rv64gcv_zvl256b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m4
riscv-sim/-march=rv64gcv_zvl256b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m4/--param=riscv-autovec-preference=fixed-vlmax
riscv-sim/-march=rv64gcv_zvl256b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m8
riscv-sim/-march=rv64gcv_zvl256b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m8/--param=riscv-autovec-preference=fixed-vlmax
riscv-sim/-march=rv64gcv_zvl512b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m1
riscv-sim/-march=rv64gcv_zvl512b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m1/--param=riscv-autovec-preference=fixed-vlmax
riscv-sim/-march=rv64gcv_zvl512b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m2
riscv-sim/-march=rv64gcv_zvl512b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m2/--param=riscv-autovec-preference=fixed-vlmax
riscv-sim/-march=rv64gcv_zvl512b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m4
riscv-sim/-march=rv64gcv_zvl512b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m4/--param=riscv-autovec-preference=fixed-vlmax
riscv-sim/-march=rv64gcv_zvl512b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m8
riscv-sim/-march=rv64gcv_zvl512b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m8/--param=riscv-autovec-preference=fixed-vlmax

gcc/ChangeLog:

	* config/riscv/riscv-v.cc (expand_const_vector): Add restriction
	for the vid-diff code gen and implement general one.

gcc/testsuite/ChangeLog:

	* gcc.target/riscv/rvv/autovec/bug-7.c: New test.

Signed-off-by: Pan Li <pan2.li@intel.com>
---
 gcc/config/riscv/riscv-v.cc                   | 84 +++++++++++++++----
 .../gcc.target/riscv/rvv/autovec/bug-7.c      | 61 ++++++++++++++
 2 files changed, 130 insertions(+), 15 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/bug-7.c

diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 486f5deb296..5a5899e85ae 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -1257,24 +1257,78 @@ expand_const_vector (rtx target, rtx src)
 	  else
 	    {
 	      /* Generate the variable-length vector following this rule:
-		 { a, b, a, b, a + step, b + step, a + step*2, b + step*2, ...}
-		   E.g. { 3, 2, 1, 0, 7, 6, 5, 4, ... } */
-	      /* Step 2: Generate diff = TARGET - VID:
-		 { 3-0, 2-1, 1-2, 0-3, 7-4, 6-5, 5-6, 4-7, ... }*/
+		{ a, b, a + step, b + step, a + step*2, b + step*2, ... }  */
 	      rvv_builder v (builder.mode (), builder.npatterns (), 1);
-	      for (unsigned int i = 0; i < v.npatterns (); ++i)
+	      bool diff_seq_repeated_p = true;
+
+	      for (unsigned i = 0; i < v.npatterns (); i++)
+		{
+		  poly_int64 diff_0 = rtx_to_poly_int64 (builder.elt (i)) - i;
+		  poly_int64 diff_1 = rtx_to_poly_int64 (
+		    builder.elt (v.npatterns () + i)) - v.npatterns () - i;
+
+		  if (maybe_ne (diff_0, diff_1))
+		    {
+		      diff_seq_repeated_p = false;
+		      break;
+		    }
+		}
+
+	      if (diff_seq_repeated_p)
+		{
+		  /* Case 1: For example as below:
+		     {3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8... }
+		     We have 3 - 0 = 3 equals 7 - 4 = 3, the sequence is
+		     repeated as below after minus vid.
+		     {3, 1, -1, -3, 3, 1, -1, -3...}
+		     Then we can simplify the diff code gen to at most
+		     npatterns().  */
+
+		  /* Step 1: Generate diff = TARGET - VID.  */
+		  for (unsigned int i = 0; i < v.npatterns (); ++i)
+		    {
+		     poly_int64 diff = rtx_to_poly_int64 (builder.elt (i)) - i;
+		     v.quick_push (gen_int_mode (diff, v.inner_mode ()));
+		    }
+
+		  /* Step 2: Generate result = VID + diff.  */
+		  rtx vec = v.build ();
+		  rtx add_ops[] = {target, vid, vec};
+		  emit_vlmax_insn (code_for_pred (PLUS, builder.mode ()),
+				   BINARY_OP, add_ops);
+		}
+	      else
 		{
-		  /* Calculate the diff between the target sequence and
-		     vid sequence.  The elt (i) can be either const_int or
-		     const_poly_int. */
-		  poly_int64 diff = rtx_to_poly_int64 (builder.elt (i)) - i;
-		  v.quick_push (gen_int_mode (diff, v.inner_mode ()));
+		  /* Case 2: For example as below:
+		     { -4, 4, -4 + 1, 4 + 1, -4 + 2, 4 + 2, -4 + 3, 4 + 3, ... }
+		   */
+
+		  /* Step 1: Generate { a, b, a, b, ... }  */
+		  for (unsigned int i = 0; i < v.npatterns (); ++i)
+		    v.quick_push (builder.elt (i));
+		  rtx new_base = v.build ();
+
+		  /* Step 2: Generate tmp = VID >> LOG2 (NPATTERNS).  */
+		  rtx shift_count
+		    = gen_int_mode (exact_log2 (builder.npatterns ()),
+				    builder.inner_mode ());
+		  rtx tmp = expand_simple_binop (builder.mode (), LSHIFTRT,
+						 vid, shift_count, NULL_RTX,
+						 false, OPTAB_DIRECT);
+
+		  /* Step 3: Generate tmp2 = tmp * step.  */
+		  rtx tmp2 = gen_reg_rtx (builder.mode ());
+		  rtx step
+		    = simplify_binary_operation (MINUS, builder.inner_mode (),
+						 builder.elt (v.npatterns()),
+						 builder.elt (0));
+		  expand_vec_series (tmp2, const0_rtx, step, tmp);
+
+		  /* Step 4: Generate target = tmp2 + new_base.  */
+		  rtx add_ops[] = {target, tmp2, new_base};
+		  emit_vlmax_insn (code_for_pred (PLUS, builder.mode ()),
+				   BINARY_OP, add_ops);
 		}
-	      /* Step 2: Generate result = VID + diff.  */
-	      rtx vec = v.build ();
-	      rtx add_ops[] = {target, vid, vec};
-	      emit_vlmax_insn (code_for_pred (PLUS, builder.mode ()),
-				BINARY_OP, add_ops);
 	    }
 	}
       else if (builder.interleaved_stepped_npatterns_p ())
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/bug-7.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/bug-7.c
new file mode 100644
index 00000000000..9acac391f65
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/bug-7.c
@@ -0,0 +1,61 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99 -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math" } */
+
+#define N 4
+struct C { int l, r; };
+struct C a[N], b[N], c[N];
+struct C a1[N], b1[N], c1[N];
+
+void __attribute__((noinline))
+init_data_vec (struct C * __restrict a, struct C * __restrict b,
+	       struct C * __restrict c)
+{
+  int i;
+
+  for (i = 0; i < N; ++i)
+    {
+      a[i].l = N - i;
+      a[i].r = i - N;
+
+      b[i].l = i - N;
+      b[i].r = i + N;
+
+      c[i].l = -1 - i;
+      c[i].r = 2 * N - 1 - i;
+    }
+}
+
+int
+main ()
+{
+  int i;
+
+  init_data_vec (a, b, c);
+
+#pragma GCC novector
+  for (i = 0; i < N; ++i)
+    {
+      a1[i].l = N - i;
+      a1[i].r = i - N;
+
+      b1[i].l = i - N;
+      b1[i].r = i + N;
+
+      c1[i].l = -1 - i;
+      c1[i].r = 2 * N - 1 - i;
+    }
+
+  for (i = 0; i < N; i++)
+    {
+      if (a[i].l != a1[i].l || a[i].r != a1[i].r)
+	__builtin_abort ();
+
+      if (b[i].l != b1[i].l || b[i].r != b1[i].r)
+	__builtin_abort ();
+
+      if (c[i].l != c1[i].l || c[i].r != c1[i].r)
+	__builtin_abort ();
+    }
+
+  return 0;
+}
-- 
2.34.1


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2] RISC-V: Bugfix for the const vector in single steps
  2023-12-20  6:56 ` [PATCH v2] " pan2.li
@ 2023-12-20  6:58   ` juzhe.zhong
  2023-12-20  7:01     ` Li, Pan2
  0 siblings, 1 reply; 10+ messages in thread
From: juzhe.zhong @ 2023-12-20  6:58 UTC (permalink / raw)
  To: pan2.li, gcc-patches; +Cc: pan2.li, yanzhang.wang, kito.cheng, jeffreyalaw

[-- Attachment #1: Type: text/plain, Size: 10239 bytes --]

+	      bool diff_seq_repeated_p = true;
+
+	      for (unsigned i = 0; i < v.npatterns (); i++)
+		{
+		  poly_int64 diff_0 = rtx_to_poly_int64 (builder.elt (i)) - i;
+		  poly_int64 diff_1 = rtx_to_poly_int64 (
+		    builder.elt (v.npatterns () + i)) - v.npatterns () - i;
+
+		  if (maybe_ne (diff_0, diff_1))
+		    {
+		      diff_seq_repeated_p = false;
+		      break;
+		    }
+		}

This code should be wrapped in a helper function inside class rvv_builder.


juzhe.zhong@rivai.ai
 
From: pan2.li
Date: 2023-12-20 14:56
To: gcc-patches
CC: juzhe.zhong; pan2.li; yanzhang.wang; kito.cheng; jeffreyalaw
Subject: [PATCH v2] RISC-V: Bugfix for the const vector in single steps
From: Pan Li <pan2.li@intel.com>
 
This patch would like to fix the below execution failure.
 
FAIL: gcc.dg/vect/pr92420.c -flto -ffat-lto-objects execution test
 
The will be one single step const vector like { -4, 4, -3, 5, -2, 6, -1, 7, ...}.
For such const vector generation with single step, we will generate vid
+ diff here. For example as below, given npatterns = 4.
 
v1= {3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8... }
v2 (diff) = {3 - 0, 2 - 1, 1 - 2, 0 - 3, 7 - 4, 6 - 5, 5 - 6, 4 - 7...}
          = {3, 1, -1, 3, 3, 1, -1, 3 ...}
v1 = vd + vid.
 
Unfortunately, that cannot work well for { -4, 4, -3, 5, -2, 6, -1, 7, ...}
because it has one implicit requirement for the diff. Aka, the diff
sequence in npattern are repeated. For example the v2 (diff) as above.
 
The diff between { -4, 4, -3, 5, -2, 6, -1, 7, ...} and vid are not
npattern size repeated and then we have wrong code here. We implement
one new code gen the sequence like { -4, 4, -3, 5, -2, 6, -1, 7, ...}.
 
The below tests are passed for this patch.
 
* The RV64 regression test with rv64gcv configuration.
* The run test gcc.dg/vect/pr92420.c for below configurations.
 
riscv-sim/-march=rv64gcv/-mabi=lp64d/-mcmodel=medlow
riscv-sim/-march=rv64gcv/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m1
riscv-sim/-march=rv64gcv/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m1/--param=riscv-autovec-preference=fixed-vlmax
riscv-sim/-march=rv64gcv/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m2
riscv-sim/-march=rv64gcv/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m2/--param=riscv-autovec-preference=fixed-vlmax
riscv-sim/-march=rv64gcv/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m4
riscv-sim/-march=rv64gcv/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m4/--param=riscv-autovec-preference=fixed-vlmax
riscv-sim/-march=rv64gcv/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m8
riscv-sim/-march=rv64gcv/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m8/--param=riscv-autovec-preference=fixed-vlmax
riscv-sim/-march=rv64gcv_zvl256b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m1
riscv-sim/-march=rv64gcv_zvl256b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m1/--param=riscv-autovec-preference=fixed-vlmax
riscv-sim/-march=rv64gcv_zvl256b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m2
riscv-sim/-march=rv64gcv_zvl256b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m2/--param=riscv-autovec-preference=fixed-vlmax
riscv-sim/-march=rv64gcv_zvl256b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m4
riscv-sim/-march=rv64gcv_zvl256b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m4/--param=riscv-autovec-preference=fixed-vlmax
riscv-sim/-march=rv64gcv_zvl256b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m8
riscv-sim/-march=rv64gcv_zvl256b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m8/--param=riscv-autovec-preference=fixed-vlmax
riscv-sim/-march=rv64gcv_zvl512b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m1
riscv-sim/-march=rv64gcv_zvl512b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m1/--param=riscv-autovec-preference=fixed-vlmax
riscv-sim/-march=rv64gcv_zvl512b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m2
riscv-sim/-march=rv64gcv_zvl512b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m2/--param=riscv-autovec-preference=fixed-vlmax
riscv-sim/-march=rv64gcv_zvl512b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m4
riscv-sim/-march=rv64gcv_zvl512b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m4/--param=riscv-autovec-preference=fixed-vlmax
riscv-sim/-march=rv64gcv_zvl512b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m8
riscv-sim/-march=rv64gcv_zvl512b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m8/--param=riscv-autovec-preference=fixed-vlmax
 
gcc/ChangeLog:
 
* config/riscv/riscv-v.cc (expand_const_vector): Add restriction
for the vid-diff code gen and implement general one.
 
gcc/testsuite/ChangeLog:
 
* gcc.target/riscv/rvv/autovec/bug-7.c: New test.
 
Signed-off-by: Pan Li <pan2.li@intel.com>
---
gcc/config/riscv/riscv-v.cc                   | 84 +++++++++++++++----
.../gcc.target/riscv/rvv/autovec/bug-7.c      | 61 ++++++++++++++
2 files changed, 130 insertions(+), 15 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/bug-7.c
 
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 486f5deb296..5a5899e85ae 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -1257,24 +1257,78 @@ expand_const_vector (rtx target, rtx src)
  else
    {
      /* Generate the variable-length vector following this rule:
- { a, b, a, b, a + step, b + step, a + step*2, b + step*2, ...}
-    E.g. { 3, 2, 1, 0, 7, 6, 5, 4, ... } */
-       /* Step 2: Generate diff = TARGET - VID:
- { 3-0, 2-1, 1-2, 0-3, 7-4, 6-5, 5-6, 4-7, ... }*/
+ { a, b, a + step, b + step, a + step*2, b + step*2, ... }  */
      rvv_builder v (builder.mode (), builder.npatterns (), 1);
-       for (unsigned int i = 0; i < v.npatterns (); ++i)
+       bool diff_seq_repeated_p = true;
+
+       for (unsigned i = 0; i < v.npatterns (); i++)
+ {
+   poly_int64 diff_0 = rtx_to_poly_int64 (builder.elt (i)) - i;
+   poly_int64 diff_1 = rtx_to_poly_int64 (
+     builder.elt (v.npatterns () + i)) - v.npatterns () - i;
+
+   if (maybe_ne (diff_0, diff_1))
+     {
+       diff_seq_repeated_p = false;
+       break;
+     }
+ }
+
+       if (diff_seq_repeated_p)
+ {
+   /* Case 1: For example as below:
+      {3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8... }
+      We have 3 - 0 = 3 equals 7 - 4 = 3, the sequence is
+      repeated as below after minus vid.
+      {3, 1, -1, -3, 3, 1, -1, -3...}
+      Then we can simplify the diff code gen to at most
+      npatterns().  */
+
+   /* Step 1: Generate diff = TARGET - VID.  */
+   for (unsigned int i = 0; i < v.npatterns (); ++i)
+     {
+      poly_int64 diff = rtx_to_poly_int64 (builder.elt (i)) - i;
+      v.quick_push (gen_int_mode (diff, v.inner_mode ()));
+     }
+
+   /* Step 2: Generate result = VID + diff.  */
+   rtx vec = v.build ();
+   rtx add_ops[] = {target, vid, vec};
+   emit_vlmax_insn (code_for_pred (PLUS, builder.mode ()),
+    BINARY_OP, add_ops);
+ }
+       else
{
-   /* Calculate the diff between the target sequence and
-      vid sequence.  The elt (i) can be either const_int or
-      const_poly_int. */
-   poly_int64 diff = rtx_to_poly_int64 (builder.elt (i)) - i;
-   v.quick_push (gen_int_mode (diff, v.inner_mode ()));
+   /* Case 2: For example as below:
+      { -4, 4, -4 + 1, 4 + 1, -4 + 2, 4 + 2, -4 + 3, 4 + 3, ... }
+    */
+
+   /* Step 1: Generate { a, b, a, b, ... }  */
+   for (unsigned int i = 0; i < v.npatterns (); ++i)
+     v.quick_push (builder.elt (i));
+   rtx new_base = v.build ();
+
+   /* Step 2: Generate tmp = VID >> LOG2 (NPATTERNS).  */
+   rtx shift_count
+     = gen_int_mode (exact_log2 (builder.npatterns ()),
+     builder.inner_mode ());
+   rtx tmp = expand_simple_binop (builder.mode (), LSHIFTRT,
+ vid, shift_count, NULL_RTX,
+ false, OPTAB_DIRECT);
+
+   /* Step 3: Generate tmp2 = tmp * step.  */
+   rtx tmp2 = gen_reg_rtx (builder.mode ());
+   rtx step
+     = simplify_binary_operation (MINUS, builder.inner_mode (),
+ builder.elt (v.npatterns()),
+ builder.elt (0));
+   expand_vec_series (tmp2, const0_rtx, step, tmp);
+
+   /* Step 4: Generate target = tmp2 + new_base.  */
+   rtx add_ops[] = {target, tmp2, new_base};
+   emit_vlmax_insn (code_for_pred (PLUS, builder.mode ()),
+    BINARY_OP, add_ops);
}
-       /* Step 2: Generate result = VID + diff.  */
-       rtx vec = v.build ();
-       rtx add_ops[] = {target, vid, vec};
-       emit_vlmax_insn (code_for_pred (PLUS, builder.mode ()),
- BINARY_OP, add_ops);
    }
}
       else if (builder.interleaved_stepped_npatterns_p ())
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/bug-7.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/bug-7.c
new file mode 100644
index 00000000000..9acac391f65
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/bug-7.c
@@ -0,0 +1,61 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99 -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math" } */
+
+#define N 4
+struct C { int l, r; };
+struct C a[N], b[N], c[N];
+struct C a1[N], b1[N], c1[N];
+
+void __attribute__((noinline))
+init_data_vec (struct C * __restrict a, struct C * __restrict b,
+        struct C * __restrict c)
+{
+  int i;
+
+  for (i = 0; i < N; ++i)
+    {
+      a[i].l = N - i;
+      a[i].r = i - N;
+
+      b[i].l = i - N;
+      b[i].r = i + N;
+
+      c[i].l = -1 - i;
+      c[i].r = 2 * N - 1 - i;
+    }
+}
+
+int
+main ()
+{
+  int i;
+
+  init_data_vec (a, b, c);
+
+#pragma GCC novector
+  for (i = 0; i < N; ++i)
+    {
+      a1[i].l = N - i;
+      a1[i].r = i - N;
+
+      b1[i].l = i - N;
+      b1[i].r = i + N;
+
+      c1[i].l = -1 - i;
+      c1[i].r = 2 * N - 1 - i;
+    }
+
+  for (i = 0; i < N; i++)
+    {
+      if (a[i].l != a1[i].l || a[i].r != a1[i].r)
+ __builtin_abort ();
+
+      if (b[i].l != b1[i].l || b[i].r != b1[i].r)
+ __builtin_abort ();
+
+      if (c[i].l != c1[i].l || c[i].r != c1[i].r)
+ __builtin_abort ();
+    }
+
+  return 0;
+}
-- 
2.34.1
 
 

^ permalink raw reply	[flat|nested] 10+ messages in thread

* RE: [PATCH v2] RISC-V: Bugfix for the const vector in single steps
  2023-12-20  6:58   ` juzhe.zhong
@ 2023-12-20  7:01     ` Li, Pan2
  0 siblings, 0 replies; 10+ messages in thread
From: Li, Pan2 @ 2023-12-20  7:01 UTC (permalink / raw)
  To: juzhe.zhong, gcc-patches; +Cc: Wang, Yanzhang, kito.cheng, jeffreyalaw

[-- Attachment #1: Type: text/plain, Size: 11165 bytes --]

Thanks for reviewing, sure and will send V3 for it.

Pan

From: juzhe.zhong@rivai.ai <juzhe.zhong@rivai.ai>
Sent: Wednesday, December 20, 2023 2:59 PM
To: Li, Pan2 <pan2.li@intel.com>; gcc-patches <gcc-patches@gcc.gnu.org>
Cc: Li, Pan2 <pan2.li@intel.com>; Wang, Yanzhang <yanzhang.wang@intel.com>; kito.cheng <kito.cheng@gmail.com>; jeffreyalaw <jeffreyalaw@gmail.com>
Subject: Re: [PATCH v2] RISC-V: Bugfix for the const vector in single steps


+             bool diff_seq_repeated_p = true;

+

+             for (unsigned i = 0; i < v.npatterns (); i++)

+              {

+                poly_int64 diff_0 = rtx_to_poly_int64 (builder.elt (i)) - i;

+                poly_int64 diff_1 = rtx_to_poly_int64 (

+                  builder.elt (v.npatterns () + i)) - v.npatterns () - i;

+

+                if (maybe_ne (diff_0, diff_1))

+                  {

+                    diff_seq_repeated_p = false;

+                    break;

+                  }

+              }

This code should be wrapped as a helper function inside class rvv_builder
________________________________
juzhe.zhong@rivai.ai<mailto:juzhe.zhong@rivai.ai>

From: pan2.li<mailto:pan2.li@intel.com>
Date: 2023-12-20 14:56
To: gcc-patches<mailto:gcc-patches@gcc.gnu.org>
CC: juzhe.zhong<mailto:juzhe.zhong@rivai.ai>; pan2.li<mailto:pan2.li@intel.com>; yanzhang.wang<mailto:yanzhang.wang@intel.com>; kito.cheng<mailto:kito.cheng@gmail.com>; jeffreyalaw<mailto:jeffreyalaw@gmail.com>
Subject: [PATCH v2] RISC-V: Bugfix for the const vector in single steps
From: Pan Li <pan2.li@intel.com<mailto:pan2.li@intel.com>>

This patch would like to fix the below execution failure.

FAIL: gcc.dg/vect/pr92420.c -flto -ffat-lto-objects execution test

There will be one single step const vector like { -4, 4, -3, 5, -2, 6, -1, 7, ...}.
For such const vector generation with single step, we will generate vid
+ diff here. For example as below, given npatterns = 4.

v1= {3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8... }
v2 (diff) = {3 - 0, 2 - 1, 1 - 2, 0 - 3, 7 - 4, 6 - 5, 5 - 6, 4 - 7...}
          = {3, 1, -1, -3, 3, 1, -1, -3 ...}
v1 = v2 + vid.

Unfortunately, that cannot work well for { -4, 4, -3, 5, -2, 6, -1, 7, ...}
because it has one implicit requirement for the diff: the diff sequence
must repeat every npatterns elements, as the v2 (diff) above does.

The diff between { -4, 4, -3, 5, -2, 6, -1, 7, ...} and vid does not
repeat every npatterns elements, so we generate wrong code here. We
implement a new code gen for sequences like { -4, 4, -3, 5, -2, 6, -1, 7, ...}.

The below tests are passed for this patch.

* The RV64 regression test with rv64gcv configuration.
* The run test gcc.dg/vect/pr92420.c for below configurations.

riscv-sim/-march=rv64gcv/-mabi=lp64d/-mcmodel=medlow
riscv-sim/-march=rv64gcv/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m1
riscv-sim/-march=rv64gcv/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m1/--param=riscv-autovec-preference=fixed-vlmax
riscv-sim/-march=rv64gcv/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m2
riscv-sim/-march=rv64gcv/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m2/--param=riscv-autovec-preference=fixed-vlmax
riscv-sim/-march=rv64gcv/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m4
riscv-sim/-march=rv64gcv/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m4/--param=riscv-autovec-preference=fixed-vlmax
riscv-sim/-march=rv64gcv/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m8
riscv-sim/-march=rv64gcv/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m8/--param=riscv-autovec-preference=fixed-vlmax
riscv-sim/-march=rv64gcv_zvl256b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m1
riscv-sim/-march=rv64gcv_zvl256b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m1/--param=riscv-autovec-preference=fixed-vlmax
riscv-sim/-march=rv64gcv_zvl256b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m2
riscv-sim/-march=rv64gcv_zvl256b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m2/--param=riscv-autovec-preference=fixed-vlmax
riscv-sim/-march=rv64gcv_zvl256b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m4
riscv-sim/-march=rv64gcv_zvl256b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m4/--param=riscv-autovec-preference=fixed-vlmax
riscv-sim/-march=rv64gcv_zvl256b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m8
riscv-sim/-march=rv64gcv_zvl256b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m8/--param=riscv-autovec-preference=fixed-vlmax
riscv-sim/-march=rv64gcv_zvl512b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m1
riscv-sim/-march=rv64gcv_zvl512b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m1/--param=riscv-autovec-preference=fixed-vlmax
riscv-sim/-march=rv64gcv_zvl512b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m2
riscv-sim/-march=rv64gcv_zvl512b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m2/--param=riscv-autovec-preference=fixed-vlmax
riscv-sim/-march=rv64gcv_zvl512b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m4
riscv-sim/-march=rv64gcv_zvl512b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m4/--param=riscv-autovec-preference=fixed-vlmax
riscv-sim/-march=rv64gcv_zvl512b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m8
riscv-sim/-march=rv64gcv_zvl512b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m8/--param=riscv-autovec-preference=fixed-vlmax

gcc/ChangeLog:

* config/riscv/riscv-v.cc (expand_const_vector): Add restriction
for the vid-diff code gen and implement general one.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/bug-7.c: New test.

Signed-off-by: Pan Li <pan2.li@intel.com<mailto:pan2.li@intel.com>>
---
gcc/config/riscv/riscv-v.cc                   | 84 +++++++++++++++----
.../gcc.target/riscv/rvv/autovec/bug-7.c      | 61 ++++++++++++++
2 files changed, 130 insertions(+), 15 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/bug-7.c

diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 486f5deb296..5a5899e85ae 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -1257,24 +1257,78 @@ expand_const_vector (rtx target, rtx src)
  else
    {
      /* Generate the variable-length vector following this rule:
- { a, b, a, b, a + step, b + step, a + step*2, b + step*2, ...}
-    E.g. { 3, 2, 1, 0, 7, 6, 5, 4, ... } */
-       /* Step 2: Generate diff = TARGET - VID:
- { 3-0, 2-1, 1-2, 0-3, 7-4, 6-5, 5-6, 4-7, ... }*/
+ { a, b, a + step, b + step, a + step*2, b + step*2, ... }  */
      rvv_builder v (builder.mode (), builder.npatterns (), 1);
-       for (unsigned int i = 0; i < v.npatterns (); ++i)
+       bool diff_seq_repeated_p = true;
+
+       for (unsigned i = 0; i < v.npatterns (); i++)
+ {
+   poly_int64 diff_0 = rtx_to_poly_int64 (builder.elt (i)) - i;
+   poly_int64 diff_1 = rtx_to_poly_int64 (
+     builder.elt (v.npatterns () + i)) - v.npatterns () - i;
+
+   if (maybe_ne (diff_0, diff_1))
+     {
+       diff_seq_repeated_p = false;
+       break;
+     }
+ }
+
+       if (diff_seq_repeated_p)
+ {
+   /* Case 1: For example as below:
+      {3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8... }
+      We have 3 - 0 = 3 equals 7 - 4 = 3, the sequence is
+      repeated as below after minus vid.
+      {3, 1, -1, -3, 3, 1, -1, -3...}
+      Then we can simplify the diff code gen to at most
+      npatterns().  */
+
+   /* Step 1: Generate diff = TARGET - VID.  */
+   for (unsigned int i = 0; i < v.npatterns (); ++i)
+     {
+      poly_int64 diff = rtx_to_poly_int64 (builder.elt (i)) - i;
+      v.quick_push (gen_int_mode (diff, v.inner_mode ()));
+     }
+
+   /* Step 2: Generate result = VID + diff.  */
+   rtx vec = v.build ();
+   rtx add_ops[] = {target, vid, vec};
+   emit_vlmax_insn (code_for_pred (PLUS, builder.mode ()),
+    BINARY_OP, add_ops);
+ }
+       else
{
-   /* Calculate the diff between the target sequence and
-      vid sequence.  The elt (i) can be either const_int or
-      const_poly_int. */
-   poly_int64 diff = rtx_to_poly_int64 (builder.elt (i)) - i;
-   v.quick_push (gen_int_mode (diff, v.inner_mode ()));
+   /* Case 2: For example as below:
+      { -4, 4, -4 + 1, 4 + 1, -4 + 2, 4 + 2, -4 + 3, 4 + 3, ... }
+    */
+
+   /* Step 1: Generate { a, b, a, b, ... }  */
+   for (unsigned int i = 0; i < v.npatterns (); ++i)
+     v.quick_push (builder.elt (i));
+   rtx new_base = v.build ();
+
+   /* Step 2: Generate tmp = VID >> LOG2 (NPATTERNS).  */
+   rtx shift_count
+     = gen_int_mode (exact_log2 (builder.npatterns ()),
+     builder.inner_mode ());
+   rtx tmp = expand_simple_binop (builder.mode (), LSHIFTRT,
+ vid, shift_count, NULL_RTX,
+ false, OPTAB_DIRECT);
+
+   /* Step 3: Generate tmp2 = tmp * step.  */
+   rtx tmp2 = gen_reg_rtx (builder.mode ());
+   rtx step
+     = simplify_binary_operation (MINUS, builder.inner_mode (),
+ builder.elt (v.npatterns()),
+ builder.elt (0));
+   expand_vec_series (tmp2, const0_rtx, step, tmp);
+
+   /* Step 4: Generate target = tmp2 + new_base.  */
+   rtx add_ops[] = {target, tmp2, new_base};
+   emit_vlmax_insn (code_for_pred (PLUS, builder.mode ()),
+    BINARY_OP, add_ops);
}
-       /* Step 2: Generate result = VID + diff.  */
-       rtx vec = v.build ();
-       rtx add_ops[] = {target, vid, vec};
-       emit_vlmax_insn (code_for_pred (PLUS, builder.mode ()),
- BINARY_OP, add_ops);
    }
}
       else if (builder.interleaved_stepped_npatterns_p ())
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/bug-7.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/bug-7.c
new file mode 100644
index 00000000000..9acac391f65
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/bug-7.c
@@ -0,0 +1,61 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99 -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math" } */
+
+#define N 4
+struct C { int l, r; };
+struct C a[N], b[N], c[N];
+struct C a1[N], b1[N], c1[N];
+
+void __attribute__((noinline))
+init_data_vec (struct C * __restrict a, struct C * __restrict b,
+        struct C * __restrict c)
+{
+  int i;
+
+  for (i = 0; i < N; ++i)
+    {
+      a[i].l = N - i;
+      a[i].r = i - N;
+
+      b[i].l = i - N;
+      b[i].r = i + N;
+
+      c[i].l = -1 - i;
+      c[i].r = 2 * N - 1 - i;
+    }
+}
+
+int
+main ()
+{
+  int i;
+
+  init_data_vec (a, b, c);
+
+#pragma GCC novector
+  for (i = 0; i < N; ++i)
+    {
+      a1[i].l = N - i;
+      a1[i].r = i - N;
+
+      b1[i].l = i - N;
+      b1[i].r = i + N;
+
+      c1[i].l = -1 - i;
+      c1[i].r = 2 * N - 1 - i;
+    }
+
+  for (i = 0; i < N; i++)
+    {
+      if (a[i].l != a1[i].l || a[i].r != a1[i].r)
+ __builtin_abort ();
+
+      if (b[i].l != b1[i].l || b[i].r != b1[i].r)
+ __builtin_abort ();
+
+      if (c[i].l != c1[i].l || c[i].r != c1[i].r)
+ __builtin_abort ();
+    }
+
+  return 0;
+}
--
2.34.1



^ permalink raw reply	[flat|nested] 10+ messages in thread

* [PATCH v3] RISC-V: Bugfix for the const vector in single steps
  2023-12-20  2:39 [PATCH v1] RISC-V: Bugfix for the const vector in single steps pan2.li
  2023-12-20  2:50 ` juzhe.zhong
  2023-12-20  6:56 ` [PATCH v2] " pan2.li
@ 2023-12-20  9:35 ` pan2.li
  2023-12-20  9:36   ` juzhe.zhong
  2 siblings, 1 reply; 10+ messages in thread
From: pan2.li @ 2023-12-20  9:35 UTC (permalink / raw)
  To: gcc-patches; +Cc: juzhe.zhong, pan2.li, yanzhang.wang, kito.cheng, jeffreyalaw

From: Pan Li <pan2.li@intel.com>

This patch would like to fix the below execution failure when building with
"-march=rv64gcv_zvl512b -mabi=lp64d -mcmodel=medlow --param=riscv-autovec-lmul=m8 -ftree-vectorize -fno-vect-cost-model -O3"

FAIL: gcc.dg/vect/pr92420.c -flto -ffat-lto-objects execution test

There will be one single step const vector like { -4, 4, -3, 5, -2, 6, -1, 7, ...}.
For such const vector generation with single step, we will generate vid
+ diff here. For example as below, given npatterns = 4.

v1= {3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8... }
v2 (diff) = {3 - 0, 2 - 1, 1 - 2, 0 - 3, 7 - 4, 6 - 5, 5 - 6, 4 - 7...}
          = {3, 1, -1, -3, 3, 1, -1, -3 ...}
v1 = v2 + vid.

Unfortunately, that cannot work well for { -4, 4, -3, 5, -2, 6, -1, 7, ...}
because it has one implicit requirement for the diff: the diff sequence
must repeat every npatterns elements, as the v2 (diff) above does.

The diff between { -4, 4, -3, 5, -2, 6, -1, 7, ...} and vid does not
repeat every npatterns elements, so we generate wrong code here. We
implement a new code gen for sequences like { -4, 4, -3, 5, -2, 6, -1, 7, ...}.

The below tests are passed for this patch.

* The RV64 regression test with rv64gcv configuration.
* The run test gcc.dg/vect/pr92420.c for below configurations.

riscv-sim/-march=rv64gcv/-mabi=lp64d/-mcmodel=medlow
riscv-sim/-march=rv64gcv/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m1
riscv-sim/-march=rv64gcv/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m1/--param=riscv-autovec-preference=fixed-vlmax
riscv-sim/-march=rv64gcv/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m2
riscv-sim/-march=rv64gcv/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m2/--param=riscv-autovec-preference=fixed-vlmax
riscv-sim/-march=rv64gcv/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m4
riscv-sim/-march=rv64gcv/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m4/--param=riscv-autovec-preference=fixed-vlmax
riscv-sim/-march=rv64gcv/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m8
riscv-sim/-march=rv64gcv/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m8/--param=riscv-autovec-preference=fixed-vlmax
riscv-sim/-march=rv64gcv_zvl256b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m1
riscv-sim/-march=rv64gcv_zvl256b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m1/--param=riscv-autovec-preference=fixed-vlmax
riscv-sim/-march=rv64gcv_zvl256b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m2
riscv-sim/-march=rv64gcv_zvl256b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m2/--param=riscv-autovec-preference=fixed-vlmax
riscv-sim/-march=rv64gcv_zvl256b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m4
riscv-sim/-march=rv64gcv_zvl256b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m4/--param=riscv-autovec-preference=fixed-vlmax
riscv-sim/-march=rv64gcv_zvl256b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m8
riscv-sim/-march=rv64gcv_zvl256b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m8/--param=riscv-autovec-preference=fixed-vlmax
riscv-sim/-march=rv64gcv_zvl512b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m1
riscv-sim/-march=rv64gcv_zvl512b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m1/--param=riscv-autovec-preference=fixed-vlmax
riscv-sim/-march=rv64gcv_zvl512b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m2
riscv-sim/-march=rv64gcv_zvl512b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m2/--param=riscv-autovec-preference=fixed-vlmax
riscv-sim/-march=rv64gcv_zvl512b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m4
riscv-sim/-march=rv64gcv_zvl512b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m4/--param=riscv-autovec-preference=fixed-vlmax
riscv-sim/-march=rv64gcv_zvl512b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m8
riscv-sim/-march=rv64gcv_zvl512b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m8/--param=riscv-autovec-preference=fixed-vlmax

gcc/ChangeLog:

	* config/riscv/riscv-v.cc (rvv_builder::npatterns_vid_diff_repeated_p):
	New function to predicate the diff to vid is repeated or not.
	(expand_const_vector): Add restriction
	for the vid-diff code gen and implement general one.

gcc/testsuite/ChangeLog:

	* gcc.target/riscv/rvv/autovec/bug-7.c: New test.

Signed-off-by: Pan Li <pan2.li@intel.com>
---
 gcc/config/riscv/riscv-v.cc                   | 111 +++++++++++++++---
 .../gcc.target/riscv/rvv/autovec/bug-7.c      |  61 ++++++++++
 2 files changed, 156 insertions(+), 16 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/bug-7.c

diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 486f5deb296..3b9be255799 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -433,6 +433,7 @@ public:
   bool single_step_npatterns_p () const;
   bool npatterns_all_equal_p () const;
   bool interleaved_stepped_npatterns_p () const;
+  bool npatterns_vid_diff_repeated_p () const;
 
   machine_mode new_mode () const { return m_new_mode; }
   scalar_mode inner_mode () const { return m_inner_mode; }
@@ -669,6 +670,43 @@ rvv_builder::single_step_npatterns_p () const
   return true;
 }
 
+/* Return true if the diff between const vector and vid sequence
+   is repeated. For example as below cases:
+   The diff means the const vector - vid.
+     CASE 1:
+     CONST VECTOR: {3, 2, 1, 0, 7, 6, 5, 4, ... }
+     VID         : {0, 1, 2, 3, 4, 5, 6, 7, ... }
+     DIFF(MINUS) : {3, 1,-1,-3, 3, 1,-1,-3, ... }
+     The diff sequence {3, 1,-1,-3} is repeated in the npattern and
+     return TRUE for case 1.
+
+     CASE 2:
+     CONST VECTOR: {-4, 4,-3, 5,-2, 6,-1, 7, ...}
+     VID         : { 0, 1, 2, 3, 4, 5, 6, 7, ... }
+     DIFF(MINUS) : {-4, 3,-5, 2,-6, 1,-7, 0, ... }
+     The diff sequence {-4, 3} is not repeated in the npattern and
+     return FALSE for case 2.  */
+bool
+rvv_builder::npatterns_vid_diff_repeated_p () const
+{
+  if (nelts_per_pattern () != 3)
+    return false;
+  else if (npatterns () == 0)
+    return false;
+
+  for (unsigned i = 0; i < npatterns (); i++)
+    {
+      poly_int64 diff_0 = rtx_to_poly_int64 (elt (i)) - i;
+      poly_int64 diff_1
+	= rtx_to_poly_int64 (elt (npatterns () + i)) - npatterns () - i;
+
+      if (maybe_ne (diff_0, diff_1))
+	return false;
+    }
+
+  return true;
+}
+
 /* Return true if the permutation consists of two
    interleaved patterns with a constant step each.
    TODO: We currently only support NPATTERNS = 2.  */
@@ -1257,24 +1295,65 @@ expand_const_vector (rtx target, rtx src)
 	  else
 	    {
 	      /* Generate the variable-length vector following this rule:
-		 { a, b, a, b, a + step, b + step, a + step*2, b + step*2, ...}
-		   E.g. { 3, 2, 1, 0, 7, 6, 5, 4, ... } */
-	      /* Step 2: Generate diff = TARGET - VID:
-		 { 3-0, 2-1, 1-2, 0-3, 7-4, 6-5, 5-6, 4-7, ... }*/
-	      rvv_builder v (builder.mode (), builder.npatterns (), 1);
-	      for (unsigned int i = 0; i < v.npatterns (); ++i)
+		{ a, b, a + step, b + step, a + step*2, b + step*2, ... }  */
+
+	      if (builder.npatterns_vid_diff_repeated_p ())
+		{
+		  /* Case 1: For example as below:
+		     {3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8... }
+		     We have 3 - 0 = 3 equals 7 - 4 = 3, the sequence is
+		     repeated as below after minus vid.
+		     {3, 1, -1, -3, 3, 1, -1, -3...}
+		     Then we can simplify the diff code gen to at most
+		     npatterns().  */
+		  rvv_builder v (builder.mode (), builder.npatterns (), 1);
+
+		  /* Step 1: Generate diff = TARGET - VID.  */
+		  for (unsigned int i = 0; i < v.npatterns (); ++i)
+		    {
+		     poly_int64 diff = rtx_to_poly_int64 (builder.elt (i)) - i;
+		     v.quick_push (gen_int_mode (diff, v.inner_mode ()));
+		    }
+
+		  /* Step 2: Generate result = VID + diff.  */
+		  rtx vec = v.build ();
+		  rtx add_ops[] = {target, vid, vec};
+		  emit_vlmax_insn (code_for_pred (PLUS, builder.mode ()),
+				   BINARY_OP, add_ops);
+		}
+	      else
 		{
-		  /* Calculate the diff between the target sequence and
-		     vid sequence.  The elt (i) can be either const_int or
-		     const_poly_int. */
-		  poly_int64 diff = rtx_to_poly_int64 (builder.elt (i)) - i;
-		  v.quick_push (gen_int_mode (diff, v.inner_mode ()));
+		  /* Case 2: For example as below:
+		     { -4, 4, -4 + 1, 4 + 1, -4 + 2, 4 + 2, -4 + 3, 4 + 3, ... }
+		   */
+		  rvv_builder v (builder.mode (), builder.npatterns (), 1);
+
+		  /* Step 1: Generate { a, b, a, b, ... }  */
+		  for (unsigned int i = 0; i < v.npatterns (); ++i)
+		    v.quick_push (builder.elt (i));
+		  rtx new_base = v.build ();
+
+		  /* Step 2: Generate tmp = VID >> LOG2 (NPATTERNS).  */
+		  rtx shift_count
+		    = gen_int_mode (exact_log2 (builder.npatterns ()),
+				    builder.inner_mode ());
+		  rtx tmp = expand_simple_binop (builder.mode (), LSHIFTRT,
+						 vid, shift_count, NULL_RTX,
+						 false, OPTAB_DIRECT);
+
+		  /* Step 3: Generate tmp2 = tmp * step.  */
+		  rtx tmp2 = gen_reg_rtx (builder.mode ());
+		  rtx step
+		    = simplify_binary_operation (MINUS, builder.inner_mode (),
+						 builder.elt (v.npatterns()),
+						 builder.elt (0));
+		  expand_vec_series (tmp2, const0_rtx, step, tmp);
+
+		  /* Step 4: Generate target = tmp2 + new_base.  */
+		  rtx add_ops[] = {target, tmp2, new_base};
+		  emit_vlmax_insn (code_for_pred (PLUS, builder.mode ()),
+				   BINARY_OP, add_ops);
 		}
-	      /* Step 2: Generate result = VID + diff.  */
-	      rtx vec = v.build ();
-	      rtx add_ops[] = {target, vid, vec};
-	      emit_vlmax_insn (code_for_pred (PLUS, builder.mode ()),
-				BINARY_OP, add_ops);
 	    }
 	}
       else if (builder.interleaved_stepped_npatterns_p ())
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/bug-7.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/bug-7.c
new file mode 100644
index 00000000000..9acac391f65
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/bug-7.c
@@ -0,0 +1,61 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99 -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math" } */
+
+#define N 4
+struct C { int l, r; };
+struct C a[N], b[N], c[N];
+struct C a1[N], b1[N], c1[N];
+
+void __attribute__((noinline))
+init_data_vec (struct C * __restrict a, struct C * __restrict b,
+	       struct C * __restrict c)
+{
+  int i;
+
+  for (i = 0; i < N; ++i)
+    {
+      a[i].l = N - i;
+      a[i].r = i - N;
+
+      b[i].l = i - N;
+      b[i].r = i + N;
+
+      c[i].l = -1 - i;
+      c[i].r = 2 * N - 1 - i;
+    }
+}
+
+int
+main ()
+{
+  int i;
+
+  init_data_vec (a, b, c);
+
+#pragma GCC novector
+  for (i = 0; i < N; ++i)
+    {
+      a1[i].l = N - i;
+      a1[i].r = i - N;
+
+      b1[i].l = i - N;
+      b1[i].r = i + N;
+
+      c1[i].l = -1 - i;
+      c1[i].r = 2 * N - 1 - i;
+    }
+
+  for (i = 0; i < N; i++)
+    {
+      if (a[i].l != a1[i].l || a[i].r != a1[i].r)
+	__builtin_abort ();
+
+      if (b[i].l != b1[i].l || b[i].r != b1[i].r)
+	__builtin_abort ();
+
+      if (c[i].l != c1[i].l || c[i].r != c1[i].r)
+	__builtin_abort ();
+    }
+
+  return 0;
+}
-- 
2.34.1


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v3] RISC-V: Bugfix for the const vector in single steps
  2023-12-20  9:35 ` [PATCH v3] " pan2.li
@ 2023-12-20  9:36   ` juzhe.zhong
  2023-12-20  9:39     ` Li, Pan2
  0 siblings, 1 reply; 10+ messages in thread
From: juzhe.zhong @ 2023-12-20  9:36 UTC (permalink / raw)
  To: pan2.li, gcc-patches; +Cc: pan2.li, yanzhang.wang, kito.cheng, jeffreyalaw

[-- Attachment #1: Type: text/plain, Size: 11550 bytes --]

OK。



juzhe.zhong@rivai.ai
 
From: pan2.li
Date: 2023-12-20 17:35
To: gcc-patches
CC: juzhe.zhong; pan2.li; yanzhang.wang; kito.cheng; jeffreyalaw
Subject: [PATCH v3] RISC-V: Bugfix for the const vector in single steps
From: Pan Li <pan2.li@intel.com>
 
This patch would like to fix the below execution failure when building with
"-march=rv64gcv_zvl512b -mabi=lp64d -mcmodel=medlow --param=riscv-autovec-lmul=m8 -ftree-vectorize -fno-vect-cost-model -O3"
 
FAIL: gcc.dg/vect/pr92420.c -flto -ffat-lto-objects execution test
 
There will be one single step const vector like { -4, 4, -3, 5, -2, 6, -1, 7, ...}.
For such const vector generation with single step, we will generate vid
+ diff here. For example as below, given npatterns = 4.
 
v1= {3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8... }
v2 (diff) = {3 - 0, 2 - 1, 1 - 2, 0 - 3, 7 - 4, 6 - 5, 5 - 6, 4 - 7...}
          = {3, 1, -1, -3, 3, 1, -1, -3 ...}
v1 = v2 + vid.
 
Unfortunately, that cannot work well for { -4, 4, -3, 5, -2, 6, -1, 7, ...}
because it has one implicit requirement for the diff: the diff sequence
must repeat every npatterns elements, as the v2 (diff) above does.

The diff between { -4, 4, -3, 5, -2, 6, -1, 7, ...} and vid does not
repeat every npatterns elements, so we generate wrong code here. We
implement a new code gen for sequences like { -4, 4, -3, 5, -2, 6, -1, 7, ...}.
 
The below tests are passed for this patch.
 
* The RV64 regression test with rv64gcv configuration.
* The run test gcc.dg/vect/pr92420.c for below configurations.
 
riscv-sim/-march=rv64gcv/-mabi=lp64d/-mcmodel=medlow
riscv-sim/-march=rv64gcv/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m1
riscv-sim/-march=rv64gcv/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m1/--param=riscv-autovec-preference=fixed-vlmax
riscv-sim/-march=rv64gcv/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m2
riscv-sim/-march=rv64gcv/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m2/--param=riscv-autovec-preference=fixed-vlmax
riscv-sim/-march=rv64gcv/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m4
riscv-sim/-march=rv64gcv/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m4/--param=riscv-autovec-preference=fixed-vlmax
riscv-sim/-march=rv64gcv/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m8
riscv-sim/-march=rv64gcv/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m8/--param=riscv-autovec-preference=fixed-vlmax
riscv-sim/-march=rv64gcv_zvl256b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m1
riscv-sim/-march=rv64gcv_zvl256b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m1/--param=riscv-autovec-preference=fixed-vlmax
riscv-sim/-march=rv64gcv_zvl256b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m2
riscv-sim/-march=rv64gcv_zvl256b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m2/--param=riscv-autovec-preference=fixed-vlmax
riscv-sim/-march=rv64gcv_zvl256b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m4
riscv-sim/-march=rv64gcv_zvl256b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m4/--param=riscv-autovec-preference=fixed-vlmax
riscv-sim/-march=rv64gcv_zvl256b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m8
riscv-sim/-march=rv64gcv_zvl256b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m8/--param=riscv-autovec-preference=fixed-vlmax
riscv-sim/-march=rv64gcv_zvl512b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m1
riscv-sim/-march=rv64gcv_zvl512b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m1/--param=riscv-autovec-preference=fixed-vlmax
riscv-sim/-march=rv64gcv_zvl512b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m2
riscv-sim/-march=rv64gcv_zvl512b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m2/--param=riscv-autovec-preference=fixed-vlmax
riscv-sim/-march=rv64gcv_zvl512b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m4
riscv-sim/-march=rv64gcv_zvl512b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m4/--param=riscv-autovec-preference=fixed-vlmax
riscv-sim/-march=rv64gcv_zvl512b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m8
riscv-sim/-march=rv64gcv_zvl512b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m8/--param=riscv-autovec-preference=fixed-vlmax
 
gcc/ChangeLog:
 
* config/riscv/riscv-v.cc (rvv_builder::npatterns_vid_diff_repeated_p):
New function to predicate the diff to vid is repeated or not.
(expand_const_vector): Add restriction
for the vid-diff code gen and implement general one.
 
gcc/testsuite/ChangeLog:
 
* gcc.target/riscv/rvv/autovec/bug-7.c: New test.
 
Signed-off-by: Pan Li <pan2.li@intel.com>
---
gcc/config/riscv/riscv-v.cc                   | 111 +++++++++++++++---
.../gcc.target/riscv/rvv/autovec/bug-7.c      |  61 ++++++++++
2 files changed, 156 insertions(+), 16 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/bug-7.c
 
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 486f5deb296..3b9be255799 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -433,6 +433,7 @@ public:
   bool single_step_npatterns_p () const;
   bool npatterns_all_equal_p () const;
   bool interleaved_stepped_npatterns_p () const;
+  bool npatterns_vid_diff_repeated_p () const;
   machine_mode new_mode () const { return m_new_mode; }
   scalar_mode inner_mode () const { return m_inner_mode; }
@@ -669,6 +670,43 @@ rvv_builder::single_step_npatterns_p () const
   return true;
}
+/* Return true if the diff between const vector and vid sequence
+   is repeated. For example as below cases:
+   The diff means the const vector - vid.
+     CASE 1:
+     CONST VECTOR: {3, 2, 1, 0, 7, 6, 5, 4, ... }
+     VID         : {0, 1, 2, 3, 4, 5, 6, 7, ... }
+     DIFF(MINUS) : {3, 1,-1,-3, 3, 1,-1,-3, ... }
+     The diff sequence {3, 1,-1,-3} is repeated in the npattern and
+     return TRUE for case 1.
+
+     CASE 2:
+     CONST VECTOR: {-4, 4,-3, 5,-2, 6,-1, 7, ...}
+     VID         : { 0, 1, 2, 3, 4, 5, 6, 7, ... }
+     DIFF(MINUS) : {-4, 3,-5, 2,-6, 1,-7, 0, ... }
+     The diff sequence {-4, 3} is not repeated in the npattern and
+     return FALSE for case 2.  */
+bool
+rvv_builder::npatterns_vid_diff_repeated_p () const
+{
+  if (nelts_per_pattern () != 3)
+    return false;
+  else if (npatterns () == 0)
+    return false;
+
+  for (unsigned i = 0; i < npatterns (); i++)
+    {
+      poly_int64 diff_0 = rtx_to_poly_int64 (elt (i)) - i;
+      poly_int64 diff_1
+ = rtx_to_poly_int64 (elt (npatterns () + i)) - npatterns () - i;
+
+      if (maybe_ne (diff_0, diff_1))
+ return false;
+    }
+
+  return true;
+}
+
/* Return true if the permutation consists of two
    interleaved patterns with a constant step each.
    TODO: We currently only support NPATTERNS = 2.  */
@@ -1257,24 +1295,65 @@ expand_const_vector (rtx target, rtx src)
  else
    {
      /* Generate the variable-length vector following this rule:
- { a, b, a, b, a + step, b + step, a + step*2, b + step*2, ...}
-    E.g. { 3, 2, 1, 0, 7, 6, 5, 4, ... } */
-       /* Step 2: Generate diff = TARGET - VID:
- { 3-0, 2-1, 1-2, 0-3, 7-4, 6-5, 5-6, 4-7, ... }*/
-       rvv_builder v (builder.mode (), builder.npatterns (), 1);
-       for (unsigned int i = 0; i < v.npatterns (); ++i)
+ { a, b, a + step, b + step, a + step*2, b + step*2, ... }  */
+
+       if (builder.npatterns_vid_diff_repeated_p ())
+ {
+   /* Case 1: For example as below:
+      {3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8... }
+      We have 3 - 0 = 3 equals 7 - 4 = 3, the sequence is
+      repeated as below after minus vid.
+      {3, 1, -1, -3, 3, 1, -1, -3...}
+      Then we can simplify the diff code gen to at most
+      npatterns().  */
+   rvv_builder v (builder.mode (), builder.npatterns (), 1);
+
+   /* Step 1: Generate diff = TARGET - VID.  */
+   for (unsigned int i = 0; i < v.npatterns (); ++i)
+     {
+      poly_int64 diff = rtx_to_poly_int64 (builder.elt (i)) - i;
+      v.quick_push (gen_int_mode (diff, v.inner_mode ()));
+     }
+
+   /* Step 2: Generate result = VID + diff.  */
+   rtx vec = v.build ();
+   rtx add_ops[] = {target, vid, vec};
+   emit_vlmax_insn (code_for_pred (PLUS, builder.mode ()),
+    BINARY_OP, add_ops);
+ }
+       else
{
-   /* Calculate the diff between the target sequence and
-      vid sequence.  The elt (i) can be either const_int or
-      const_poly_int. */
-   poly_int64 diff = rtx_to_poly_int64 (builder.elt (i)) - i;
-   v.quick_push (gen_int_mode (diff, v.inner_mode ()));
+   /* Case 2: For example as below:
+      { -4, 4, -4 + 1, 4 + 1, -4 + 2, 4 + 2, -4 + 3, 4 + 3, ... }
+    */
+   rvv_builder v (builder.mode (), builder.npatterns (), 1);
+
+   /* Step 1: Generate { a, b, a, b, ... }  */
+   for (unsigned int i = 0; i < v.npatterns (); ++i)
+     v.quick_push (builder.elt (i));
+   rtx new_base = v.build ();
+
+   /* Step 2: Generate tmp = VID >> LOG2 (NPATTERNS).  */
+   rtx shift_count
+     = gen_int_mode (exact_log2 (builder.npatterns ()),
+     builder.inner_mode ());
+   rtx tmp = expand_simple_binop (builder.mode (), LSHIFTRT,
+ vid, shift_count, NULL_RTX,
+ false, OPTAB_DIRECT);
+
+   /* Step 3: Generate tmp2 = tmp * step.  */
+   rtx tmp2 = gen_reg_rtx (builder.mode ());
+   rtx step
+     = simplify_binary_operation (MINUS, builder.inner_mode (),
+ builder.elt (v.npatterns()),
+ builder.elt (0));
+   expand_vec_series (tmp2, const0_rtx, step, tmp);
+
+   /* Step 4: Generate target = tmp2 + new_base.  */
+   rtx add_ops[] = {target, tmp2, new_base};
+   emit_vlmax_insn (code_for_pred (PLUS, builder.mode ()),
+    BINARY_OP, add_ops);
}
-       /* Step 2: Generate result = VID + diff.  */
-       rtx vec = v.build ();
-       rtx add_ops[] = {target, vid, vec};
-       emit_vlmax_insn (code_for_pred (PLUS, builder.mode ()),
- BINARY_OP, add_ops);
    }
}
       else if (builder.interleaved_stepped_npatterns_p ())
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/bug-7.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/bug-7.c
new file mode 100644
index 00000000000..9acac391f65
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/bug-7.c
@@ -0,0 +1,61 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99 -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math" } */
+
+#define N 4
+struct C { int l, r; };
+struct C a[N], b[N], c[N];
+struct C a1[N], b1[N], c1[N];
+
+void __attribute__((noinline))
+init_data_vec (struct C * __restrict a, struct C * __restrict b,
+        struct C * __restrict c)
+{
+  int i;
+
+  for (i = 0; i < N; ++i)
+    {
+      a[i].l = N - i;
+      a[i].r = i - N;
+
+      b[i].l = i - N;
+      b[i].r = i + N;
+
+      c[i].l = -1 - i;
+      c[i].r = 2 * N - 1 - i;
+    }
+}
+
+int
+main ()
+{
+  int i;
+
+  init_data_vec (a, b, c);
+
+#pragma GCC novector
+  for (i = 0; i < N; ++i)
+    {
+      a1[i].l = N - i;
+      a1[i].r = i - N;
+
+      b1[i].l = i - N;
+      b1[i].r = i + N;
+
+      c1[i].l = -1 - i;
+      c1[i].r = 2 * N - 1 - i;
+    }
+
+  for (i = 0; i < N; i++)
+    {
+      if (a[i].l != a1[i].l || a[i].r != a1[i].r)
+ __builtin_abort ();
+
+      if (b[i].l != b1[i].l || b[i].r != b1[i].r)
+ __builtin_abort ();
+
+      if (c[i].l != c1[i].l || c[i].r != c1[i].r)
+ __builtin_abort ();
+    }
+
+  return 0;
+}
-- 
2.34.1
 
 

^ permalink raw reply	[flat|nested] 10+ messages in thread

* RE: [PATCH v3] RISC-V: Bugfix for the const vector in single steps
  2023-12-20  9:36   ` juzhe.zhong
@ 2023-12-20  9:39     ` Li, Pan2
  0 siblings, 0 replies; 10+ messages in thread
From: Li, Pan2 @ 2023-12-20  9:39 UTC (permalink / raw)
  To: juzhe.zhong, gcc-patches; +Cc: Wang, Yanzhang, kito.cheng, jeffreyalaw

[-- Attachment #1: Type: text/plain, Size: 12276 bytes --]

Committed, thanks Juzhe.

Pan

From: juzhe.zhong@rivai.ai <juzhe.zhong@rivai.ai>
Sent: Wednesday, December 20, 2023 5:37 PM
To: Li, Pan2 <pan2.li@intel.com>; gcc-patches <gcc-patches@gcc.gnu.org>
Cc: Li, Pan2 <pan2.li@intel.com>; Wang, Yanzhang <yanzhang.wang@intel.com>; kito.cheng <kito.cheng@gmail.com>; jeffreyalaw <jeffreyalaw@gmail.com>
Subject: Re: [PATCH v3] RISC-V: Bugfix for the const vector in single steps

OK。

________________________________
juzhe.zhong@rivai.ai<mailto:juzhe.zhong@rivai.ai>

From: pan2.li<mailto:pan2.li@intel.com>
Date: 2023-12-20 17:35
To: gcc-patches<mailto:gcc-patches@gcc.gnu.org>
CC: juzhe.zhong<mailto:juzhe.zhong@rivai.ai>; pan2.li<mailto:pan2.li@intel.com>; yanzhang.wang<mailto:yanzhang.wang@intel.com>; kito.cheng<mailto:kito.cheng@gmail.com>; jeffreyalaw<mailto:jeffreyalaw@gmail.com>
Subject: [PATCH v3] RISC-V: Bugfix for the const vector in single steps
From: Pan Li <pan2.li@intel.com<mailto:pan2.li@intel.com>>

This patch fixes the execution failure below when building with
"-march=rv64gcv_zvl512b -mabi=lp64d -mcmodel=medlow --param=riscv-autovec-lmul=m8 -ftree-vectorize -fno-vect-cost-model -O3"

FAIL: gcc.dg/vect/pr92420.c -flto -ffat-lto-objects execution test

There will be a single-step const vector like { -4, 4, -3, 5, -2, 6, -1, 7, ...}.
For such a single-step const vector, we generate vid + diff here.
For example, as below, given npatterns = 4.

v1= {3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8... }
v2 (diff) = {3 - 0, 2 - 1, 1 - 2, 0 - 3, 7 - 4, 6 - 5, 5 - 6, 4 - 7...}
          = {3, 1, -1, -3, 3, 1, -1, -3 ...}
v1 = v2 + vid.

Unfortunately, that does not work for { -4, 4, -3, 5, -2, 6, -1, 7, ...}
because it has one implicit requirement on the diff: the diff sequence
must repeat every npatterns elements, as v2 (diff) does above.

The diff between { -4, 4, -3, 5, -2, 6, -1, 7, ...} and vid does not
repeat every npatterns elements, so we generate wrong code here. We implement
a new code gen for sequences like { -4, 4, -3, 5, -2, 6, -1, 7, ...}.

The below tests are passed for this patch.

* The RV64 regression test with rv64gcv configuration.
* The run test gcc.dg/vect/pr92420.c for below configurations.

riscv-sim/-march=rv64gcv/-mabi=lp64d/-mcmodel=medlow
riscv-sim/-march=rv64gcv/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m1
riscv-sim/-march=rv64gcv/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m1/--param=riscv-autovec-preference=fixed-vlmax
riscv-sim/-march=rv64gcv/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m2
riscv-sim/-march=rv64gcv/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m2/--param=riscv-autovec-preference=fixed-vlmax
riscv-sim/-march=rv64gcv/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m4
riscv-sim/-march=rv64gcv/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m4/--param=riscv-autovec-preference=fixed-vlmax
riscv-sim/-march=rv64gcv/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m8
riscv-sim/-march=rv64gcv/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m8/--param=riscv-autovec-preference=fixed-vlmax
riscv-sim/-march=rv64gcv_zvl256b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m1
riscv-sim/-march=rv64gcv_zvl256b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m1/--param=riscv-autovec-preference=fixed-vlmax
riscv-sim/-march=rv64gcv_zvl256b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m2
riscv-sim/-march=rv64gcv_zvl256b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m2/--param=riscv-autovec-preference=fixed-vlmax
riscv-sim/-march=rv64gcv_zvl256b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m4
riscv-sim/-march=rv64gcv_zvl256b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m4/--param=riscv-autovec-preference=fixed-vlmax
riscv-sim/-march=rv64gcv_zvl256b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m8
riscv-sim/-march=rv64gcv_zvl256b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m8/--param=riscv-autovec-preference=fixed-vlmax
riscv-sim/-march=rv64gcv_zvl512b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m1
riscv-sim/-march=rv64gcv_zvl512b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m1/--param=riscv-autovec-preference=fixed-vlmax
riscv-sim/-march=rv64gcv_zvl512b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m2
riscv-sim/-march=rv64gcv_zvl512b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m2/--param=riscv-autovec-preference=fixed-vlmax
riscv-sim/-march=rv64gcv_zvl512b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m4
riscv-sim/-march=rv64gcv_zvl512b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m4/--param=riscv-autovec-preference=fixed-vlmax
riscv-sim/-march=rv64gcv_zvl512b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m8
riscv-sim/-march=rv64gcv_zvl512b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m8/--param=riscv-autovec-preference=fixed-vlmax

gcc/ChangeLog:

* config/riscv/riscv-v.cc (rvv_builder::npatterns_vid_diff_repeated_p):
New function to predicate whether the diff to vid is repeated.
(expand_const_vector): Add restriction
for the vid-diff code gen and implement general one.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/bug-7.c: New test.

Signed-off-by: Pan Li <pan2.li@intel.com<mailto:pan2.li@intel.com>>
---
gcc/config/riscv/riscv-v.cc                   | 111 +++++++++++++++---
.../gcc.target/riscv/rvv/autovec/bug-7.c      |  61 ++++++++++
2 files changed, 156 insertions(+), 16 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/bug-7.c

diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 486f5deb296..3b9be255799 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -433,6 +433,7 @@ public:
   bool single_step_npatterns_p () const;
   bool npatterns_all_equal_p () const;
   bool interleaved_stepped_npatterns_p () const;
+  bool npatterns_vid_diff_repeated_p () const;
   machine_mode new_mode () const { return m_new_mode; }
   scalar_mode inner_mode () const { return m_inner_mode; }
@@ -669,6 +670,43 @@ rvv_builder::single_step_npatterns_p () const
   return true;
}
+/* Return true if the diff between const vector and vid sequence
+   is repeated. For example as below cases:
+   The diff means the const vector - vid.
+     CASE 1:
+     CONST VECTOR: {3, 2, 1, 0, 7, 6, 5, 4, ... }
+     VID         : {0, 1, 2, 3, 4, 5, 6, 7, ... }
+     DIFF(MINUS) : {3, 1,-1,-3, 3, 1,-1,-3, ... }
+     The diff sequence {3, 1,-1,-3} is repeated in the npattern and
+     return TRUE for case 1.
+
+     CASE 2:
+     CONST VECTOR: {-4, 4,-3, 5,-2, 6,-1, 7, ...}
+     VID         : { 0, 1, 2, 3, 4, 5, 6, 7, ... }
+     DIFF(MINUS) : {-4, 3,-5, 2,-6, 1,-7, 0, ... }
+     The diff sequence {-4, 3} is not repeated in the npattern and
+     return FALSE for case 2.  */
+bool
+rvv_builder::npatterns_vid_diff_repeated_p () const
+{
+  if (nelts_per_pattern () != 3)
+    return false;
+  else if (npatterns () == 0)
+    return false;
+
+  for (unsigned i = 0; i < npatterns (); i++)
+    {
+      poly_int64 diff_0 = rtx_to_poly_int64 (elt (i)) - i;
+      poly_int64 diff_1
+ = rtx_to_poly_int64 (elt (npatterns () + i)) - npatterns () - i;
+
+      if (maybe_ne (diff_0, diff_1))
+ return false;
+    }
+
+  return true;
+}
+
/* Return true if the permutation consists of two
    interleaved patterns with a constant step each.
    TODO: We currently only support NPATTERNS = 2.  */
@@ -1257,24 +1295,65 @@ expand_const_vector (rtx target, rtx src)
  else
    {
      /* Generate the variable-length vector following this rule:
- { a, b, a, b, a + step, b + step, a + step*2, b + step*2, ...}
-    E.g. { 3, 2, 1, 0, 7, 6, 5, 4, ... } */
-       /* Step 2: Generate diff = TARGET - VID:
- { 3-0, 2-1, 1-2, 0-3, 7-4, 6-5, 5-6, 4-7, ... }*/
-       rvv_builder v (builder.mode (), builder.npatterns (), 1);
-       for (unsigned int i = 0; i < v.npatterns (); ++i)
+ { a, b, a + step, b + step, a + step*2, b + step*2, ... }  */
+
+       if (builder.npatterns_vid_diff_repeated_p ())
+ {
+   /* Case 1: For example as below:
+      {3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8... }
+      We have 3 - 0 = 3 equals 7 - 4 = 3, the sequence is
+      repeated as below after minus vid.
+      {3, 1, -1, -3, 3, 1, -1, -3...}
+      Then we can simplify the diff code gen to at most
+      npatterns().  */
+   rvv_builder v (builder.mode (), builder.npatterns (), 1);
+
+   /* Step 1: Generate diff = TARGET - VID.  */
+   for (unsigned int i = 0; i < v.npatterns (); ++i)
+     {
+      poly_int64 diff = rtx_to_poly_int64 (builder.elt (i)) - i;
+      v.quick_push (gen_int_mode (diff, v.inner_mode ()));
+     }
+
+   /* Step 2: Generate result = VID + diff.  */
+   rtx vec = v.build ();
+   rtx add_ops[] = {target, vid, vec};
+   emit_vlmax_insn (code_for_pred (PLUS, builder.mode ()),
+    BINARY_OP, add_ops);
+ }
+       else
{
-   /* Calculate the diff between the target sequence and
-      vid sequence.  The elt (i) can be either const_int or
-      const_poly_int. */
-   poly_int64 diff = rtx_to_poly_int64 (builder.elt (i)) - i;
-   v.quick_push (gen_int_mode (diff, v.inner_mode ()));
+   /* Case 2: For example as below:
+      { -4, 4, -4 + 1, 4 + 1, -4 + 2, 4 + 2, -4 + 3, 4 + 3, ... }
+    */
+   rvv_builder v (builder.mode (), builder.npatterns (), 1);
+
+   /* Step 1: Generate { a, b, a, b, ... }  */
+   for (unsigned int i = 0; i < v.npatterns (); ++i)
+     v.quick_push (builder.elt (i));
+   rtx new_base = v.build ();
+
+   /* Step 2: Generate tmp = VID >> LOG2 (NPATTERNS).  */
+   rtx shift_count
+     = gen_int_mode (exact_log2 (builder.npatterns ()),
+     builder.inner_mode ());
+   rtx tmp = expand_simple_binop (builder.mode (), LSHIFTRT,
+ vid, shift_count, NULL_RTX,
+ false, OPTAB_DIRECT);
+
+   /* Step 3: Generate tmp2 = tmp * step.  */
+   rtx tmp2 = gen_reg_rtx (builder.mode ());
+   rtx step
+     = simplify_binary_operation (MINUS, builder.inner_mode (),
+ builder.elt (v.npatterns()),
+ builder.elt (0));
+   expand_vec_series (tmp2, const0_rtx, step, tmp);
+
+   /* Step 4: Generate target = tmp2 + new_base.  */
+   rtx add_ops[] = {target, tmp2, new_base};
+   emit_vlmax_insn (code_for_pred (PLUS, builder.mode ()),
+    BINARY_OP, add_ops);
}
-       /* Step 2: Generate result = VID + diff.  */
-       rtx vec = v.build ();
-       rtx add_ops[] = {target, vid, vec};
-       emit_vlmax_insn (code_for_pred (PLUS, builder.mode ()),
- BINARY_OP, add_ops);
    }
}
       else if (builder.interleaved_stepped_npatterns_p ())
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/bug-7.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/bug-7.c
new file mode 100644
index 00000000000..9acac391f65
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/bug-7.c
@@ -0,0 +1,61 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99 -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math" } */
+
+#define N 4
+struct C { int l, r; };
+struct C a[N], b[N], c[N];
+struct C a1[N], b1[N], c1[N];
+
+void __attribute__((noinline))
+init_data_vec (struct C * __restrict a, struct C * __restrict b,
+        struct C * __restrict c)
+{
+  int i;
+
+  for (i = 0; i < N; ++i)
+    {
+      a[i].l = N - i;
+      a[i].r = i - N;
+
+      b[i].l = i - N;
+      b[i].r = i + N;
+
+      c[i].l = -1 - i;
+      c[i].r = 2 * N - 1 - i;
+    }
+}
+
+int
+main ()
+{
+  int i;
+
+  init_data_vec (a, b, c);
+
+#pragma GCC novector
+  for (i = 0; i < N; ++i)
+    {
+      a1[i].l = N - i;
+      a1[i].r = i - N;
+
+      b1[i].l = i - N;
+      b1[i].r = i + N;
+
+      c1[i].l = -1 - i;
+      c1[i].r = 2 * N - 1 - i;
+    }
+
+  for (i = 0; i < N; i++)
+    {
+      if (a[i].l != a1[i].l || a[i].r != a1[i].r)
+ __builtin_abort ();
+
+      if (b[i].l != b1[i].l || b[i].r != b1[i].r)
+ __builtin_abort ();
+
+      if (c[i].l != c1[i].l || c[i].r != c1[i].r)
+ __builtin_abort ();
+    }
+
+  return 0;
+}
--
2.34.1



^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2023-12-20  9:40 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-12-20  2:39 [PATCH v1] RISC-V: Bugfix for the const vector in single steps pan2.li
2023-12-20  2:50 ` juzhe.zhong
2023-12-20  4:02   ` Jeff Law
2023-12-20  4:29     ` Li, Pan2
2023-12-20  6:56 ` [PATCH v2] " pan2.li
2023-12-20  6:58   ` juzhe.zhong
2023-12-20  7:01     ` Li, Pan2
2023-12-20  9:35 ` [PATCH v3] " pan2.li
2023-12-20  9:36   ` juzhe.zhong
2023-12-20  9:39     ` Li, Pan2

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).