[PATCH v1] RISC-V: Bugfix for the const vector in single steps

public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed

From: pan2.li@intel.com
To: gcc-patches@gcc.gnu.org
Cc: juzhe.zhong@rivai.ai, pan2.li@intel.com, yanzhang.wang@intel.com,
	kito.cheng@gmail.com
Subject: [PATCH v1] RISC-V: Bugfix for the const vector in single steps
Date: Wed, 20 Dec 2023 10:39:22 +0800	[thread overview]
Message-ID: <20231220023922.1076198-1-pan2.li@intel.com> (raw)

From: Pan Li <pan2.li@intel.com>

For generating the const vector with single step, we have code
gen similar as below.  We have npatterns = 4.

v1= {3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8... }

v2 (diff) = {3 - 0, 2 - 1, 1 - 2, 0 - 3, 7 - 4, 6 - 5, 5 - 6, 4 - 7...}
          = {3, 1, -1, 3, 3, 1, -1, 3 ...}

v1 = vd + vid.

But this requires the diff is npattern size repeated like {3, 1, -1, 3}
as above. And it cannot take care of single step as below:

{ -4, 4, -4 + 1, 4 + 1, -4 + 2, 4 + 2, -4 + 3, 4 + 3, ...

This patch would like to add the restriction to above code gen and
implement one for the general case.

gcc/ChangeLog:

	* config/riscv/riscv-v.cc (expand_const_vector): Add restriction
	for the vid-diff code gen and implement general one.

gcc/testsuite/ChangeLog:

	* gcc.target/riscv/rvv/autovec/bug-7.c: New test.

Signed-off-by: Pan Li <pan2.li@intel.com>
---
 gcc/config/riscv/riscv-v.cc                   | 73 +++++++++++++++----
 .../gcc.target/riscv/rvv/autovec/bug-7.c      | 61 ++++++++++++++++
 2 files changed, 119 insertions(+), 15 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/bug-7.c

diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 486f5deb296..946588b7b1f 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -1257,24 +1257,67 @@ expand_const_vector (rtx target, rtx src)
 	  else
 	    {
 	      /* Generate the variable-length vector following this rule:
-		 { a, b, a, b, a + step, b + step, a + step*2, b + step*2, ...}
-		   E.g. { 3, 2, 1, 0, 7, 6, 5, 4, ... } */
-	      /* Step 2: Generate diff = TARGET - VID:
-		 { 3-0, 2-1, 1-2, 0-3, 7-4, 6-5, 5-6, 4-7, ... }*/
+		{ a, b, a + step, b + step, a + step*2, b + step*2, ... }  */
 	      rvv_builder v (builder.mode (), builder.npatterns (), 1);
-	      for (unsigned int i = 0; i < v.npatterns (); ++i)
+	      poly_int64 ele_0 = rtx_to_poly_int64 (builder.elt (0));
+	      poly_int64 ele_n
+		= rtx_to_poly_int64 (builder.elt (v.npatterns ()));
+
+	      if (known_eq (ele_0 - 0, ele_n - v.npatterns ()))
+		{
+		  /* Case 1: For example as below:
+		     {3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8... }
+		     We have 3 - 0 = 3 equals 7 - 4 = 3, the sequence is
+		     repeated as below after minus vid.
+		     {3, 1, -1, -3, 3, 1, -1, -3...}
+		     Then we can simplify the diff code gen to at most
+		     npatterns().  */
+
+		  /* Step 1: Generate diff = TARGET - VID.  */
+		  for (unsigned int i = 0; i < v.npatterns (); ++i)
+		    {
+		     poly_int64 diff = rtx_to_poly_int64 (builder.elt (i)) - i;
+		     v.quick_push (gen_int_mode (diff, v.inner_mode ()));
+		    }
+
+		  /* Step 2: Generate result = VID + diff.  */
+		  rtx vec = v.build ();
+		  rtx add_ops[] = {target, vid, vec};
+		  emit_vlmax_insn (code_for_pred (PLUS, builder.mode ()),
+				   BINARY_OP, add_ops);
+		}
+	      else
 		{
-		  /* Calculate the diff between the target sequence and
-		     vid sequence.  The elt (i) can be either const_int or
-		     const_poly_int. */
-		  poly_int64 diff = rtx_to_poly_int64 (builder.elt (i)) - i;
-		  v.quick_push (gen_int_mode (diff, v.inner_mode ()));
+		  /* Case 2: For example as below:
+		     { -4, 4, -4 + 1, 4 + 1, -4 + 2, 4 + 2, -4 + 3, 4 + 3, ... }
+		   */
+
+		  /* Step 1: Generate { a, b, a, b, ... }  */
+		  for (unsigned int i = 0; i < v.npatterns (); ++i)
+		    v.quick_push (builder.elt (i));
+		  rtx new_base = v.build ();
+
+		  /* Step 2: Generate tmp = VID >> LOG2 (NPATTERNS).  */
+		  rtx shift_count
+		    = gen_int_mode (exact_log2 (builder.npatterns ()),
+				    builder.inner_mode ());
+		  rtx tmp = expand_simple_binop (builder.mode (), LSHIFTRT,
+						 vid, shift_count, NULL_RTX,
+						 false, OPTAB_DIRECT);
+
+		  /* Step 3: Generate tmp2 = tmp * step.  */
+		  rtx tmp2 = gen_reg_rtx (builder.mode ());
+		  rtx step
+		    = simplify_binary_operation (MINUS, builder.inner_mode (),
+						 builder.elt (v.npatterns()),
+						 builder.elt (0));
+		  expand_vec_series (tmp2, const0_rtx, step, tmp);
+
+		  /* Step 4: Generate target = tmp2 + new_base.  */
+		  rtx add_ops[] = {target, tmp2, new_base};
+		  emit_vlmax_insn (code_for_pred (PLUS, builder.mode ()),
+				   BINARY_OP, add_ops);
 		}
-	      /* Step 2: Generate result = VID + diff.  */
-	      rtx vec = v.build ();
-	      rtx add_ops[] = {target, vid, vec};
-	      emit_vlmax_insn (code_for_pred (PLUS, builder.mode ()),
-				BINARY_OP, add_ops);
 	    }
 	}
       else if (builder.interleaved_stepped_npatterns_p ())
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/bug-7.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/bug-7.c
new file mode 100644
index 00000000000..9acac391f65
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/bug-7.c
@@ -0,0 +1,61 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99 -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math" } */
+
+#define N 4
+struct C { int l, r; };
+struct C a[N], b[N], c[N];
+struct C a1[N], b1[N], c1[N];
+
+void __attribute__((noinline))
+init_data_vec (struct C * __restrict a, struct C * __restrict b,
+	       struct C * __restrict c)
+{
+  int i;
+
+  for (i = 0; i < N; ++i)
+    {
+      a[i].l = N - i;
+      a[i].r = i - N;
+
+      b[i].l = i - N;
+      b[i].r = i + N;
+
+      c[i].l = -1 - i;
+      c[i].r = 2 * N - 1 - i;
+    }
+}
+
+int
+main ()
+{
+  int i;
+
+  init_data_vec (a, b, c);
+
+#pragma GCC novector
+  for (i = 0; i < N; ++i)
+    {
+      a1[i].l = N - i;
+      a1[i].r = i - N;
+
+      b1[i].l = i - N;
+      b1[i].r = i + N;
+
+      c1[i].l = -1 - i;
+      c1[i].r = 2 * N - 1 - i;
+    }
+
+  for (i = 0; i < N; i++)
+    {
+      if (a[i].l != a1[i].l || a[i].r != a1[i].r)
+	__builtin_abort ();
+
+      if (b[i].l != b1[i].l || b[i].r != b1[i].r)
+	__builtin_abort ();
+
+      if (c[i].l != c1[i].l || c[i].r != c1[i].r)
+	__builtin_abort ();
+    }
+
+  return 0;
+}
-- 
2.34.1

next             reply	other threads:[~2023-12-20  2:39 UTC|newest]

Thread overview: 10+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-12-20  2:39 pan2.li [this message]
2023-12-20  2:50 ` juzhe.zhong
2023-12-20  4:02   ` Jeff Law
2023-12-20  4:29     ` Li, Pan2
2023-12-20  6:56 ` [PATCH v2] " pan2.li
2023-12-20  6:58   ` juzhe.zhong
2023-12-20  7:01     ` Li, Pan2
2023-12-20  9:35 ` [PATCH v3] " pan2.li
2023-12-20  9:36   ` juzhe.zhong
2023-12-20  9:39     ` Li, Pan2

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20231220023922.1076198-1-pan2.li@intel.com \
    --to=pan2.li@intel.com \
    --cc=gcc-patches@gcc.gnu.org \
    --cc=juzhe.zhong@rivai.ai \
    --cc=kito.cheng@gmail.com \
    --cc=yanzhang.wang@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).