public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] RISC-V: Fix vec_series expander[PR110985]
@ 2023-08-11  8:45 Juzhe-Zhong
  2023-08-11 14:46 ` Jeff Law
  0 siblings, 1 reply; 3+ messages in thread
From: Juzhe-Zhong @ 2023-08-11  8:45 UTC (permalink / raw)
  To: gcc-patches; +Cc: kito.cheng, kito.cheng, jeffreyalaw, rdapp.gcc, Juzhe-Zhong

This patch fix bug: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110985

	PR target/110985

gcc/ChangeLog:

	* config/riscv/riscv-v.cc (expand_vec_series): Refactor the expander.

gcc/testsuite/ChangeLog:

	* gcc.target/riscv/rvv/autovec/vls-vlmax/pr110985.c: New test.

---
 gcc/config/riscv/riscv-v.cc                   | 74 +++++++--------
 .../riscv/rvv/autovec/vls-vlmax/pr110985.c    | 90 +++++++++++++++++++
 2 files changed, 129 insertions(+), 35 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/pr110985.c

diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index a3062c90618..5f9b296c92e 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -1309,6 +1309,7 @@ expand_vec_series (rtx dest, rtx base, rtx step)
   machine_mode mode = GET_MODE (dest);
   poly_int64 nunits_m1 = GET_MODE_NUNITS (mode) - 1;
   poly_int64 value;
+  rtx result = register_operand (dest, mode) ? dest : gen_reg_rtx (mode);
 
   /* VECT_IV = BASE + I * STEP.  */
 
@@ -1317,15 +1318,10 @@ expand_vec_series (rtx dest, rtx base, rtx step)
   rtx op[] = {vid};
   emit_vlmax_insn (code_for_pred_series (mode), RVV_MISC_OP, op);
 
-  /* Step 2: Generate I * STEP.
-     - STEP is 1, we don't emit any instructions.
-     - STEP is power of 2, we use vsll.vi/vsll.vx.
-     - STEP is non-power of 2, we use vmul.vx.  */
   rtx step_adj;
-  if (rtx_equal_p (step, const1_rtx))
-    step_adj = vid;
-  else if (rtx_equal_p (step, constm1_rtx) && poly_int_rtx_p (base, &value)
-	   && known_eq (nunits_m1, value))
+  if (rtx_equal_p (step, constm1_rtx)
+      && poly_int_rtx_p (base, &value)
+      && known_eq (nunits_m1, value))
     {
       /* Special case:
 	   {nunits - 1, nunits - 2, ... , 0}.
@@ -1334,46 +1330,54 @@ expand_vec_series (rtx dest, rtx base, rtx step)
 	 Code sequence:
 	   vid.v v
 	   vrsub nunits - 1, v.  */
-      rtx ops[] = {dest, vid, gen_int_mode (nunits_m1, GET_MODE_INNER (mode))};
+      rtx ops[]
+	= {result, vid, gen_int_mode (nunits_m1, GET_MODE_INNER (mode))};
       insn_code icode = code_for_pred_sub_reverse_scalar (mode);
       emit_vlmax_insn (icode, RVV_BINOP, ops);
-      return;
     }
   else
     {
-      step_adj = gen_reg_rtx (mode);
-      if (CONST_INT_P (step) && pow2p_hwi (INTVAL (step)))
+      /* Step 2: Generate I * STEP.
+	 - STEP is 1, we don't emit any instructions.
+	 - STEP is power of 2, we use vsll.vi/vsll.vx.
+	 - STEP is non-power of 2, we use vmul.vx.  */
+      if (rtx_equal_p (step, const1_rtx))
+	step_adj = vid;
+      else
 	{
-	  /* Emit logical left shift operation.  */
-	  int shift = exact_log2 (INTVAL (step));
-	  rtx shift_amount = gen_int_mode (shift, Pmode);
-	  insn_code icode = code_for_pred_scalar (ASHIFT, mode);
-	  rtx ops[] = {step_adj, vid, shift_amount};
-	  emit_vlmax_insn (icode, RVV_BINOP, ops);
+	  step_adj = gen_reg_rtx (mode);
+	  if (CONST_INT_P (step) && pow2p_hwi (INTVAL (step)))
+	    {
+	      /* Emit logical left shift operation.  */
+	      int shift = exact_log2 (INTVAL (step));
+	      rtx shift_amount = gen_int_mode (shift, Pmode);
+	      insn_code icode = code_for_pred_scalar (ASHIFT, mode);
+	      rtx ops[] = {step_adj, vid, shift_amount};
+	      emit_vlmax_insn (icode, RVV_BINOP, ops);
+	    }
+	  else
+	    {
+	      insn_code icode = code_for_pred_scalar (MULT, mode);
+	      rtx ops[] = {step_adj, vid, step};
+	      emit_vlmax_insn (icode, RVV_BINOP, ops);
+	    }
 	}
+
+      /* Step 3: Generate BASE + I * STEP.
+	  - BASE is 0, use result of vid.
+	  - BASE is not 0, we use vadd.vx/vadd.vi.  */
+      if (rtx_equal_p (base, const0_rtx))
+	emit_move_insn (result, step_adj);
       else
 	{
-	  insn_code icode = code_for_pred_scalar (MULT, mode);
-	  rtx ops[] = {step_adj, vid, step};
+	  insn_code icode = code_for_pred_scalar (PLUS, mode);
+	  rtx ops[] = {result, step_adj, base};
 	  emit_vlmax_insn (icode, RVV_BINOP, ops);
 	}
     }
 
-  /* Step 3: Generate BASE + I * STEP.
-     - BASE is 0, use result of vid.
-     - BASE is not 0, we use vadd.vx/vadd.vi.  */
-  if (rtx_equal_p (base, const0_rtx))
-    {
-      emit_move_insn (dest, step_adj);
-    }
-  else
-    {
-      rtx result = gen_reg_rtx (mode);
-      insn_code icode = code_for_pred_scalar (PLUS, mode);
-      rtx ops[] = {result, step_adj, base};
-      emit_vlmax_insn (icode, RVV_BINOP, ops);
-      emit_move_insn (dest, result);
-    }
+  if (result != dest)
+    emit_move_insn (dest, result);
 }
 
 static void
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/pr110985.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/pr110985.c
new file mode 100644
index 00000000000..7710654c1bb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/pr110985.c
@@ -0,0 +1,90 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvl256b -mabi=lp64d -O3 --param=riscv-autovec-preference=fixed-vlmax -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include <stdint-gcc.h>
+
+typedef int16_t vnx16i __attribute__ ((vector_size (32)));
+
+/*
+** foo1:
+**   vsetivli\s+zero,\s*16,\s*e16,\s*m1,\s*t[au],\s*m[au]
+**   vid\.v\s+v[0-9]+
+**   vrsub\.vi\s+v[0-9]+,\s*v[0-9]+,\s*15
+**   vs1r\.v\s+v[0-9]+,\s*0\([a-x0-9]+\)
+**   ret
+*/
+void
+foo1 (int16_t *__restrict out)
+{
+  vnx16i v = {15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0};
+  *(vnx16i *) out = v;
+}
+
+/*
+** foo2:
+**   vsetivli\s+zero,\s*16,\s*e16,\s*m1,\s*t[au],\s*m[au]
+**   vid\.v\s+v[0-9]+
+**   li\s+[a-x0-9]+,\s*7
+**   vmul\.vx\s+v[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+**   vadd\.vi\s+v[0-9]+,\s*v[0-9]+,\s*3
+**   vs1r\.v\s+v[0-9]+,\s*0\([a-x0-9]+\)
+**   ret
+*/
+void
+foo2 (int16_t *__restrict out)
+{
+  vnx16i v
+    = {3,	   3 + 7 * 1,  3 + 7 * 2,  3 + 7 * 3, 3 + 7 * 4,  3 + 7 * 5,
+       3 + 7 * 6,  3 + 7 * 7,  3 + 7 * 8,  3 + 7 * 9, 3 + 7 * 10, 3 + 7 * 11,
+       3 + 7 * 12, 3 + 7 * 13, 3 + 7 * 14, 3 + 7 * 15};
+  *(vnx16i *) out = v;
+}
+
+/*
+** foo3:
+**   vsetivli\s+zero,\s*16,\s*e16,\s*m1,\s*t[au],\s*m[au]
+**   vid\.v\s+v[0-9]+
+**   vs1r\.v\s+v[0-9]+,\s*0\([a-x0-9]+\)
+**   ret
+*/
+void
+foo3 (int16_t *__restrict out)
+{
+  vnx16i v
+    = {0, 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
+  *(vnx16i *) out = v;
+}
+
+/*
+** foo4:
+**   vsetivli\s+zero,\s*16,\s*e16,\s*m1,\s*t[au],\s*m[au]
+**   vid\.v\s+v[0-9]+
+**   li\s+[a-x0-9]+,\s*6
+**   vmul\.vx\s+v[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+**   vs1r\.v\s+v[0-9]+,\s*0\([a-x0-9]+\)
+**   ret
+*/
+void
+foo4 (int16_t *__restrict out)
+{
+  vnx16i v
+    = {0*6, 1*6,2*6,3*6,4*6,5*6,6*6,7*6,8*6,9*6,10*6,11*6,12*6,13*6,14*6,15*6};
+  *(vnx16i *) out = v;
+}
+
+/*
+** foo5:
+**   vsetivli\s+zero,\s*16,\s*e16,\s*m1,\s*t[au],\s*m[au]
+**   vid\.v\s+v[0-9]+
+**   vadd\.vi\s+v[0-9]+,\s*v[0-9]+,\s*-16
+**   vs1r\.v\s+v[0-9]+,\s*0\([a-x0-9]+\)
+**   ret
+*/
+void
+foo5 (int16_t *__restrict out)
+{
+  vnx16i v
+    = {0-16, 1-16,2-16,3-16,4-16,5-16,6-16,7-16,8-16,9-16,10-16,11-16,12-16,13-16,14-16,15-16};
+  *(vnx16i *) out = v;
+}
-- 
2.36.1


^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH] RISC-V: Fix vec_series expander[PR110985]
  2023-08-11  8:45 [PATCH] RISC-V: Fix vec_series expander[PR110985] Juzhe-Zhong
@ 2023-08-11 14:46 ` Jeff Law
  2023-08-12  0:39   ` Li, Pan2
  0 siblings, 1 reply; 3+ messages in thread
From: Jeff Law @ 2023-08-11 14:46 UTC (permalink / raw)
  To: Juzhe-Zhong, gcc-patches; +Cc: kito.cheng, kito.cheng, rdapp.gcc



On 8/11/23 02:45, Juzhe-Zhong wrote:
> This patch fix bug: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110985
> 
> 	PR target/110985
> 
> gcc/ChangeLog:
> 
> 	* config/riscv/riscv-v.cc (expand_vec_series): Refactor the expander.
> 
> gcc/testsuite/ChangeLog:
> 
> 	* gcc.target/riscv/rvv/autovec/vls-vlmax/pr110985.c: New test.
OK.  The wording on the ChangeLog could perhaps be improved -- typically 
when one says "refactor" there's not supposed to be a functional change. 
  So perhaps "Refactor the expander and don't lose final assignment" or 
something like that.

Also it's generally useful to reviewers to explain the core problem.  I 
can guess from the BZ that we lost an assignment and I can speculate it 
was a case when the destination wasn't initially a pseudo.  The 
refactoring ensured that the sequence always stores into a pseudo and if 
that pseudo is not the same as the ultimate target, then we copy from 
the pseudo to the ultimate target when expansion is done.

jeff

^ permalink raw reply	[flat|nested] 3+ messages in thread

* RE: [PATCH] RISC-V: Fix vec_series expander[PR110985]
  2023-08-11 14:46 ` Jeff Law
@ 2023-08-12  0:39   ` Li, Pan2
  0 siblings, 0 replies; 3+ messages in thread
From: Li, Pan2 @ 2023-08-12  0:39 UTC (permalink / raw)
  To: Jeff Law, Juzhe-Zhong, gcc-patches; +Cc: kito.cheng, kito.cheng, rdapp.gcc

Committed, thanks Jeff.

Pan

-----Original Message-----
From: Gcc-patches <gcc-patches-bounces+pan2.li=intel.com@gcc.gnu.org> On Behalf Of Jeff Law via Gcc-patches
Sent: Friday, August 11, 2023 10:47 PM
To: Juzhe-Zhong <juzhe.zhong@rivai.ai>; gcc-patches@gcc.gnu.org
Cc: kito.cheng@gmail.com; kito.cheng@sifive.com; rdapp.gcc@gmail.com
Subject: Re: [PATCH] RISC-V: Fix vec_series expander[PR110985]



On 8/11/23 02:45, Juzhe-Zhong wrote:
> This patch fix bug: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110985
> 
> 	PR target/110985
> 
> gcc/ChangeLog:
> 
> 	* config/riscv/riscv-v.cc (expand_vec_series): Refactor the expander.
> 
> gcc/testsuite/ChangeLog:
> 
> 	* gcc.target/riscv/rvv/autovec/vls-vlmax/pr110985.c: New test.
OK.  The wording on the ChangeLog could perhaps be improved -- typically 
when one says "refactor" there's not supposed to be a functional change. 
  So perhaps "Refactor the expander and don't lose final assignment" or 
something like that.

Also it's generally useful to reviewers to explain the core problem.  I 
can guess from the BZ that we lost an assignment and I can speculate it 
was a case when the destination wasn't initially a pseudo.  The 
refactoring ensured that the sequence always stores into a pseudo and if 
that pseudo is not the same as the ultimate target, then we copy from 
the pseudo to the ultimate target when expansion is done.

jeff

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2023-08-12  0:39 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-08-11  8:45 [PATCH] RISC-V: Fix vec_series expander[PR110985] Juzhe-Zhong
2023-08-11 14:46 ` Jeff Law
2023-08-12  0:39   ` Li, Pan2

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).