public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
From: Lehua Ding <lehua.ding@rivai.ai>
To: gcc-patches@gcc.gnu.org
Cc: juzhe.zhong@rivai.ai, kito.cheng@gmail.com, rdapp.gcc@gmail.com,
	palmer@rivosinc.com, jeffreyalaw@gmail.com, lehua.ding@rivai.ai
Subject: [PATCH V3] RISC-V: Support combine cond extend and reduce sum to widen reduce sum
Date: Thu, 21 Sep 2023 13:32:59 +0800	[thread overview]
Message-ID: <20230921053259.1382886-1-lehua.ding@rivai.ai> (raw)

V3 Change: Back to the original method.

This patch supports combining a conditional extend and a reduce_sum into a
conditional widening reduce_sum, e.g. combining the following three insns:
   (set (reg:RVVM2HI 149)
        (if_then_else:RVVM2HI
          (unspec:RVVMF8BI [
            (const_vector:RVVMF8BI repeat [
              (const_int 1 [0x1])
            ])
            (reg:DI 146)
            (const_int 2 [0x2]) repeated x2
            (const_int 1 [0x1])
            (reg:SI 66 vl)
            (reg:SI 67 vtype)
          ] UNSPEC_VPREDICATE)
         (const_vector:RVVM2HI repeat [
           (const_int 0 [0])
         ])
         (unspec:RVVM2HI [
           (reg:SI 0 zero)
         ] UNSPEC_VUNDEF)))
  (set (reg:RVVM2HI 138)
    (if_then_else:RVVM2HI
      (reg:RVVMF8BI 135)
      (reg:RVVM2HI 148)
      (reg:RVVM2HI 149)))
  (set (reg:HI 150)
    (unspec:HI [
      (reg:RVVM2HI 138)
    ] UNSPEC_REDUC_SUM))
into one insn:
  (set (reg:SI 147)
    (unspec:SI [
      (if_then_else:RVVM2SI
        (reg:RVVMF16BI 135)
        (sign_extend:RVVM2SI (reg:RVVM1HI 136))
        (if_then_else:RVVM2HI
          (unspec:RVVMF8BI [
            (const_vector:RVVMF8BI repeat [
              (const_int 1 [0x1])
            ])
            (reg:DI 146)
            (const_int 2 [0x2]) repeated x2
            (const_int 1 [0x1])
            (reg:SI 66 vl)
            (reg:SI 67 vtype)
          ] UNSPEC_VPREDICATE)
         (const_vector:RVVM2HI repeat [
           (const_int 0 [0])
         ])
         (unspec:RVVM2HI [
           (reg:SI 0 zero)
         ] UNSPEC_VUNDEF)))
    ] UNSPEC_REDUC_SUM))

Consider the following C code:

int16_t foo (int8_t *restrict a, int8_t *restrict pred)
{
  int16_t sum = 0;
  for (int i = 0; i < 16; i += 1)
    if (pred[i])
      sum += a[i];
  return sum;
}

assembly before this patch:

foo:
        vsetivli        zero,16,e16,m2,ta,ma
        li      a5,0
        vmv.v.i v2,0
        vsetvli zero,zero,e8,m1,ta,ma
        vl1re8.v        v0,0(a1)
        vmsne.vi        v0,v0,0
        vsetvli zero,zero,e16,m2,ta,mu
        vle8.v  v4,0(a0),v0.t
        vmv.s.x v1,a5
        vsext.vf2       v2,v4,v0.t
        vredsum.vs      v2,v2,v1
        vmv.x.s a0,v2
        slliw   a0,a0,16
        sraiw   a0,a0,16
        ret

assembly after this patch:

foo:
	li	a5,0
	vsetivli	zero,16,e16,m1,ta,ma
	vmv.s.x	v3,a5
	vsetivli	zero,16,e8,m1,ta,ma
	vl1re8.v	v0,0(a1)
	vmsne.vi	v0,v0,0
	vle8.v	v2,0(a0),v0.t
	vwredsum.vs	v1,v2,v3,v0.t
	vsetivli	zero,0,e16,m1,ta,ma
	vmv.x.s	a0,v1
	slliw	a0,a0,16
	sraiw	a0,a0,16
	ret

gcc/ChangeLog:

	* config/riscv/autovec-opt.md (*cond_widen_reduc_plus_scal_<mode>):
	New combine patterns.
	* config/riscv/riscv-protos.h (enum insn_type): New insn_type.
	(enum avl_type): New avl_type for VLS mode.
	* config/riscv/riscv-v.cc: Add VLS avl_type for VLS mode.

gcc/testsuite/ChangeLog:

	* gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc-1.c: New test.
	* gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc-2.c: New test.
	* gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc_run-1.c: New test.
	* gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc_run-2.c: New test.

---
 gcc/config/riscv/autovec-opt.md               | 72 +++++++++++++++++++
 gcc/config/riscv/riscv-protos.h               |  6 +-
 gcc/config/riscv/riscv-v.cc                   |  9 ++-
 .../rvv/autovec/cond/cond_widen_reduc-1.c     | 30 ++++++++
 .../rvv/autovec/cond/cond_widen_reduc-2.c     | 30 ++++++++
 .../rvv/autovec/cond/cond_widen_reduc_run-1.c | 28 ++++++++
 .../rvv/autovec/cond/cond_widen_reduc_run-2.c | 28 ++++++++
 7 files changed, 198 insertions(+), 5 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc_run-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc_run-2.c

diff --git a/gcc/config/riscv/autovec-opt.md b/gcc/config/riscv/autovec-opt.md
index a97a095691c..ed9c0777eb9 100644
--- a/gcc/config/riscv/autovec-opt.md
+++ b/gcc/config/riscv/autovec-opt.md
@@ -1119,6 +1119,78 @@
   }
   [(set_attr "type" "vfwmuladd")])

+;; Combine mask_extend + vredsum to mask_vwredsum[u]
+;; where the merge of mask_extend is vector const 0
+(define_insn_and_split "*cond_widen_reduc_plus_scal_<mode>"
+  [(set (match_operand:<V_DOUBLE_EXTEND_VEL> 0 "register_operand")
+        (unspec:<V_DOUBLE_EXTEND_VEL> [
+          (if_then_else:<V_DOUBLE_EXTEND>
+            (match_operand:<VM> 1 "register_operand")
+            (any_extend:<V_DOUBLE_EXTEND>
+              (match_operand:VI_QHS_NO_M8 2 "register_operand"))
+            (if_then_else:<V_DOUBLE_EXTEND>
+              (unspec:<VM> [
+                (match_operand:<VM> 3 "vector_all_trues_mask_operand")
+                (match_operand 6 "vector_length_operand")
+                (match_operand 7 "const_int_operand")
+                (match_operand 8 "const_int_operand")
+                (match_operand 9 "const_1_or_2_operand")
+                (reg:SI VL_REGNUM)
+                (reg:SI VTYPE_REGNUM)
+              ] UNSPEC_VPREDICATE)
+              (match_operand:<V_DOUBLE_EXTEND> 5 "vector_const_0_operand")
+              (match_operand:<V_DOUBLE_EXTEND> 4 "vector_merge_operand")))
+        ] UNSPEC_REDUC_SUM))]
+  "TARGET_VECTOR && can_create_pseudo_p ()"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  rtx ops[] = {operands[0], operands[2], operands[1],
+               gen_int_mode (GET_MODE_NUNITS (<MODE>mode), Pmode)};
+  riscv_vector::expand_reduction (<WREDUC_UNSPEC>,
+                                  riscv_vector::REDUCE_OP_M,
+                                  ops, CONST0_RTX (<V_DOUBLE_EXTEND_VEL>mode));
+  DONE;
+}
+[(set_attr "type" "vector")])
+
+;; Combine mask_extend + vfredsum to mask_vfwredusum
+;; where the merge of mask_extend is vector const 0
+(define_insn_and_split "*cond_widen_reduc_plus_scal_<mode>"
+  [(set (match_operand:<V_DOUBLE_EXTEND_VEL> 0 "register_operand")
+        (unspec:<V_DOUBLE_EXTEND_VEL> [
+          (if_then_else:<V_DOUBLE_EXTEND>
+            (match_operand:<VM> 1 "register_operand")
+            (float_extend:<V_DOUBLE_EXTEND>
+              (match_operand:VF_HS_NO_M8 2 "register_operand"))
+            (if_then_else:<V_DOUBLE_EXTEND>
+              (unspec:<VM> [
+                (match_operand:<VM> 3 "vector_all_trues_mask_operand")
+                (match_operand 6 "vector_length_operand")
+                (match_operand 7 "const_int_operand")
+                (match_operand 8 "const_int_operand")
+                (match_operand 9 "const_1_or_2_operand")
+                (reg:SI VL_REGNUM)
+                (reg:SI VTYPE_REGNUM)
+              ] UNSPEC_VPREDICATE)
+              (match_operand:<V_DOUBLE_EXTEND> 5 "vector_const_0_operand")
+              (match_operand:<V_DOUBLE_EXTEND> 4 "vector_merge_operand")))
+        ] UNSPEC_REDUC_SUM_UNORDERED))]
+  "TARGET_VECTOR && can_create_pseudo_p ()"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  rtx ops[] = {operands[0], operands[2], operands[1],
+               gen_int_mode (GET_MODE_NUNITS (<MODE>mode), Pmode)};
+  riscv_vector::expand_reduction (UNSPEC_WREDUC_SUM_UNORDERED,
+                                  riscv_vector::REDUCE_OP_M_FRM_DYN,
+                                  ops, CONST0_RTX (<V_DOUBLE_EXTEND_VEL>mode));
+  DONE;
+}
+[(set_attr "type" "vector")])
+
 ;; =============================================================================
 ;; Misc combine patterns
 ;; =============================================================================
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 9ea0bcf15d3..ec28a613742 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -337,6 +337,7 @@ enum insn_type : unsigned int

   /* For vreduce, no mask policy operand. */
   REDUCE_OP = __NORMAL_OP_TA | BINARY_OP_P | VTYPE_MODE_FROM_OP1_P,
+  REDUCE_OP_M = __MASK_OP_TA | BINARY_OP_P | VTYPE_MODE_FROM_OP1_P,
   REDUCE_OP_FRM_DYN = REDUCE_OP | FRM_DYN_P | VTYPE_MODE_FROM_OP1_P,
   REDUCE_OP_M_FRM_DYN
   = __MASK_OP_TA | BINARY_OP_P | FRM_DYN_P | VTYPE_MODE_FROM_OP1_P,
@@ -366,8 +367,9 @@ enum vlmul_type

 enum avl_type
 {
-  NONVLMAX,
-  VLMAX,
+  NONVLMAX = 0,
+  VLMAX = 1,
+  VLS = 2,
 };
 /* Routines implemented in riscv-vector-builtins.cc.  */
 void init_builtins (void);
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 64a71a128d4..aa872e06ef8 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -284,6 +284,7 @@ public:

     /* Add vl operand.  */
     rtx len = m_vl_op;
+    bool vls_p = false;
     if (m_vlmax_p)
       {
 	if (riscv_v_ext_vls_mode_p (vtype_mode))
@@ -294,7 +295,7 @@ public:
 	    len = gen_int_mode (nunits, Pmode);
 	    if (!satisfies_constraint_K (len))
 	      len = force_reg (Pmode, len);
-	    m_vlmax_p = false;
+	    vls_p = true;
 	  }
 	else if (const_vlmax_p (vtype_mode))
 	  {
@@ -302,7 +303,7 @@ public:
 	       the vsetvli to obtain the value of vlmax.  */
 	    poly_uint64 nunits = GET_MODE_NUNITS (vtype_mode);
 	    len = gen_int_mode (nunits, Pmode);
-	    m_vlmax_p = false;
+	    vls_p = true;
 	  }
 	else if (can_create_pseudo_p ())
 	  {
@@ -318,7 +319,9 @@ public:
     add_policy_operand ();

     /* Add avl_type operand.  */
-    add_avl_type_operand (m_vlmax_p ? avl_type::VLMAX : avl_type::NONVLMAX);
+    add_avl_type_operand (
+      vls_p ? avl_type::VLS
+	    : (m_vlmax_p ? avl_type::VLMAX : avl_type::NONVLMAX));

     /* Add rounding mode operand.  */
     if (m_insn_flags & FRM_DYN_P)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc-1.c
new file mode 100644
index 00000000000..22a71048684
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc-1.c
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-march=rv64gcv_zvfh_zvl128b -mabi=lp64d --param riscv-autovec-preference=fixed-vlmax --param riscv-autovec-lmul=m2 -fno-vect-cost-model -ffast-math" } */
+#include <stdint-gcc.h>
+
+#define TEST_TYPE(TYPE1, TYPE2, N)                                             \
+  __attribute__ ((noipa))                                                      \
+  TYPE1 reduc_##TYPE1##_##TYPE2 (TYPE2 *restrict a, TYPE2 *restrict pred)      \
+  {                                                                            \
+    TYPE1 sum = 0;                                                             \
+    for (int i = 0; i < N; i += 1)                                             \
+      if (pred[i])                                                             \
+	sum += a[i];                                                           \
+    return sum;                                                                \
+  }
+
+#define TEST_ALL(TEST)                                                         \
+  TEST (int16_t, int8_t, 16)                                                   \
+  TEST (int32_t, int16_t, 8)                                                   \
+  TEST (int64_t, int32_t, 4)                                                   \
+  TEST (uint16_t, uint8_t, 16)                                                 \
+  TEST (uint32_t, uint16_t, 8)                                                 \
+  TEST (uint64_t, uint32_t, 4)                                                 \
+  TEST (float, _Float16, 8)                                                    \
+  TEST (double, float, 4)
+
+TEST_ALL (TEST_TYPE)
+
+/* { dg-final { scan-assembler-times {\tvfwredusum\.vs\tv[0-9]+,v[0-9]+,v[0-9]+,v0\.t} 2 } } */
+/* { dg-final { scan-assembler-times {\tvwredsum\.vs\tv[0-9]+,v[0-9]+,v[0-9]+,v0\.t} 3 } } */
+/* { dg-final { scan-assembler-times {\tvwredsumu\.vs\tv[0-9]+,v[0-9]+,v[0-9]+,v0\.t} 3 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc-2.c
new file mode 100644
index 00000000000..7c8fedd072b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc-2.c
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-march=rv64gcv_zvfh_zvl128b -mabi=lp64d --param riscv-autovec-preference=scalable --param riscv-autovec-lmul=m2 -fno-vect-cost-model -ffast-math" } */
+#include <stdint-gcc.h>
+
+#define TEST_TYPE(TYPE1, TYPE2, N)                                             \
+  __attribute__ ((noipa))                                                      \
+  TYPE1 reduc_##TYPE1##_##TYPE2 (TYPE2 *restrict a, TYPE2 *restrict pred)      \
+  {                                                                            \
+    TYPE1 sum = 0;                                                             \
+    for (int i = 0; i < N; i += 1)                                             \
+      if (pred[i])                                                             \
+	sum += a[i];                                                           \
+    return sum;                                                                \
+  }
+
+#define TEST_ALL(TEST)                                                         \
+  TEST (int16_t, int8_t, 16)                                                   \
+  TEST (int32_t, int16_t, 8)                                                   \
+  TEST (int64_t, int32_t, 4)                                                   \
+  TEST (uint16_t, uint8_t, 16)                                                 \
+  TEST (uint32_t, uint16_t, 8)                                                 \
+  TEST (uint64_t, uint32_t, 4)                                                 \
+  TEST (float, _Float16, 8)                                                    \
+  TEST (double, float, 4)
+
+TEST_ALL (TEST_TYPE)
+
+/* { dg-final { scan-assembler-times {\tvfwredusum\.vs\tv[0-9]+,v[0-9]+,v[0-9]+,v0\.t} 2 } } */
+/* { dg-final { scan-assembler-times {\tvwredsum\.vs\tv[0-9]+,v[0-9]+,v[0-9]+,v0\.t} 3 } } */
+/* { dg-final { scan-assembler-times {\tvwredsumu\.vs\tv[0-9]+,v[0-9]+,v[0-9]+,v0\.t} 3 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc_run-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc_run-1.c
new file mode 100644
index 00000000000..228df0959b6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc_run-1.c
@@ -0,0 +1,28 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-additional-options "--param riscv-autovec-preference=fixed-vlmax --param riscv-autovec-lmul=m2 -fno-vect-cost-model -ffast-math" } */
+
+#include "cond_widen_reduc-1.c"
+
+#define RUN(TYPE1, TYPE2, N)                                                   \
+  {                                                                            \
+    TYPE2 a[N];                                                                \
+    TYPE2 pred[N];                                                             \
+    TYPE1 r = 0;                                                               \
+    for (int i = 0; i < N; i++)                                                \
+      {                                                                        \
+	a[i] = (i * 0.1) * (i & 1 ? 1 : -1);                                   \
+	pred[i] = i % 3;                                                       \
+	if (pred[i])                                                           \
+	  r += a[i];                                                           \
+	asm volatile ("" ::: "memory");                                        \
+      }                                                                        \
+    if (r != reduc_##TYPE1##_##TYPE2 (a, pred))                                \
+      __builtin_abort ();                                                      \
+  }
+
+int __attribute__ ((optimize (1)))
+main ()
+{
+  TEST_ALL (RUN)
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc_run-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc_run-2.c
new file mode 100644
index 00000000000..2bf0f5fffda
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc_run-2.c
@@ -0,0 +1,28 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-additional-options "--param riscv-autovec-preference=scalable --param riscv-autovec-lmul=m2 -fno-vect-cost-model -ffast-math" } */
+
+#include "cond_widen_reduc-2.c"
+
+#define RUN(TYPE1, TYPE2, N)                                                   \
+  {                                                                            \
+    TYPE2 a[N];                                                                \
+    TYPE2 pred[N];                                                             \
+    TYPE1 r = 0;                                                               \
+    for (int i = 0; i < N; i++)                                                \
+      {                                                                        \
+	a[i] = (i * 0.1) * (i & 1 ? 1 : -1);                                   \
+	pred[i] = i % 3;                                                       \
+	if (pred[i])                                                           \
+	  r += a[i];                                                           \
+	asm volatile ("" ::: "memory");                                        \
+      }                                                                        \
+    if (r != reduc_##TYPE1##_##TYPE2 (a, pred))                                \
+      __builtin_abort ();                                                      \
+  }
+
+int __attribute__ ((optimize (1)))
+main ()
+{
+  TEST_ALL (RUN)
+  return 0;
+}
--
2.36.3



             reply	other threads:[~2023-09-21  5:33 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-09-21  5:32 Lehua Ding [this message]
2023-09-21  8:12 ` Robin Dapp
2023-09-21  8:27   ` Lehua Ding
2023-09-22  3:03     ` Lehua Ding

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230921053259.1382886-1-lehua.ding@rivai.ai \
    --to=lehua.ding@rivai.ai \
    --cc=gcc-patches@gcc.gnu.org \
    --cc=jeffreyalaw@gmail.com \
    --cc=juzhe.zhong@rivai.ai \
    --cc=kito.cheng@gmail.com \
    --cc=palmer@rivosinc.com \
    --cc=rdapp.gcc@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).