public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc(refs/vendors/riscv/heads/gcc-13-with-riscv-opts)] MATCH: Optimize COND_ADD_LEN reduction pattern
@ 2023-09-26 15:13 Jeff Law
  0 siblings, 0 replies; only message in thread
From: Jeff Law @ 2023-09-26 15:13 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:722e490ac8638c18464c1cff3460af733ec1a815

commit 722e490ac8638c18464c1cff3460af733ec1a815
Author: Juzhe-Zhong <juzhe.zhong@rivai.ai>
Date:   Tue Sep 26 17:50:37 2023 +0800

    MATCH: Optimize COND_ADD_LEN reduction pattern
    
    This patch leverages this commit: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=62b505a4d5fc89
    to optimize COND_LEN_ADD reduction pattern.
    
    We are doing optimization of VEC_COND_EXPR + COND_LEN_ADD -> COND_LEN_ADD.
    
    Consider the following case:
    
    void
    pr11594 (uint64_t *restrict a, uint64_t *restrict b, int loop_size)
    {
      uint64_t result = 0;
    
      for (int i = 0; i < loop_size; i++)
        {
          if (b[i] <= a[i])
            {
              result += a[i];
            }
        }
    
      a[0] = result;
    }
    
    Before this patch:
            vsetvli a7,zero,e64,m1,ta,ma
            vmv.v.i v2,0
            vmv1r.v v3,v2                    --- redundant
    .L3:
            vsetvli a5,a2,e64,m1,ta,ma
            vle64.v v1,0(a3)
            vle64.v v0,0(a1)
            slli    a6,a5,3
            vsetvli a7,zero,e64,m1,ta,ma
            sub     a2,a2,a5
            vmsleu.vv       v0,v0,v1
            add     a1,a1,a6
            vmerge.vvm      v1,v3,v1,v0     ---- redundant.
            add     a3,a3,a6
            vsetvli zero,a5,e64,m1,tu,ma
            vadd.vv v2,v2,v1
            bne     a2,zero,.L3
            li      a5,0
            vsetvli a4,zero,e64,m1,ta,ma
            vmv.s.x v1,a5
            vredsum.vs      v2,v2,v1
            vmv.x.s a5,v2
            sd      a5,0(a0)
            ret
    
    After this patch:
    
            vsetvli a6,zero,e64,m1,ta,ma
            vmv.v.i v1,0
    .L3:
            vsetvli a5,a2,e64,m1,ta,ma
            vle64.v v2,0(a4)
            vle64.v v0,0(a1)
            slli    a3,a5,3
            vsetvli a6,zero,e64,m1,ta,ma
            sub     a2,a2,a5
            vmsleu.vv       v0,v0,v2
            add     a1,a1,a3
            vsetvli zero,a5,e64,m1,tu,mu
            add     a4,a4,a3
            vadd.vv v1,v1,v2,v0.t
            bne     a2,zero,.L3
            li      a5,0
            vsetivli        zero,1,e64,m1,ta,ma
            vmv.s.x v2,a5
            vsetvli a5,zero,e64,m1,ta,ma
            vredsum.vs      v1,v1,v2
            vmv.x.s a5,v1
            sd      a5,0(a0)
            ret
    
    Bootstrap && Regression is running.
    
    Ok for trunk when testing passes ?
    
            PR tree-optimization/111594
            PR tree-optimization/110660
    
    gcc/ChangeLog:
    
            * match.pd: Optimize COND_LEN_ADD reduction.
    
    gcc/testsuite/ChangeLog:
    
            * gcc.target/riscv/rvv/autovec/cond/cond_reduc-1.c: New test.
            * gcc.target/riscv/rvv/autovec/cond/pr111594.c: New test.
    
    (cherry picked from commit dd0197fb4cdee8cd5f78fea9a965c96d7ca47229)

Diff:
---
 gcc/match.pd                                       | 15 +++++++++++
 .../riscv/rvv/autovec/cond/cond_reduc-1.c          | 29 ++++++++++++++++++++++
 .../gcc.target/riscv/rvv/autovec/cond/pr111594.c   | 22 ++++++++++++++++
 3 files changed, 66 insertions(+)

diff --git a/gcc/match.pd b/gcc/match.pd
index ce45bc8d5f0..efcf37c16a4 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -7983,6 +7983,21 @@ and,
   (IFN_COND_ADD @0 @1 (vec_cond @2 @3 integer_zerop) @1)
    (IFN_COND_ADD (bit_and @0 @2) @1 @3 @1))
 
+/* Detect simplication for a conditional length reduction where
+
+   a = mask ? b : 0
+   c = i < len + bias ? d + a : d
+
+   is turned into
+
+   c = mask && i < len + bias ? d + b : d.  */
+(simplify
+  (IFN_COND_LEN_ADD integer_truep @0 (vec_cond @1 @2 zerop@5) @0 @3 @4)
+   (if (ANY_INTEGRAL_TYPE_P (type)
+	|| (FLOAT_TYPE_P (type)
+	    && fold_real_zero_addition_p (type, NULL_TREE, @5, 0)))
+    (IFN_COND_LEN_ADD @1 @0 @2 @0 @3 @4)))
+
 /* For pointers @0 and @2 and nonnegative constant offset @1, look for
    expressions like:
 
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_reduc-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_reduc-1.c
new file mode 100644
index 00000000000..db6f9d1ec6c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_reduc-1.c
@@ -0,0 +1,29 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-march=rv64gcv_zvfh -mabi=lp64d -fno-vect-cost-model -ffast-math -fdump-tree-optimized" } */
+
+#include <stdint-gcc.h>
+
+#define COND_REDUCTION(TYPE)                                                   \
+  TYPE foo##TYPE (TYPE *restrict a, TYPE *restrict b, int loop_size)           \
+  {                                                                            \
+    TYPE result = 0;                                                           \
+    for (int i = 0; i < loop_size; i++)                                        \
+      if (b[i] <= a[i])                                                        \
+	result += a[i];                                                        \
+    return result;                                                             \
+  }
+
+COND_REDUCTION (int8_t)
+COND_REDUCTION (int16_t)
+COND_REDUCTION (int32_t)
+COND_REDUCTION (int64_t)
+COND_REDUCTION (uint8_t)
+COND_REDUCTION (uint16_t)
+COND_REDUCTION (uint32_t)
+COND_REDUCTION (uint64_t)
+COND_REDUCTION (_Float16)
+COND_REDUCTION (float)
+COND_REDUCTION (double)
+
+/* { dg-final { scan-tree-dump-not "VCOND_MASK" "optimized" } } */
+/* { dg-final { scan-tree-dump-times "COND_LEN_ADD" 11 "optimized" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/pr111594.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/pr111594.c
new file mode 100644
index 00000000000..6d81b26fbd0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/pr111594.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-march=rv64gcv -mabi=lp64d -fno-vect-cost-model -ffast-math" } */
+
+#include <stdint-gcc.h>
+
+void
+pr11594 (uint64_t *restrict a, uint64_t *restrict b, int loop_size)
+{
+  uint64_t result = 0;
+
+  for (int i = 0; i < loop_size; i++)
+    {
+      if (b[i] <= a[i])
+	{
+	  result += a[i];
+	}
+    }
+
+  a[0] = result;
+}
+
+/* { dg-final { scan-assembler-not {vmerge} } } */

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2023-09-26 15:13 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-09-26 15:13 [gcc(refs/vendors/riscv/heads/gcc-13-with-riscv-opts)] MATCH: Optimize COND_ADD_LEN reduction pattern Jeff Law

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).