public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
From: "Li, Pan2" <pan2.li@intel.com>
To: Richard Biener <rguenther@suse.de>, Juzhe-Zhong <juzhe.zhong@rivai.ai>
Cc: "gcc-patches@gcc.gnu.org" <gcc-patches@gcc.gnu.org>,
	"richard.sandiford@arm.com" <richard.sandiford@arm.com>
Subject: RE: [PATCH V3] MATCH: Optimize COND_ADD_LEN reduction pattern
Date: Tue, 26 Sep 2023 12:19:11 +0000	[thread overview]
Message-ID: <MW5PR11MB590868DD7465B965E5F322CBA9C3A@MW5PR11MB5908.namprd11.prod.outlook.com> (raw)
In-Reply-To: <nycvar.YFH.7.77.849.2309261134250.5561@jbgna.fhfr.qr>

Committed, as it passed the x86 bootstrap and regression tests. Thanks, Richard.

Pan

-----Original Message-----
From: Richard Biener <rguenther@suse.de> 
Sent: Tuesday, September 26, 2023 7:35 PM
To: Juzhe-Zhong <juzhe.zhong@rivai.ai>
Cc: gcc-patches@gcc.gnu.org; richard.sandiford@arm.com
Subject: Re: [PATCH V3] MATCH: Optimize COND_ADD_LEN reduction pattern

On Tue, 26 Sep 2023, Juzhe-Zhong wrote:

> This patch leverages this commit: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=62b505a4d5fc89
> to optimize the COND_LEN_ADD reduction pattern.
> 
> We are doing optimization of VEC_COND_EXPR + COND_LEN_ADD -> COND_LEN_ADD.
> 
> Consider the following case:
> 
> #include <stdint.h>
> 
> void
> pr11594 (uint64_t *restrict a, uint64_t *restrict b, int loop_size)
> {
>   uint64_t result = 0;
> 
>   for (int i = 0; i < loop_size; i++)
>     {
>       if (b[i] <= a[i])
> 	{
> 	  result += a[i];
> 	}
>     }
> 
>   a[0] = result;
> }
> 
> Before this patch:
>         vsetvli a7,zero,e64,m1,ta,ma
>         vmv.v.i v2,0
>         vmv1r.v v3,v2                    --- redundant
> .L3:
>         vsetvli a5,a2,e64,m1,ta,ma
>         vle64.v v1,0(a3)
>         vle64.v v0,0(a1)
>         slli    a6,a5,3
>         vsetvli a7,zero,e64,m1,ta,ma
>         sub     a2,a2,a5
>         vmsleu.vv       v0,v0,v1
>         add     a1,a1,a6
>         vmerge.vvm      v1,v3,v1,v0     ---- redundant.
>         add     a3,a3,a6
>         vsetvli zero,a5,e64,m1,tu,ma
>         vadd.vv v2,v2,v1
>         bne     a2,zero,.L3
>         li      a5,0
>         vsetvli a4,zero,e64,m1,ta,ma
>         vmv.s.x v1,a5
>         vredsum.vs      v2,v2,v1
>         vmv.x.s a5,v2
>         sd      a5,0(a0)
>         ret
> 
> After this patch:
> 
> 	vsetvli	a6,zero,e64,m1,ta,ma
> 	vmv.v.i	v1,0
> .L3:
> 	vsetvli	a5,a2,e64,m1,ta,ma
> 	vle64.v	v2,0(a4)
> 	vle64.v	v0,0(a1)
> 	slli	a3,a5,3
> 	vsetvli	a6,zero,e64,m1,ta,ma
> 	sub	a2,a2,a5
> 	vmsleu.vv	v0,v0,v2
> 	add	a1,a1,a3
> 	vsetvli	zero,a5,e64,m1,tu,mu
> 	add	a4,a4,a3
> 	vadd.vv	v1,v1,v2,v0.t
> 	bne	a2,zero,.L3
> 	li	a5,0
> 	vsetivli	zero,1,e64,m1,ta,ma
> 	vmv.s.x	v2,a5
> 	vsetvli	a5,zero,e64,m1,ta,ma
> 	vredsum.vs	v1,v1,v2
> 	vmv.x.s	a5,v1
> 	sd	a5,0(a0)
> 	ret
> 
> Bootstrap && Regression is running.
> 
> OK for trunk when testing passes?

OK

> 	PR tree-optimization/111594
>         PR tree-optimization/110660
> 
> gcc/ChangeLog:
> 
> 	* match.pd: Optimize COND_LEN_ADD reduction.
> 
> gcc/testsuite/ChangeLog:
> 
> 	* gcc.target/riscv/rvv/autovec/cond/cond_reduc-1.c: New test.
> 	* gcc.target/riscv/rvv/autovec/cond/pr111594.c: New test.
> 
> ---
>  gcc/match.pd                                  | 15 ++++++++++
>  .../riscv/rvv/autovec/cond/cond_reduc-1.c     | 29 +++++++++++++++++++
>  .../riscv/rvv/autovec/cond/pr111594.c         | 22 ++++++++++++++
>  3 files changed, 66 insertions(+)
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_reduc-1.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/pr111594.c
> 
> diff --git a/gcc/match.pd b/gcc/match.pd
> index a17778fbaa6..3ce90c3333b 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -8866,6 +8866,21 @@ and,
>    (IFN_COND_ADD @0 @1 (vec_cond @2 @3 integer_zerop) @1)
>     (IFN_COND_ADD (bit_and @0 @2) @1 @3 @1))
>  
> +/* Detect simplification for a conditional length reduction where
> +
> +   a = mask ? b : 0
> +   c = i < len + bias ? d + a : d
> +
> +   is turned into
> +
> +   c = mask && i < len + bias ? d + b : d.  */
> +(simplify
> +  (IFN_COND_LEN_ADD integer_truep @0 (vec_cond @1 @2 zerop@5) @0 @3 @4)
> +   (if (ANY_INTEGRAL_TYPE_P (type)
> +	|| (FLOAT_TYPE_P (type)
> +	    && fold_real_zero_addition_p (type, NULL_TREE, @5, 0)))
> +    (IFN_COND_LEN_ADD @1 @0 @2 @0 @3 @4)))
> +
>  /* For pointers @0 and @2 and nonnegative constant offset @1, look for
>     expressions like:
>  
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_reduc-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_reduc-1.c
> new file mode 100644
> index 00000000000..db6f9d1ec6c
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_reduc-1.c
> @@ -0,0 +1,29 @@
> +/* { dg-do compile } */
> +/* { dg-additional-options "-march=rv64gcv_zvfh -mabi=lp64d -fno-vect-cost-model -ffast-math -fdump-tree-optimized" } */
> +
> +#include <stdint-gcc.h>
> +
> +#define COND_REDUCTION(TYPE)                                                   \
> +  TYPE foo##TYPE (TYPE *restrict a, TYPE *restrict b, int loop_size)           \
> +  {                                                                            \
> +    TYPE result = 0;                                                           \
> +    for (int i = 0; i < loop_size; i++)                                        \
> +      if (b[i] <= a[i])                                                        \
> +	result += a[i];                                                        \
> +    return result;                                                             \
> +  }
> +
> +COND_REDUCTION (int8_t)
> +COND_REDUCTION (int16_t)
> +COND_REDUCTION (int32_t)
> +COND_REDUCTION (int64_t)
> +COND_REDUCTION (uint8_t)
> +COND_REDUCTION (uint16_t)
> +COND_REDUCTION (uint32_t)
> +COND_REDUCTION (uint64_t)
> +COND_REDUCTION (_Float16)
> +COND_REDUCTION (float)
> +COND_REDUCTION (double)
> +
> +/* { dg-final { scan-tree-dump-not "VCOND_MASK" "optimized" } } */
> +/* { dg-final { scan-tree-dump-times "COND_LEN_ADD" 11 "optimized" } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/pr111594.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/pr111594.c
> new file mode 100644
> index 00000000000..6d81b26fbd0
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/pr111594.c
> @@ -0,0 +1,22 @@
> +/* { dg-do compile } */
> +/* { dg-additional-options "-march=rv64gcv -mabi=lp64d -fno-vect-cost-model -ffast-math" } */
> +
> +#include <stdint-gcc.h>
> +
> +void
> +pr11594 (uint64_t *restrict a, uint64_t *restrict b, int loop_size)
> +{
> +  uint64_t result = 0;
> +
> +  for (int i = 0; i < loop_size; i++)
> +    {
> +      if (b[i] <= a[i])
> +	{
> +	  result += a[i];
> +	}
> +    }
> +
> +  a[0] = result;
> +}
> +
> +/* { dg-final { scan-assembler-not {vmerge} } } */
> 

-- 
Richard Biener <rguenther@suse.de>
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)

      reply	other threads:[~2023-09-26 12:19 UTC|newest]

Thread overview: 3+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-09-26  9:50 Juzhe-Zhong
2023-09-26 11:34 ` Richard Biener
2023-09-26 12:19   ` Li, Pan2 [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=MW5PR11MB590868DD7465B965E5F322CBA9C3A@MW5PR11MB5908.namprd11.prod.outlook.com \
    --to=pan2.li@intel.com \
    --cc=gcc-patches@gcc.gnu.org \
    --cc=juzhe.zhong@rivai.ai \
    --cc=rguenther@suse.de \
    --cc=richard.sandiford@arm.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).