public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
From: "juzhe.zhong@rivai.ai" <juzhe.zhong@rivai.ai>
To: kito.cheng <kito.cheng@gmail.com>
Cc: gcc-patches <gcc-patches@gcc.gnu.org>,
	 Kito.cheng <kito.cheng@sifive.com>,
	 jeffreyalaw <jeffreyalaw@gmail.com>,
	 "Robin Dapp" <rdapp.gcc@gmail.com>
Subject: Re: Re: [PATCH] RISC-V: Allow LICM hoist POLY_INT configuration code sequence
Date: Sun, 4 Feb 2024 10:03:49 +0800	[thread overview]
Message-ID: <4AF20724FCB5A950+202402041003488289149@rivai.ai> (raw)
In-Reply-To: <CA+yXCZC02m59j6xZz9F_RMMK71Y_UxUedWdOZLpVsMyN7uBibg@mail.gmail.com>

[-- Attachment #1: Type: text/plain, Size: 12920 bytes --]

Hi,  kito and Robin and Jeff.
 
I haven't committed this patch yet because I found that it causes an ICE:

during RTL pass: loop2_unroll
dump file: bug.c.286r.loop2_unroll
bug.c: In function 'crashIt':
bug.c:23:1: internal compiler error: in decompose, at wide-int.h:1049
   23 | }
      | ^
0x1043946 wi::int_traits<generic_wide_int<wide_int_ref_storage<false, false> > >::decompose(long*, unsigned int, generic_wide_int<wide_int_ref_storage<false, false> > const&)
        ../../../../gcc/gcc/wide-int.h:1049
0x1043a80 wide_int_ref_storage<false, false>::wide_int_ref_storage<generic_wide_int<wide_int_ref_storage<false, false> > >(generic_wide_int<wide_int_ref_storage<false, false> > const&, unsigned int)
        ../../../../gcc/gcc/wide-int.h:1099
0x1042f72 generic_wide_int<wide_int_ref_storage<false, false> >::generic_wide_int<generic_wide_int<wide_int_ref_storage<false, false> > >(generic_wide_int<wide_int_ref_storage<false, false> > const&, unsigned int)
        ../../../../gcc/gcc/wide-int.h:855
0x145b5d0 wi::binary_traits<generic_wide_int<wide_int_ref_storage<false, false> >, generic_wide_int<wide_int_ref_storage<false, false> >, wi::int_traits<generic_wide_int<wide_int_ref_storage<false, false> > >::precision_type, wi::int_traits<generic_wide_int<wide_int_ref_storage<false, false> > >::precision_type>::result_type wi::add<generic_wide_int<wide_int_ref_storage<false, false> >, generic_wide_int<wide_int_ref_storage<false, false> > >(generic_wide_int<wide_int_ref_storage<false, false> > const&, generic_wide_int<wide_int_ref_storage<false, false> > const&)
        ../../../../gcc/gcc/wide-int.h:2872
0x1458439 wi::binary_traits<generic_wide_int<wide_int_ref_storage<false, false> >, generic_wide_int<wide_int_ref_storage<false, false> >, wi::int_traits<generic_wide_int<wide_int_ref_storage<false, false> > >::precision_type, wi::int_traits<generic_wide_int<wide_int_ref_storage<false, false> > >::precision_type>::operator_result operator+<generic_wide_int<wide_int_ref_storage<false, false> >, generic_wide_int<wide_int_ref_storage<false, false> > >(generic_wide_int<wide_int_ref_storage<false, false> > const&, generic_wide_int<wide_int_ref_storage<false, false> > const&)
        ../../../../gcc/gcc/wide-int.h:3857
0x195f866 poly_int<2u, poly_result<generic_wide_int<wide_int_ref_storage<false, false> >, generic_wide_int<wide_int_ref_storage<false, false> >, poly_coeff_pair_traits<generic_wide_int<wide_int_ref_storage<false, false> >, generic_wide_int<wide_int_ref_storage<false, false> > >::result_kind>::type> operator+<2u, generic_wide_int<wide_int_ref_storage<false, false> >, generic_wide_int<wide_int_ref_storage<false, false> > >(poly_int<2u, generic_wide_int<wide_int_ref_storage<false, false> > > const&, poly_int<2u, generic_wide_int<wide_int_ref_storage<false, false> > > const&)
        ../../../../gcc/gcc/poly-int.h:772
0x194d423 simplify_const_binary_operation(rtx_code, machine_mode, rtx_def*, rtx_def*)
        ../../../../gcc/gcc/simplify-rtx.cc:5392
0x1940374 simplify_context::simplify_binary_operation(rtx_code, machine_mode, rtx_def*, rtx_def*)
        ../../../../gcc/gcc/simplify-rtx.cc:2664
0x1936e62 simplify_context::simplify_gen_binary(rtx_code, machine_mode, rtx_def*, rtx_def*)
        ../../../../gcc/gcc/simplify-rtx.cc:182
0x11b43f6 simplify_gen_binary(rtx_code, machine_mode, rtx_def*, rtx_def*)
        ../../../../gcc/gcc/rtl.h:3529
0x16c0e35 get_biv_step_1
        ../../../../gcc/gcc/loop-iv.cc:788
0x16c0c97 get_biv_step_1
        ../../../../gcc/gcc/loop-iv.cc:758
0x16c0f68 get_biv_step
        ../../../../gcc/gcc/loop-iv.cc:828
0x16c1390 iv_analyze_biv
        ../../../../gcc/gcc/loop-iv.cc:921
0x16c1e7d iv_analyze_op
        ../../../../gcc/gcc/loop-iv.cc:1187
0x16c1d71 iv_analyze_op
        ../../../../gcc/gcc/loop-iv.cc:1157
0x16c15e0 iv_analyze_expr(rtx_insn*, scalar_int_mode, rtx_def*, rtx_iv*)
        ../../../../gcc/gcc/loop-iv.cc:976
0x16c1757 iv_analyze_expr(rtx_insn*, scalar_int_mode, rtx_def*, rtx_iv*)
        ../../../../gcc/gcc/loop-iv.cc:1020
0x16c1757 iv_analyze_expr(rtx_insn*, scalar_int_mode, rtx_def*, rtx_iv*)
        ../../../../gcc/gcc/loop-iv.cc:1020
0x16c1b83 iv_analyze_def
        ../../../../gcc/gcc/loop-iv.cc:1115

To reproduce this ICE:

Compile the following test case with the options:  -O3 -fomit-frame-pointer -funroll-loops -fpeel-loops -ftracer -finline-functions

typedef unsigned short (FUNC_P) (void *, unsigned char *, unsigned short);

void crashIt(int id, FUNC_P *func, unsigned char *funcparm)
{
  unsigned char buff[5], reverse[4];
  unsigned char *bp = buff;
  unsigned char *rp = reverse;
  unsigned short int count = 0;
  unsigned short cnt;
  while (id > 0)
    {
      *rp++ = (unsigned char) (id & 0x7F);
      id >>= 7;
      count++;
    }
  cnt = count + 1;
  while ((count--) > 1)
    {
      *bp++ = (unsigned char)(*(--rp) | 0x80);
    }
  *bp++ = *(--rp);
  (void)(*func)(funcparm, buff, cnt);
}

The root cause is the following RTL pattern, which appears after fwprop1:

(insn 82 78 84 9 (set (reg:DI 230)
        (sign_extend:DI (minus:SI (subreg/s/v:SI (reg:DI 150 [ niters.10 ]) 0)
                (subreg:SI (reg:DI 221) 0)))) 13 {subsi3_extended}
     (expr_list:REG_EQUAL (sign_extend:DI (plus:SI (subreg/s/v:SI (reg:DI 150 [ niters.10 ]) 0)
                (const_poly_int:SI [-16, -16])))
        (nil)))

The highlighted (const_poly_int:SI [-16, -16]) in the REG_EQUAL note
causes the ICE.

This RTL arises because:
(insn 69 68 71 8 (set (reg:DI 221)
        (const_poly_int:DI [16, 16])) 208 {*movdi_64bit}
     (nil))
(insn 82 78 84 9 (set (reg:DI 230)
        (sign_extend:DI (minus:SI (subreg/s/v:SI (reg:DI 150 [ niters.10 ]) 0)
                (subreg:SI (reg:DI 221) 0)))) 13 {subsi3_extended}                                          ----> (subreg:SI (const_poly_int:SI [-16, -16])) fwprop1 add  (const_poly_int:SI [-16, -16]) reg_equal
     (expr_list:REG_EQUAL (sign_extend:DI (plus:SI (subreg/s/v:SI (reg:DI 150 [ niters.10 ]) 0)
                (const_poly_int:SI [-16, -16])))
        (nil)))

Previously, we generated:

(set (subreg:DI (reg:SI)) (DI: poly value)) --> the outer mode is wider than the inner mode in the destination operand.

We never had (subreg: (poly_value)), so we did not hit the ICE. However, I don't think our previous approach is correct.

Actually, I believe we should generate the following instead, which should be better:

 (set (reg:SI)  (subreg:SI (DI: poly value))) but it causes the ICE that I mentioned above.

I also tried the following change, which fixes this issue:

diff --git a/gcc/loop-iv.cc b/gcc/loop-iv.cc
index eb7e923a38b..09750951845 100644
--- a/gcc/loop-iv.cc
+++ b/gcc/loop-iv.cc
@@ -646,10 +646,10 @@ get_biv_step_1 (df_ref def, scalar_int_mode outer_mode, rtx reg,
   if (!set)
     return false;

-  rhs = find_reg_equal_equiv_note (insn);
-  if (rhs)
-    rhs = XEXP (rhs, 0);
-  else
+  //rhs = find_reg_equal_equiv_note (insn);
+  //if (rhs)
+  //  rhs = XEXP (rhs, 0);
+  //else
     rhs = SET_SRC (set);

Any thoughts?





juzhe.zhong@rivai.ai
 
From: Kito Cheng
Date: 2024-02-02 16:50
To: Juzhe-Zhong
CC: gcc-patches; kito.cheng; jeffreyalaw; rdapp.gcc
Subject: Re: [PATCH] RISC-V: Allow LICM hoist POLY_INT configuration code sequence
LGTM :)
 
On Thu, Feb 1, 2024 at 11:46 PM Juzhe-Zhong <juzhe.zhong@rivai.ai> wrote:
>
> I noticed the following in a recent benchmark evaluation (coremark-pro zip-test):
>
>         vid.v   v2
>         vmv.v.i v5,0
> .L9:
>         vle16.v v3,0(a4)
>         vrsub.vx        v4,v2,a6   ---> LICM failed to hoist it outside the loop.
>
> The root cause is:
>
> (insn 56 47 57 4 (set (subreg:DI (reg:HI 220) 0)
>         (reg:DI 223)) "rvv.c":11:9 208 {*movdi_64bit}  -> Its result is used by the following vrsub.vx, which suppresses the hoisting of the vrsub.vx
>      (nil))
>
> (insn 57 56 59 4 (set (reg:RVVMF2HI 216)
>         (if_then_else:RVVMF2HI (unspec:RVVMF32BI [
>                     (const_vector:RVVMF32BI repeat [
>                             (const_int 1 [0x1])
>                         ])
>                     (reg:DI 350)
>                     (const_int 2 [0x2]) repeated x2
>                     (const_int 1 [0x1])
>                     (reg:SI 66 vl)
>                     (reg:SI 67 vtype)
>                 ] UNSPEC_VPREDICATE)
>             (minus:RVVMF2HI (vec_duplicate:RVVMF2HI (reg:HI 220))
>                 (reg:RVVMF2HI 217))
>             (unspec:RVVMF2HI [
>                     (reg:DI 0 zero)
>                 ] UNSPEC_VUNDEF))) "rvv.c":11:9 6938 {pred_subrvvmf2hi_reverse_scalar}
>      (expr_list:REG_DEAD (reg:HI 220)
>         (nil)))
>
> This patch fixes it by generating (set (reg:HI) (subreg:HI (reg:DI))) instead of (set (subreg:DI (reg:HI)) (reg:DI)).
>
> After this patch:
>
>         vid.v   v2
>         vrsub.vx        v2,v2,a7
>         vmv.v.i v4,0
> .L3:
>         vle16.v v3,0(a4)
>
> Tested on both RV32 and RV64 with no regressions.
>
> gcc/ChangeLog:
>
>         * config/riscv/riscv.cc (riscv_legitimize_move): Fix poly_int dest generation.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.target/riscv/rvv/autovec/poly_licm-1.c: New test.
>         * gcc.target/riscv/rvv/autovec/poly_licm-2.c: New test.
>
> ---
>  gcc/config/riscv/riscv.cc                     |  9 ++++---
>  .../riscv/rvv/autovec/poly_licm-1.c           | 18 +++++++++++++
>  .../riscv/rvv/autovec/poly_licm-2.c           | 27 +++++++++++++++++++
>  3 files changed, 50 insertions(+), 4 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/poly_licm-1.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/poly_licm-2.c
>
> diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
> index 529ef5e84b7..6e22b43e618 100644
> --- a/gcc/config/riscv/riscv.cc
> +++ b/gcc/config/riscv/riscv.cc
> @@ -2711,16 +2711,17 @@ riscv_legitimize_move (machine_mode mode, rtx dest, rtx src)
>                                     (const_poly_int:HI [m, n])
>                                     (const_poly_int:SI [m, n]).  */
>           rtx tmp = gen_reg_rtx (Pmode);
> -         riscv_legitimize_poly_move (Pmode, gen_lowpart (Pmode, dest), tmp,
> -                                     src);
> +         rtx tmp2 = gen_reg_rtx (Pmode);
> +         riscv_legitimize_poly_move (Pmode, tmp2, tmp, src);
> +         emit_move_insn (dest, gen_lowpart (mode, tmp2));
>         }
>        else
>         {
>           /* In RV32 system, handle (const_poly_int:SI [m, n])
>                                     (const_poly_int:DI [m, n]).
>              In RV64 system, handle (const_poly_int:DI [m, n]).
> -       FIXME: Maybe we could gen SImode in RV32 and then sign-extend to DImode,
> -       the offset should not exceed 4GiB in general.  */
> +            FIXME: Maybe we could gen SImode in RV32 and then sign-extend to
> +            DImode, the offset should not exceed 4GiB in general.  */
>           rtx tmp = gen_reg_rtx (mode);
>           riscv_legitimize_poly_move (mode, dest, tmp, src);
>         }
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/poly_licm-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/poly_licm-1.c
> new file mode 100644
> index 00000000000..b7da65f0996
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/poly_licm-1.c
> @@ -0,0 +1,18 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -fno-schedule-insns -fno-schedule-insns2" } */
> +
> +extern int wsize;
> +
> +typedef unsigned short Posf;
> +#define NIL 0
> +
> +void foo (Posf *p)
> +{
> +  register unsigned n, m;
> +  do {
> +      m = *--p;
> +      *p = (Posf)(m >= wsize ? m-wsize : NIL);
> +  } while (--n);
> +}
> +
> +/* { dg-final { scan-assembler-times {vid\.v\s+v[0-9]+\s+addi\s+\s*[a-x0-9]+,\s*[a-x0-9]+,\s*-1\s+vrsub\.vx\s+} 1 } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/poly_licm-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/poly_licm-2.c
> new file mode 100644
> index 00000000000..ffb3c63149f
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/poly_licm-2.c
> @@ -0,0 +1,27 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -fno-schedule-insns -fno-schedule-insns2" } */
> +
> +typedef unsigned short uint16_t;
> +
> +void AAA (uint16_t *x, uint16_t *y, unsigned wsize, unsigned count)
> +{
> +  unsigned m = 0, n = count;
> +  register uint16_t *p;
> +
> +  p = x;
> +
> +  do {
> +    m = *--p;
> +    *p = (uint16_t)(m >= wsize ? m-wsize : 0);
> +  } while (--n);
> +
> +  n = wsize;
> +  p = y;
> +
> +  do {
> +      m = *--p;
> +      *p = (uint16_t)(m >= wsize ? m-wsize : 0);
> +  } while (--n);
> +}
> +
> +/* { dg-final { scan-assembler-times {vid\.v\s+v[0-9]+\s+vrsub\.vx\s+} 2 } } */
> --
> 2.36.1
>
 

  reply	other threads:[~2024-02-04  2:03 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-02-01 15:45 Juzhe-Zhong
2024-02-02  8:50 ` Kito Cheng
2024-02-04  2:03   ` juzhe.zhong [this message]
2024-02-06 13:14     ` Robin Dapp
2024-02-18  2:49       ` juzhe.zhong
2024-03-19  4:09       ` Jeff Law

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4AF20724FCB5A950+202402041003488289149@rivai.ai \
    --to=juzhe.zhong@rivai.ai \
    --cc=gcc-patches@gcc.gnu.org \
    --cc=jeffreyalaw@gmail.com \
    --cc=kito.cheng@gmail.com \
    --cc=kito.cheng@sifive.com \
    --cc=rdapp.gcc@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).