From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail-vk1-xa2e.google.com (mail-vk1-xa2e.google.com [IPv6:2607:f8b0:4864:20::a2e]) by sourceware.org (Postfix) with ESMTPS id 274523858CDB for ; Sat, 13 May 2023 03:14:05 +0000 (GMT) DMARC-Filter: OpenDMARC Filter v1.4.2 sourceware.org 274523858CDB Authentication-Results: sourceware.org; dmarc=pass (p=none dis=none) header.from=gmail.com Authentication-Results: sourceware.org; spf=pass smtp.mailfrom=gmail.com Received: by mail-vk1-xa2e.google.com with SMTP id 71dfb90a1353d-452f0e27a86so2160379e0c.3 for ; Fri, 12 May 2023 20:14:05 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20221208; t=1683947644; x=1686539644; h=content-transfer-encoding:cc:to:subject:message-id:date:from :in-reply-to:references:mime-version:from:to:cc:subject:date :message-id:reply-to; bh=27jx0bSNkHj9kHt9veDA2V/d4oESp5q1YedTXHQ/IOw=; b=K/XKR5N9oW8WbVWSqh8mjlXJcvM7Hfj+Q/CEJ83g3SH94Zi+MiBOh1+/7fcEu6uS3C aS2mc6ssgtTxSaK+gnK+mgMPAvifJT8ppTsibTjW+gIwPdPRF5TI1Sa4O+wAgh0cuxyy PjI7P985RJSci8q279sea7w1cvciNB+cs8wmLZHrmXmYSs1a14TfMDZRMekaoknQ+eG4 fRwXmTeAf7RO62s0WyXl8Qy1MX0DCLLnFs0TZ45TS4/SHuBqiqM/Q+L3XqhVetrl6S/b RfzHW9xonielu/4HdHw4YxpUag/enZVa3yWW9dR0VFCxZ6YZYOR6gixwOBvw1cM8iMh5 Tzqw== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20221208; t=1683947644; x=1686539644; h=content-transfer-encoding:cc:to:subject:message-id:date:from :in-reply-to:references:mime-version:x-gm-message-state:from:to:cc :subject:date:message-id:reply-to; bh=27jx0bSNkHj9kHt9veDA2V/d4oESp5q1YedTXHQ/IOw=; b=ViYn/VqXJSdbNISO0W/WPyQALjaZ19UIx03nCIoNBrkITA5jyUV6sGSSY9BZ1YCLR2 lGAlbBUrNp8Xh8SgLdsFdeFzT3gvm+EKqfLsL4L3ZfTr2n2Ex4AVRAcILtinz9Ldae+0 foi2to9Butgtv0lB6R3ojenQsuSGRJsy7G0nuyiqwjdHCVJQ0D2NymNLd17gUh+TEmnr yhCIax7w5sMvP/j4UrZ4/3VzVF40Ys6I3qnxARgsnqJmSG5xjZ6G3XA4OmuW38pSMyg1 j65DedKernwCb23PlD2WJ5i4ThIpGcnT+qpdWjhFbqZsN+bn+B55abyNDCwuM7rVAX0e Mo5w== X-Gm-Message-State: 
AC+VfDxqoFhamgryJRtB1yupDOBo2CA85fm9vuwYaVypwFOJwhGbsgZg 9VnEmprs1c+mId//wRFzrI5zw0TryZs/ycxvo74= X-Google-Smtp-Source: ACHHUZ59OjT6s9BzZ656McNuCbrjAxA3xInSiJjRDCZUuMcmgi0lHuWNkicEl3oIuQOk4s7g6woMcVexbiZ+4mMzFlA= X-Received: by 2002:a1f:5202:0:b0:446:ecce:e4a7 with SMTP id g2-20020a1f5202000000b00446eccee4a7mr8251510vkb.9.1683947644187; Fri, 12 May 2023 20:14:04 -0700 (PDT) MIME-Version: 1.0 References: <20230513020859.13485-1-juzhe.zhong@rivai.ai> In-Reply-To: <20230513020859.13485-1-juzhe.zhong@rivai.ai> From: Kito Cheng Date: Sat, 13 May 2023 11:13:52 +0800 Message-ID: Subject: Re: [PATCH] RISC-V: Optimize vsetvl AVL for VLS VLMAX auto-vectorization To: juzhe.zhong@rivai.ai Cc: gcc-patches@gcc.gnu.org, palmer@dabbelt.com, jeffreyalaw@gmail.com, rdapp.gcc@gmail.com Content-Type: text/plain; charset="UTF-8" Content-Transfer-Encoding: quoted-printable X-Spam-Status: No, score=-8.5 required=5.0 tests=BAYES_00,DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,DKIM_VALID_EF,FREEMAIL_FROM,GIT_PATCH_0,KAM_SHORT,RCVD_IN_DNSWL_NONE,SPF_HELO_NONE,SPF_PASS,TXREP,T_SCC_BODY_TEXT_LINE autolearn=ham autolearn_force=no version=3.4.6 X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on server2.sourceware.org List-Id: Hmmm here is alternative approach for this: diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc index b8dc333f54e1..c88056024e7d 100644 --- a/gcc/config/riscv/riscv-v.cc +++ b/gcc/config/riscv/riscv-v.cc @@ -50,6 +50,21 @@ using namespace riscv_vector; namespace riscv_vector { +/* Return true if vlmax is constant value instead and can be used + in vsetivli. */ +static bool +const_vlmax_p (machine_mode mode) +{ + poly_uint64 nunits =3D GET_MODE_NUNITS (mode); + + /* Only allowed in VLS-VLMAX mode. */ + if (!nunits.is_constant ()) + return false; + + /* vsetivli can only hold 0~31. 
*/ + return (IN_RANGE (nunits.to_constant (), 0, 31)); +} + template class insn_expander { public: @@ -101,15 +116,26 @@ public: void set_len_and_policy (rtx len, bool force_vlmax =3D false) { - bool vlmax_p =3D force_vlmax; + bool vlmax_p =3D force_vlmax || !len; gcc_assert (has_dest); - if (!len) + if (vlmax_p) { - vlmax_p =3D true; - len =3D gen_reg_rtx (Pmode); - emit_vlmax_vsetvl (dest_mode, len); - } + if (const_vlmax_p (dest_mode)) + { + /* Optimize VLS-VLMAX code gen, we can use vsetivli instead o= f + vsetvli to obtain the value of vlmax. */ + poly_uint64 nunits =3D GET_MODE_NUNITS (dest_mode); + len =3D gen_int_mode (nunits, Pmode); + /* It has become NONVLMAX now. */ + vlmax_p =3D false; + } + else if (!len) + { + len =3D gen_reg_rtx (Pmode); + emit_vlmax_vsetvl (dest_mode, len); + } + } add_input_operand (len, Pmode); --=20 On Sat, May 13, 2023 at 10:09=E2=80=AFAM wrote: > > From: Juzhe-Zhong > > This patch is optimizing the AVL for VLS auto-vectorization. > > Consider such case: > > typedef int8_t vnx2qi __attribute__ ((vector_size (2))); > > __attribute__ ((noipa)) void > f_vnx2qi (int8_t a, int8_t b, int8_t *out) > { > vnx2qi v =3D {a, b}; > *(vnx2qi *) out =3D v; > } > > Before this patch: > > f_vnx2qi: > vsetvli a5,zero,e8,mf8,ta,ma > vmv.v.x v1,a0 > vslide1down.vx v1,v1,a1 > vse8.v v1,0(a2) > ret > > After this patch: > > f_vnx2qi: > vsetivli zero,2,e8,mf8,ta,ma > vmv.v.x v1,a0 > vslide1down.vx v1,v1,a1 > vse8.v v1,0(a2) > ret > > gcc/ChangeLog: > > * config/riscv/riscv-protos.h (emit_vlmax_vsetvl): Change argument type. > * config/riscv/riscv-v.cc (emit_vlmax_vsetvl): Optimize AVL for vlmax VLS. > (emit_vlmax_reg_op): Ditto. > * config/riscv/vector.md: Adapt argument. > > gcc/testsuite/ChangeLog: > > * gcc.target/riscv/rvv/base/vf_avl-1.c: New test. 
> > --- > gcc/config/riscv/riscv-protos.h | 2 +- > gcc/config/riscv/riscv-v.cc | 25 +++++++++++++++---- > gcc/config/riscv/vector.md | 4 +-- > .../gcc.target/riscv/rvv/base/vf_avl-1.c | 15 +++++++++++ > 4 files changed, 38 insertions(+), 8 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/vf_avl-1.c > > diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-pro= tos.h > index bc71f9cbbba..90934d43430 100644 > --- a/gcc/config/riscv/riscv-protos.h > +++ b/gcc/config/riscv/riscv-protos.h > @@ -164,7 +164,7 @@ bool check_builtin_call (location_t, vec,= unsigned int, > tree, unsigned int, tree *); > bool const_vec_all_same_in_range_p (rtx, HOST_WIDE_INT, HOST_WIDE_INT); > bool legitimize_move (rtx, rtx, machine_mode); > -void emit_vlmax_vsetvl (machine_mode, rtx); > +void emit_vlmax_vsetvl (machine_mode, rtx *); > void emit_hard_vlmax_vsetvl (machine_mode, rtx); > void emit_vlmax_op (unsigned, rtx, rtx, machine_mode); > void emit_vlmax_reg_op (unsigned, rtx, rtx, rtx, machine_mode); > diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc > index d844c305320..382cc4b6311 100644 > --- a/gcc/config/riscv/riscv-v.cc > +++ b/gcc/config/riscv/riscv-v.cc > @@ -120,7 +120,9 @@ public: > { > vlmax_p =3D true; > len =3D gen_reg_rtx (Pmode); > - emit_vlmax_vsetvl (dest_mode, len); > + emit_vlmax_vsetvl (dest_mode, &len); > + if (CONST_INT_P (len)) > + vlmax_p =3D false; > } > > add_input_operand (len, Pmode); > @@ -183,16 +185,29 @@ emit_hard_vlmax_vsetvl (machine_mode vmode, rtx vl) > } > > void > -emit_vlmax_vsetvl (machine_mode vmode, rtx vl) > +emit_vlmax_vsetvl (machine_mode vmode, rtx *vl) > { > unsigned int sew =3D get_sew (vmode); > enum vlmul_type vlmul =3D get_vlmul (vmode); > unsigned int ratio =3D calculate_ratio (sew, vlmul); > > + /* For VLS VLMAX auto-vectorization, we change > + VL into const_int value of VF so that we > + will emit "vsetivli zero, CONST_INT" instead of > + "vsetvli a5, zero". 
> + > + TODO: Support VLS min-length in the future. */ > + poly_uint64 nunits =3D GET_MODE_NUNITS (vmode); > + if (nunits.is_constant () && IN_RANGE (nunits.to_constant (), 0, 31)) > + { > + *vl =3D gen_int_mode (nunits, Pmode); > + return; > + } > + > if (!optimize) > - emit_hard_vlmax_vsetvl (vmode, vl); > + emit_hard_vlmax_vsetvl (vmode, *vl); > else > - emit_insn (gen_vlmax_avl (Pmode, vl, gen_int_mode (ratio, Pmode))); > + emit_insn (gen_vlmax_avl (Pmode, *vl, gen_int_mode (ratio, Pmode))); > } > > /* Calculate SEW/LMUL ratio. */ > @@ -323,7 +338,7 @@ emit_vlmax_reg_op (unsigned icode, rtx dest, rtx src,= rtx len, > machine_mode mask_mode) > { > emit_pred_op (icode, NULL_RTX, dest, src, len, mask_mode, > - /* Force VLMAX */ true); > + /* Force VLMAX */ CONST_INT_P (len) ? false : true); > } > > void > diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md > index 328fce8d632..b02ecf92c00 100644 > --- a/gcc/config/riscv/vector.md > +++ b/gcc/config/riscv/vector.md > @@ -720,7 +720,7 @@ > emit_insn (gen_rtx_SET (operands[0], operands[1])); > else > { > - riscv_vector::emit_vlmax_vsetvl (mode, operands[2]); > + riscv_vector::emit_vlmax_vsetvl (mode, &operands[2])= ; > riscv_vector::emit_vlmax_reg_op (code_for_pred_mov (= mode), > operands[0], operands[1], operands= [2], > mode); > @@ -741,7 +741,7 @@ > emit_insn (gen_rtx_SET (operands[0], operands[1])); > else > { > - riscv_vector::emit_vlmax_vsetvl (mode, operands[2]); > + riscv_vector::emit_vlmax_vsetvl (mode, &operands[2]); > riscv_vector::emit_vlmax_reg_op (code_for_pred_mov (mode)= , > operands[0], operands[1], operands= [2], > mode); > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/vf_avl-1.c b/gcc/tes= tsuite/gcc.target/riscv/rvv/base/vf_avl-1.c > new file mode 100644 > index 00000000000..11adf6bc611 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/vf_avl-1.c > @@ -0,0 +1,15 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O3 -march=3Drv32gcv -mabi=3Dilp32d --param 
riscv-autov= ec-preference=3Dfixed-vlmax" } */ > + > +#include > + > +typedef int8_t vnx2qi __attribute__ ((vector_size (2))); > + > +__attribute__ ((noipa)) void > +f_vnx2qi (int8_t a, int8_t b, int8_t *out) > +{ > + vnx2qi v =3D {a, b}; > + *(vnx2qi *) out =3D v; > +} > + > +/* { dg-final { scan-assembler-times {vsetivli\s+zero,\s*2,\s*e8,\s*mf8,= \s*t[au],\s*m[au]} 1 } } */ > -- > 2.36.1 >