From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail-vk1-xa2b.google.com (mail-vk1-xa2b.google.com [IPv6:2607:f8b0:4864:20::a2b]) by sourceware.org (Postfix) with ESMTPS id 0504F3857B98 for ; Wed, 7 Jun 2023 02:38:21 +0000 (GMT) DMARC-Filter: OpenDMARC Filter v1.4.2 sourceware.org 0504F3857B98 Authentication-Results: sourceware.org; dmarc=pass (p=none dis=none) header.from=gmail.com Authentication-Results: sourceware.org; spf=pass smtp.mailfrom=gmail.com Received: by mail-vk1-xa2b.google.com with SMTP id 71dfb90a1353d-45d1c1404easo1208375e0c.1 for ; Tue, 06 Jun 2023 19:38:21 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20221208; t=1686105500; x=1688697500; h=content-transfer-encoding:cc:to:subject:message-id:date:from :in-reply-to:references:mime-version:from:to:cc:subject:date :message-id:reply-to; bh=j4z593eNP0kPoL+TmfKIhhRjEyeYjCgLrV/U/GScFOY=; b=L+Z7N8RZixq7s337m2rl03vV3fB2UmgEj9i6RYluEkhJZJlkW3L559OE1yJmfij4Xx Ahv3UApZfpgbgqaBpX87kNbwg+64vyNyFFrRQ/UYbKE4UFZHbZMpWg03j4xlztuuMxh5 L93EmI5i8FiZSnxfGfikc3+gxCktx1pkm80ttVzbYfCy3G4afjjgvkRg0LASjHyWLAcr LZBif0OSE76vPJrUZPFjXX+PK2uYcpQH7Tl9i9H4QXmUfs1Oqw1NgQdPb5E/TOjKQlLX FGbIeV9a7SKQ68XQZ1ptTLD+rVP9CS9omGGleKzX1iGSWgx5Hj2eB0plc2EyDXYR307t 5YEg== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20221208; t=1686105500; x=1688697500; h=content-transfer-encoding:cc:to:subject:message-id:date:from :in-reply-to:references:mime-version:x-gm-message-state:from:to:cc :subject:date:message-id:reply-to; bh=j4z593eNP0kPoL+TmfKIhhRjEyeYjCgLrV/U/GScFOY=; b=ClKJ0jRZ/Yhuv8kSnKauzzmJCARuDmhatqJrplH40zfyNlhXgMu8wi/hf5l6QC7nG4 yUGsr1pK6lMCppc02e7ntZklfaISMiBHHaObJRKtuqCuK2oJq8JbOb4eM8n66BQ8rMLu snPPbcmm3MBDOndXa+W2wfZKVy+9HZ74SOkuLnW+yV6Eb7abwc3SuNCa6VzR1+CdnmVS JZ7zlbSDFvDOZycbpTix0G5vYEBIfEryawqHYMHtUOSbEgGLRaKhxs7FGGefuh9cwDbH HoyIpjgS9Wq0uzFxXpB57VE6j+p/Pd+qqIgVBHVssBKzea/fSYWCltgdR5FW2QvfcP6r 7SJQ== X-Gm-Message-State: 
AC+VfDyUPiVY+YOYccIyTFQ+e/MbpCNLpqUuLKegM6i2sHJSRmWytF0H w+26HcBD5MtzI0X00U7eJnUBxNCoksojP4YxTZc= X-Google-Smtp-Source: ACHHUZ47gEZrSHZKvRLmoC/j1oW38ai1bsZWvCmrHfhDiEkNxw1YYOorNqtJVWjQCmY1RDP4UwmBA3scPXMini7ejIc= X-Received: by 2002:a1f:bf0b:0:b0:462:7eb0:6f8 with SMTP id p11-20020a1fbf0b000000b004627eb006f8mr1287806vkf.1.1686105499884; Tue, 06 Jun 2023 19:38:19 -0700 (PDT) MIME-Version: 1.0 References: <20230606041635.226494-1-juzhe.zhong@rivai.ai> In-Reply-To: From: Kito Cheng Date: Wed, 7 Jun 2023 10:38:08 +0800 Message-ID: Subject: Re: [PATCH] RISC-V: Support RVV VLA SLP auto-vectorization To: "juzhe.zhong@rivai.ai" Cc: gcc-patches , "Kito.cheng" , palmer , palmer , jeffreyalaw , Robin Dapp , "pan2.li" Content-Type: text/plain; charset="UTF-8" Content-Transfer-Encoding: quoted-printable X-Spam-Status: No, score=-7.8 required=5.0 tests=BAYES_00,DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,DKIM_VALID_EF,FREEMAIL_FROM,GIT_PATCH_0,KAM_SHORT,RCVD_IN_DNSWL_NONE,SCC_10_SHORT_WORD_LINES,SCC_5_SHORT_WORD_LINES,SPF_HELO_NONE,SPF_PASS,TXREP,T_SCC_BODY_TEXT_LINE autolearn=ham autolearn_force=no version=3.4.6 X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on server2.sourceware.org List-Id: Few comments, but all comments are asking adding more comment :P > @@ -398,6 +410,48 @@ rvv_builder::get_merge_scalar_mask (unsigned int ind= ex_in_pattern) const > return gen_int_mode (mask, inner_int_mode ()); > } > > +/* Return true if the variable-length vector is single step. */ > +bool > +rvv_builder::single_step_npatterns_p () const what is single_step_npatterns? could you have more comment? 
> +{ > + if (nelts_per_pattern () !=3D 3) > + return false; > + > + poly_int64 step > + =3D rtx_to_poly_int64 (elt (npatterns ())) - rtx_to_poly_int64 (elt = (0)); > + for (unsigned int i =3D 0; i < npatterns (); i++) > + { > + poly_int64 ele0 =3D rtx_to_poly_int64 (elt (i)); > + poly_int64 ele1 =3D rtx_to_poly_int64 (elt (npatterns () + i)); > + poly_int64 ele2 =3D rtx_to_poly_int64 (elt (npatterns () * 2 + i))= ; > + poly_int64 diff1 =3D ele1 - ele0; > + poly_int64 diff2 =3D ele2 - ele1; > + if (maybe_ne (step, diff1) || maybe_ne (step, diff2)) > + return false; > + } > + return true; > +} > + > +/* Return true if all elements of NPATTERNS are equal. > + > + E.g. NPATTERNS =3D 4: > + { 2, 2, 2, 2, 4, 4, 4, 4, 8, 8, 8, 8, 16, 16, 16, 16, ... } > + E.g. NPATTERNS =3D 8: > + { 2, 2, 2, 2, 2, 2, 2, 2, 8, 8, 8, 8, 8, 8, 8, 8, ... } > +*/ > +bool > +rvv_builder::npatterns_all_equal_p () const > +{ > + poly_int64 ele0 =3D rtx_to_poly_int64 (elt (0)); > + for (unsigned int i =3D 1; i < npatterns (); i++) > + { > + poly_int64 ele =3D rtx_to_poly_int64 (elt (i)); > + if (!known_eq (ele, ele0)) > + return false; > + } > + return true; > +} > + > static unsigned > get_sew (machine_mode mode) > { > @@ -425,7 +479,7 @@ const_vec_all_same_in_range_p (rtx x, HOST_WIDE_INT m= inval, > future. 
*/ > > static bool > -const_vec_all_in_range_p (rtx vec, HOST_WIDE_INT minval, HOST_WIDE_INT m= axval) > +const_vec_all_in_range_p (rtx vec, poly_int64 minval, poly_int64 maxval) > { > if (!CONST_VECTOR_P (vec) > || GET_MODE_CLASS (GET_MODE (vec)) !=3D MODE_VECTOR_INT) > @@ -440,8 +494,10 @@ const_vec_all_in_range_p (rtx vec, HOST_WIDE_INT min= val, HOST_WIDE_INT maxval) > for (int i =3D 0; i < nunits; i++) > { > rtx vec_elem =3D CONST_VECTOR_ELT (vec, i); > - if (!CONST_INT_P (vec_elem) > - || !IN_RANGE (INTVAL (vec_elem), minval, maxval)) > + poly_int64 value; > + if (!poly_int_rtx_p (vec_elem, &value) > + || maybe_lt (value, minval) > + || maybe_gt (value, maxval)) > return false; > } > return true; > @@ -453,7 +509,7 @@ const_vec_all_in_range_p (rtx vec, HOST_WIDE_INT minv= al, HOST_WIDE_INT maxval) > future. */ > > static rtx > -gen_const_vector_dup (machine_mode mode, HOST_WIDE_INT val) > +gen_const_vector_dup (machine_mode mode, poly_int64 val) > { > rtx c =3D gen_int_mode (val, GET_MODE_INNER (mode)); > return gen_const_vec_duplicate (mode, c); > @@ -727,7 +783,10 @@ emit_vlmax_gather_insn (rtx target, rtx op, rtx sel) > rtx elt; > insn_code icode; > machine_mode data_mode =3D GET_MODE (target); > - if (const_vec_duplicate_p (sel, &elt)) > + machine_mode sel_mode =3D GET_MODE (sel); > + if (maybe_ne (GET_MODE_SIZE (data_mode), GET_MODE_SIZE (sel_mode))) > + icode =3D code_for_pred_gatherei16 (data_mode); > + else if (const_vec_duplicate_p (sel, &elt)) > { > icode =3D code_for_pred_gather_scalar (data_mode); > sel =3D elt; > @@ -744,7 +803,10 @@ emit_vlmax_masked_gather_mu_insn (rtx target, rtx op= , rtx sel, rtx mask) > rtx elt; > insn_code icode; > machine_mode data_mode =3D GET_MODE (target); > - if (const_vec_duplicate_p (sel, &elt)) > + machine_mode sel_mode =3D GET_MODE (sel); > + if (maybe_ne (GET_MODE_SIZE (data_mode), GET_MODE_SIZE (sel_mode))) > + icode =3D code_for_pred_gatherei16 (data_mode); > + else if (const_vec_duplicate_p (sel, &elt)) > { > 
icode =3D code_for_pred_gather_scalar (data_mode); > sel =3D elt; > @@ -895,11 +957,130 @@ expand_const_vector (rtx target, rtx src) > return; > } > > - /* TODO: We only support const duplicate vector for now. More cases > - will be supported when we support auto-vectorization: > + /* Handle variable-length vector. */ > + unsigned int nelts_per_pattern =3D CONST_VECTOR_NELTS_PER_PATTERN (src= ); > + unsigned int npatterns =3D CONST_VECTOR_NPATTERNS (src); > + rvv_builder builder (mode, npatterns, nelts_per_pattern); > + for (unsigned int i =3D 0; i < nelts_per_pattern; i++) > + { > + for (unsigned int j =3D 0; j < npatterns; j++) > + builder.quick_push (CONST_VECTOR_ELT (src, i * npatterns + j)); > + } > + builder.finalize (); > > - 1. multiple elts duplicate vector. > - 2. multiple patterns with multiple elts. */ > + if (CONST_VECTOR_DUPLICATE_P (src)) When I read this check I thought CONST_VECTOR_DUPLICATE_P was a predicate for a vector whose elements all have the same value, like [a, a, a, a, ...], but it seems that is not the case. Could you add a comment explaining what it actually covers? > + { > + if (builder.can_duplicate_repeating_sequence_p ()) Please also add a comment explaining this case. > + { > + rtx ele =3D builder.get_merged_repeating_sequence (); > + rtx dup =3D expand_vector_broadcast (builder.new_mode (), ele); > + emit_move_insn (target, gen_lowpart (mode, dup)); > + } > + else And a comment explaining this case as well. > + { > + unsigned int nbits =3D npatterns - 1; > + > + /* Generate vid =3D { 0, 1, 2, 3, 4, 5, 6, 7, ... }. */ > + rtx vid =3D gen_reg_rtx (builder.int_mode ()); > + rtx op[] =3D {vid}; > + emit_vlmax_insn (code_for_pred_series (builder.int_mode ()), > + RVV_MISC_OP, op); > + > + /* Generate vid_repeat =3D { 0, 1, ... nbits, ... 
} */ > + rtx vid_repeat =3D gen_reg_rtx (builder.int_mode ()); > + rtx and_ops[] =3D {vid_repeat, vid, > + gen_int_mode (nbits, builder.inner_int_mode ()= )}; > + emit_vlmax_insn (code_for_pred_scalar (AND, builder.int_mode ()= ), > + RVV_BINOP, and_ops); > + > + rtx tmp =3D gen_reg_rtx (builder.mode ()); > + rtx dup_ops[] =3D {tmp, builder.elt (0)}; > + emit_vlmax_insn (code_for_pred_broadcast (builder.mode ()), RVV= _UNOP, > + dup_ops); > + for (unsigned int i =3D 1; i < builder.npatterns (); i++) > + { > + /* Generate mask according to i. */ > + rtx mask =3D gen_reg_rtx (builder.mask_mode ()); > + rtx const_vec =3D gen_const_vector_dup (builder.int_mode ()= , i); > + expand_vec_cmp (mask, EQ, vid_repeat, const_vec); > + > + /* Merge scalar to each i. */ > + rtx tmp2 =3D gen_reg_rtx (builder.mode ()); > + rtx merge_ops[] =3D {tmp2, tmp, builder.elt (i), mask}; > + insn_code icode =3D code_for_pred_merge_scalar (builder.mod= e ()); > + emit_vlmax_merge_insn (icode, RVV_MERGE_OP, merge_ops); > + tmp =3D tmp2; > + } > + emit_move_insn (target, tmp); > + } > + return; > + } > + else if (CONST_VECTOR_STEPPED_P (src)) > + { > + gcc_assert (GET_MODE_CLASS (mode) =3D=3D MODE_VECTOR_INT); > + if (builder.single_step_npatterns_p ()) > + { > + /* Describe the case by choosing NPATTERNS =3D 4 as an example.= */ > + rtx base, step; > + if (builder.npatterns_all_equal_p ()) > + { > + /* Generate the variable-length vector as below: > + E.g. { 0, 0, 0, 0, 8, 8, 8, 8, 16, 16, 16, 16, ... } */ Please extend the comment with the general form, e.g.: { a, a, a, a, a + step, a + step, a + step, a + step, a + step * 2, a + step * 2, a + step * 2, a + step * 2, ... } > + /* Step 1: Generate base =3D { 0, 0, 0, 0, 0, 0, 0, ... }. = */ > + base =3D expand_vector_broadcast (builder.mode (), builder.= elt (0)); > + } > + else > + { > + /* Generate the variable-length vector as below: > + E.g. { 0, 6, 0, 6, 8, 14, 8, 14, 16, 22, 16, 22, ... 
} *= / Please extend the comment with the general form, e.g.: { a, b, a, b, a + step, b + step, a + step, b + step, a + step * 2, b + step * 2, a + step * 2, b + step * 2, ... } > + /* Step 1: Generate base =3D { 0, 6, 0, 6, ... }. */ > + rvv_builder new_builder (builder.mode (), builder.npatterns= (), > + 1); > + for (unsigned int i =3D 0; i < builder.npatterns (); ++i) > + new_builder.quick_push (builder.elt (i)); > + rtx new_vec =3D new_builder.build (); > + base =3D gen_reg_rtx (builder.mode ()); > + emit_move_insn (base, new_vec); > + } > + > + /* Step 2: Generate step =3D gen_int_mode (diff, mode). */ > + poly_int64 value1 =3D rtx_to_poly_int64 (builder.elt (0)); > + poly_int64 value2 > + =3D rtx_to_poly_int64 (builder.elt (builder.npatterns ())); > + poly_int64 diff =3D value2 - value1; > + step =3D gen_int_mode (diff, builder.inner_mode ()); > + > + /* Step 3: Generate vid =3D { 0, 1, 2, 3, 4, 5, 6, 7, ... }. *= / > + rtx vid =3D gen_reg_rtx (builder.mode ()); > + rtx op[] =3D {vid}; > + emit_vlmax_insn (code_for_pred_series (builder.mode ()), RVV_MI= SC_OP, > + op); > + > + /* Step 4: Generate factor =3D { 0, 0, 0, 0, 1, 1, 1, 1, ... }.= */ > + rtx factor =3D gen_reg_rtx (builder.mode ()); > + rtx shift_ops[] > + =3D {factor, vid, > + gen_int_mode (exact_log2 (builder.npatterns ()), Pmode)}; Do we check somewhere that builder.npatterns () is a power of 2? exact_log2 returns -1 when it is not, which would give a bogus shift amount here. > + emit_vlmax_insn (code_for_pred_scalar (LSHIFTRT, builder.mode (= )), > + RVV_BINOP, shift_ops); > + > + /* Step 5: Generate adjusted step =3D { 0, 0, 0, 0, diff, diff,= ... } */ > + rtx adjusted_step =3D gen_reg_rtx (builder.mode ()); > + rtx mul_ops[] =3D {adjusted_step, factor, step}; > + emit_vlmax_insn (code_for_pred_scalar (MULT, builder.mode ()), > + RVV_BINOP, mul_ops); > + > + /* Step 6: Generate the final result. */ > + rtx add_ops[] =3D {target, base, adjusted_step}; > + emit_vlmax_insn (code_for_pred (PLUS, builder.mode ()), RVV_BIN= OP, > + add_ops); > + } > + else > + /* TODO: We will enable more variable-length vector in the future= . 
*/ > + gcc_unreachable (); > + } > + else > + gcc_unreachable (); > } > > /* Expand a pre-RA RVV data move from SRC to DEST. On Wed, Jun 7, 2023 at 8:39=E2=80=AFAM juzhe.zhong@rivai.ai wrote: > > Ping this patch. Ok for trunk ? > Since following patches are blocked by this. > > > > juzhe.zhong@rivai.ai > > From: juzhe.zhong > Date: 2023-06-06 12:16 > To: gcc-patches > CC: kito.cheng; kito.cheng; palmer; palmer; jeffreyalaw; rdapp.gcc; pan2.= li; Juzhe-Zhong > Subject: [PATCH] RISC-V: Support RVV VLA SLP auto-vectorization > From: Juzhe-Zhong > > This patch enables basic VLA SLP auto-vectorization. > Consider this following case: > void > f (uint8_t *restrict a, uint8_t *restrict b) > { > for (int i =3D 0; i < 100; ++i) > { > a[i * 8 + 0] =3D b[i * 8 + 7] + 1; > a[i * 8 + 1] =3D b[i * 8 + 7] + 2; > a[i * 8 + 2] =3D b[i * 8 + 7] + 8; > a[i * 8 + 3] =3D b[i * 8 + 7] + 4; > a[i * 8 + 4] =3D b[i * 8 + 7] + 5; > a[i * 8 + 5] =3D b[i * 8 + 7] + 6; > a[i * 8 + 6] =3D b[i * 8 + 7] + 7; > a[i * 8 + 7] =3D b[i * 8 + 7] + 3; > } > } > > To enable VLA SLP auto-vectorization, we should be able to handle this fo= llowing const vector: > > 1. NPATTERNS =3D 8, NELTS_PER_PATTERN =3D 3. > { 0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 8, 8, 8, 8, 16, 16, 16, 16, 16, 16,= 16, 16, ... } > > 2. NPATTERNS =3D 8, NELTS_PER_PATTERN =3D 1. > { 1, 2, 8, 4, 5, 6, 7, 3, ... } > > And these vector can be generated at prologue. > > After this patch, we end up with this following codegen: > > Prologue: > ... > vsetvli a7,zero,e16,m2,ta,ma > vid.v v4 > vsrl.vi v4,v4,3 > li a3,8 > vmul.vx v4,v4,a3 =3D=3D=3D> v4 =3D { 0, 0, 0, 0, 0, 0, 0, 0, 8, = 8, 8, 8, 8, 8, 8, 8, 16, 16, 16, 16, 16, 16, 16, 16, ... } > ... > li t1,67633152 > addi t1,t1,513 > li a3,50790400 > addi a3,a3,1541 > slli a3,a3,32 > add a3,a3,t1 > vsetvli t1,zero,e64,m1,ta,ma > vmv.v.x v3,a3 =3D=3D=3D> v3 =3D { 1, 2, 8, 4, 5, 6, 7, 3, ... } > ... > LoopBody: > ... > min a3,... 
> vsetvli zero,a3,e8,m1,ta,ma > vle8.v v2,0(a6) > vsetvli a7,zero,e8,m1,ta,ma > vrgatherei16.vv v1,v2,v4 > vadd.vv v1,v1,v3 > vsetvli zero,a3,e8,m1,ta,ma > vse8.v v1,0(a2) > add a6,a6,a4 > add a2,a2,a4 > mv a3,a5 > add a5,a5,t1 > bgtu a3,a4,.L3 > ... > > Note: we need to use "vrgatherei16.vv" instead of "vrgather.vv" for SEW = =3D 8 since "vrgatherei16.vv" can cover larger > range than "vrgather.vv" (which only can maximum element index =3D = 255). > Epilogue: > lbu a5,799(a1) > addiw a4,a5,1 > sb a4,792(a0) > addiw a4,a5,2 > sb a4,793(a0) > addiw a4,a5,8 > sb a4,794(a0) > addiw a4,a5,4 > sb a4,795(a0) > addiw a4,a5,5 > sb a4,796(a0) > addiw a4,a5,6 > sb a4,797(a0) > addiw a4,a5,7 > sb a4,798(a0) > addiw a5,a5,3 > sb a5,799(a0) > ret > > There is one more last thing we need to do is the "Epilogue auto-vectoriz= ation" which needs VLS modes support. > I will support VLS modes for "Epilogue auto-vectorization" in the future. > > gcc/ChangeLog: > > * config/riscv/riscv-protos.h (expand_vec_perm_const): New functi= on. > * config/riscv/riscv-v.cc (rvv_builder::can_duplicate_repeating_s= equence_p): Support POLY handling. > (rvv_builder::single_step_npatterns_p): New function. > (rvv_builder::npatterns_all_equal_p): Ditto. > (const_vec_all_in_range_p): Support POLY handling. > (gen_const_vector_dup): Ditto. > (emit_vlmax_gather_insn): Add vrgatherei16. > (emit_vlmax_masked_gather_mu_insn): Ditto. > (expand_const_vector): Add VLA SLP const vector support. > (expand_vec_perm): Support POLY. > (struct expand_vec_perm_d): New struct. > (shuffle_generic_patterns): New function. > (expand_vec_perm_const_1): Ditto. > (expand_vec_perm_const): Ditto. > * config/riscv/riscv.cc (riscv_vectorize_vec_perm_const): Ditto. > (TARGET_VECTORIZE_VEC_PERM_CONST): New targethook. > > gcc/testsuite/ChangeLog: > > * gcc.target/riscv/rvv/autovec/scalable-1.c: Adapt testcase for V= LA vectorizer. > * gcc.target/riscv/rvv/autovec/v-1.c: Ditto. 
> * gcc.target/riscv/rvv/autovec/zve32f_zvl128b-1.c: Ditto. > * gcc.target/riscv/rvv/autovec/zve32x_zvl128b-1.c: Ditto. > * gcc.target/riscv/rvv/autovec/zve64d-1.c: Ditto. > * gcc.target/riscv/rvv/autovec/zve64d_zvl128b-1.c: Ditto. > * gcc.target/riscv/rvv/autovec/zve64f-1.c: Ditto. > * gcc.target/riscv/rvv/autovec/zve64f_zvl128b-1.c: Ditto. > * gcc.target/riscv/rvv/autovec/zve64x_zvl128b-1.c: Ditto. > * gcc.target/riscv/rvv/autovec/partial/slp-1.c: New test. > * gcc.target/riscv/rvv/autovec/partial/slp-2.c: New test. > * gcc.target/riscv/rvv/autovec/partial/slp-3.c: New test. > * gcc.target/riscv/rvv/autovec/partial/slp-4.c: New test. > * gcc.target/riscv/rvv/autovec/partial/slp-5.c: New test. > * gcc.target/riscv/rvv/autovec/partial/slp-6.c: New test. > * gcc.target/riscv/rvv/autovec/partial/slp-7.c: New test. > * gcc.target/riscv/rvv/autovec/partial/slp_run-1.c: New test. > * gcc.target/riscv/rvv/autovec/partial/slp_run-2.c: New test. > * gcc.target/riscv/rvv/autovec/partial/slp_run-3.c: New test. > * gcc.target/riscv/rvv/autovec/partial/slp_run-4.c: New test. > * gcc.target/riscv/rvv/autovec/partial/slp_run-5.c: New test. > * gcc.target/riscv/rvv/autovec/partial/slp_run-6.c: New test. > * gcc.target/riscv/rvv/autovec/partial/slp_run-7.c: New test. 
> > --- > gcc/config/riscv/riscv-protos.h | 2 + > gcc/config/riscv/riscv-v.cc | 352 ++++++++++++++++-- > gcc/config/riscv/riscv.cc | 16 + > .../riscv/rvv/autovec/partial/slp-1.c | 22 ++ > .../riscv/rvv/autovec/partial/slp-2.c | 22 ++ > .../riscv/rvv/autovec/partial/slp-3.c | 22 ++ > .../riscv/rvv/autovec/partial/slp-4.c | 22 ++ > .../riscv/rvv/autovec/partial/slp-5.c | 22 ++ > .../riscv/rvv/autovec/partial/slp-6.c | 23 ++ > .../riscv/rvv/autovec/partial/slp-7.c | 15 + > .../riscv/rvv/autovec/partial/slp_run-1.c | 66 ++++ > .../riscv/rvv/autovec/partial/slp_run-2.c | 67 ++++ > .../riscv/rvv/autovec/partial/slp_run-3.c | 67 ++++ > .../riscv/rvv/autovec/partial/slp_run-4.c | 67 ++++ > .../riscv/rvv/autovec/partial/slp_run-5.c | 67 ++++ > .../riscv/rvv/autovec/partial/slp_run-6.c | 67 ++++ > .../riscv/rvv/autovec/partial/slp_run-7.c | 58 +++ > .../gcc.target/riscv/rvv/autovec/scalable-1.c | 2 +- > .../gcc.target/riscv/rvv/autovec/v-1.c | 7 +- > .../riscv/rvv/autovec/zve32f_zvl128b-1.c | 2 +- > .../riscv/rvv/autovec/zve32x_zvl128b-1.c | 2 +- > .../gcc.target/riscv/rvv/autovec/zve64d-1.c | 2 +- > .../riscv/rvv/autovec/zve64d_zvl128b-1.c | 2 +- > .../gcc.target/riscv/rvv/autovec/zve64f-1.c | 2 +- > .../riscv/rvv/autovec/zve64f_zvl128b-1.c | 2 +- > .../riscv/rvv/autovec/zve64x_zvl128b-1.c | 2 +- > 26 files changed, 963 insertions(+), 37 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp= -1.c > create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp= -2.c > create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp= -3.c > create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp= -4.c > create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp= -5.c > create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp= -6.c > create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp= -7.c > create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp= _run-1.c > create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp= _run-2.c > create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp= _run-3.c > create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp= _run-4.c > create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp= _run-5.c > create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp= _run-6.c > create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp= _run-7.c > > diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-pro= tos.h > index d770e5e826e..27ecd16e496 100644 > --- a/gcc/config/riscv/riscv-protos.h > +++ b/gcc/config/riscv/riscv-protos.h > @@ -168,6 +168,8 @@ void init_builtins (void); > const char *mangle_builtin_type (const_tree); > #ifdef GCC_TARGET_H > bool verify_type_context (location_t, type_context_kind, const_tree, bool= ); > +bool expand_vec_perm_const (machine_mode, machine_mode, rtx, rtx, rtx, > + const vec_perm_indices &); > #endif > void handle_pragma_vector (void); > tree builtin_decl (unsigned, bool); > diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc > index 83277fc2c05..4864429ed06 100644 > --- a/gcc/config/riscv/riscv-v.cc > +++ b/gcc/config/riscv/riscv-v.cc > @@ -251,9 +251,12 @@ public: > m_inner_mode =3D GET_MODE_INNER (mode); > m_inner_bits_size =3D GET_MODE_BITSIZE (m_inner_mode); > m_inner_bytes_size =3D GET_MODE_SIZE (m_inner_mode); > + m_mask_mode =3D get_mask_mode (mode).require (); > gcc_assert ( > int_mode_for_size (inner_bits_size (), 0).exists (&m_inner_int_mod= e)); > + m_int_mode > + =3D get_vector_mode (m_inner_int_mode, GET_MODE_NUNITS (mode)).req= uire (); > } > bool can_duplicate_repeating_sequence_p (); > @@ -262,9 +265,14 @@ public: > bool repeating_sequence_use_merge_profitable_p (); > rtx get_merge_scalar_mask (unsigned int) const; > + bool single_step_npatterns_p 
() const; > + bool npatterns_all_equal_p () const; > + > machine_mode new_mode () const { return m_new_mode; } > scalar_mode inner_mode () const { return m_inner_mode; } > scalar_int_mode inner_int_mode () const { return m_inner_int_mode; } > + machine_mode mask_mode () const { return m_mask_mode; } > + machine_mode int_mode () const { return m_int_mode; } > unsigned int inner_bits_size () const { return m_inner_bits_size; } > unsigned int inner_bytes_size () const { return m_inner_bytes_size; } > @@ -273,6 +281,8 @@ private: > scalar_int_mode m_inner_int_mode; > machine_mode m_new_mode; > scalar_int_mode m_new_inner_mode; > + machine_mode m_mask_mode; > + machine_mode m_int_mode; > unsigned int m_inner_bits_size; > unsigned int m_inner_bytes_size; > }; > @@ -290,7 +300,9 @@ rvv_builder::can_duplicate_repeating_sequence_p () > || GET_MODE_SIZE (m_new_inner_mode) > UNITS_PER_WORD > || !get_vector_mode (m_new_inner_mode, new_size).exists (&m_new_mo= de)) > return false; > - return repeating_sequence_p (0, full_nelts ().to_constant (), npattern= s ()); > + if (full_nelts ().is_constant ()) > + return repeating_sequence_p (0, full_nelts ().to_constant (), npatte= rns ()); > + return nelts_per_pattern () =3D=3D 1; > } > /* Return true if it is a repeating sequence that using > @@ -398,6 +410,48 @@ rvv_builder::get_merge_scalar_mask (unsigned int ind= ex_in_pattern) const > return gen_int_mode (mask, inner_int_mode ()); > } > +/* Return true if the variable-length vector is single step. 
*/ > +bool > +rvv_builder::single_step_npatterns_p () const > +{ > + if (nelts_per_pattern () !=3D 3) > + return false; > + > + poly_int64 step > + =3D rtx_to_poly_int64 (elt (npatterns ())) - rtx_to_poly_int64 (elt = (0)); > + for (unsigned int i =3D 0; i < npatterns (); i++) > + { > + poly_int64 ele0 =3D rtx_to_poly_int64 (elt (i)); > + poly_int64 ele1 =3D rtx_to_poly_int64 (elt (npatterns () + i)); > + poly_int64 ele2 =3D rtx_to_poly_int64 (elt (npatterns () * 2 + i))= ; > + poly_int64 diff1 =3D ele1 - ele0; > + poly_int64 diff2 =3D ele2 - ele1; > + if (maybe_ne (step, diff1) || maybe_ne (step, diff2)) > + return false; > + } > + return true; > +} > + > +/* Return true if all elements of NPATTERNS are equal. > + > + E.g. NPATTERNS =3D 4: > + { 2, 2, 2, 2, 4, 4, 4, 4, 8, 8, 8, 8, 16, 16, 16, 16, ... } > + E.g. NPATTERNS =3D 8: > + { 2, 2, 2, 2, 2, 2, 2, 2, 8, 8, 8, 8, 8, 8, 8, 8, ... } > +*/ > +bool > +rvv_builder::npatterns_all_equal_p () const > +{ > + poly_int64 ele0 =3D rtx_to_poly_int64 (elt (0)); > + for (unsigned int i =3D 1; i < npatterns (); i++) > + { > + poly_int64 ele =3D rtx_to_poly_int64 (elt (i)); > + if (!known_eq (ele, ele0)) > + return false; > + } > + return true; > +} > + > static unsigned > get_sew (machine_mode mode) > { > @@ -425,7 +479,7 @@ const_vec_all_same_in_range_p (rtx x, HOST_WIDE_INT m= inval, > future. 
*/ > static bool > -const_vec_all_in_range_p (rtx vec, HOST_WIDE_INT minval, HOST_WIDE_INT m= axval) > +const_vec_all_in_range_p (rtx vec, poly_int64 minval, poly_int64 maxval) > { > if (!CONST_VECTOR_P (vec) > || GET_MODE_CLASS (GET_MODE (vec)) !=3D MODE_VECTOR_INT) > @@ -440,8 +494,10 @@ const_vec_all_in_range_p (rtx vec, HOST_WIDE_INT min= val, HOST_WIDE_INT maxval) > for (int i =3D 0; i < nunits; i++) > { > rtx vec_elem =3D CONST_VECTOR_ELT (vec, i); > - if (!CONST_INT_P (vec_elem) > - || !IN_RANGE (INTVAL (vec_elem), minval, maxval)) > + poly_int64 value; > + if (!poly_int_rtx_p (vec_elem, &value) > + || maybe_lt (value, minval) > + || maybe_gt (value, maxval)) > return false; > } > return true; > @@ -453,7 +509,7 @@ const_vec_all_in_range_p (rtx vec, HOST_WIDE_INT minv= al, HOST_WIDE_INT maxval) > future. */ > static rtx > -gen_const_vector_dup (machine_mode mode, HOST_WIDE_INT val) > +gen_const_vector_dup (machine_mode mode, poly_int64 val) > { > rtx c =3D gen_int_mode (val, GET_MODE_INNER (mode)); > return gen_const_vec_duplicate (mode, c); > @@ -727,7 +783,10 @@ emit_vlmax_gather_insn (rtx target, rtx op, rtx sel) > rtx elt; > insn_code icode; > machine_mode data_mode =3D GET_MODE (target); > - if (const_vec_duplicate_p (sel, &elt)) > + machine_mode sel_mode =3D GET_MODE (sel); > + if (maybe_ne (GET_MODE_SIZE (data_mode), GET_MODE_SIZE (sel_mode))) > + icode =3D code_for_pred_gatherei16 (data_mode); > + else if (const_vec_duplicate_p (sel, &elt)) > { > icode =3D code_for_pred_gather_scalar (data_mode); > sel =3D elt; > @@ -744,7 +803,10 @@ emit_vlmax_masked_gather_mu_insn (rtx target, rtx op= , rtx sel, rtx mask) > rtx elt; > insn_code icode; > machine_mode data_mode =3D GET_MODE (target); > - if (const_vec_duplicate_p (sel, &elt)) > + machine_mode sel_mode =3D GET_MODE (sel); > + if (maybe_ne (GET_MODE_SIZE (data_mode), GET_MODE_SIZE (sel_mode))) > + icode =3D code_for_pred_gatherei16 (data_mode); > + else if (const_vec_duplicate_p (sel, &elt)) > { > 
icode =3D code_for_pred_gather_scalar (data_mode); > sel =3D elt; > @@ -895,11 +957,130 @@ expand_const_vector (rtx target, rtx src) > return; > } > - /* TODO: We only support const duplicate vector for now. More cases > - will be supported when we support auto-vectorization: > + /* Handle variable-length vector. */ > + unsigned int nelts_per_pattern =3D CONST_VECTOR_NELTS_PER_PATTERN (src= ); > + unsigned int npatterns =3D CONST_VECTOR_NPATTERNS (src); > + rvv_builder builder (mode, npatterns, nelts_per_pattern); > + for (unsigned int i =3D 0; i < nelts_per_pattern; i++) > + { > + for (unsigned int j =3D 0; j < npatterns; j++) > + builder.quick_push (CONST_VECTOR_ELT (src, i * npatterns + j)); > + } > + builder.finalize (); > - 1. multiple elts duplicate vector. > - 2. multiple patterns with multiple elts. */ > + if (CONST_VECTOR_DUPLICATE_P (src)) > + { > + if (builder.can_duplicate_repeating_sequence_p ()) > + { > + rtx ele =3D builder.get_merged_repeating_sequence (); > + rtx dup =3D expand_vector_broadcast (builder.new_mode (), ele); > + emit_move_insn (target, gen_lowpart (mode, dup)); > + } > + else > + { > + unsigned int nbits =3D npatterns - 1; > + > + /* Generate vid =3D { 0, 1, 2, 3, 4, 5, 6, 7, ... }. */ > + rtx vid =3D gen_reg_rtx (builder.int_mode ()); > + rtx op[] =3D {vid}; > + emit_vlmax_insn (code_for_pred_series (builder.int_mode ()), > + RVV_MISC_OP, op); > + > + /* Generate vid_repeat =3D { 0, 1, ... nbits, ... } */ > + rtx vid_repeat =3D gen_reg_rtx (builder.int_mode ()); > + rtx and_ops[] =3D {vid_repeat, vid, > + gen_int_mode (nbits, builder.inner_int_mode ())}; > + emit_vlmax_insn (code_for_pred_scalar (AND, builder.int_mode ()), > + RVV_BINOP, and_ops); > + > + rtx tmp =3D gen_reg_rtx (builder.mode ()); > + rtx dup_ops[] =3D {tmp, builder.elt (0)}; > + emit_vlmax_insn (code_for_pred_broadcast (builder.mode ()), RVV_UNOP, > + dup_ops); > + for (unsigned int i =3D 1; i < builder.npatterns (); i++) > + { > + /* Generate mask according to i. 
*/ > + rtx mask =3D gen_reg_rtx (builder.mask_mode ()); > + rtx const_vec =3D gen_const_vector_dup (builder.int_mode (), i); > + expand_vec_cmp (mask, EQ, vid_repeat, const_vec); > + > + /* Merge scalar to each i. */ > + rtx tmp2 =3D gen_reg_rtx (builder.mode ()); > + rtx merge_ops[] =3D {tmp2, tmp, builder.elt (i), mask}; > + insn_code icode =3D code_for_pred_merge_scalar (builder.mode ()); > + emit_vlmax_merge_insn (icode, RVV_MERGE_OP, merge_ops); > + tmp =3D tmp2; > + } > + emit_move_insn (target, tmp); > + } > + return; > + } > + else if (CONST_VECTOR_STEPPED_P (src)) > + { > + gcc_assert (GET_MODE_CLASS (mode) =3D=3D MODE_VECTOR_INT); > + if (builder.single_step_npatterns_p ()) > + { > + /* Describe the case by choosing NPATTERNS =3D 4 as an example. */ > + rtx base, step; > + if (builder.npatterns_all_equal_p ()) > + { > + /* Generate the variable-length vector as below: > + E.g. { 0, 0, 0, 0, 8, 8, 8, 8, 16, 16, 16, 16, ... } */ > + /* Step 1: Generate base =3D { 0, 0, 0, 0, 0, 0, 0, ... }. */ > + base =3D expand_vector_broadcast (builder.mode (), builder.elt (0= )); > + } > + else > + { > + /* Generate the variable-length vector as below: > + E.g. { 0, 6, 0, 6, 8, 14, 8, 14, 16, 22, 16, 22, ... } */ > + /* Step 1: Generate base =3D { 0, 6, 0, 6, ... }. */ > + rvv_builder new_builder (builder.mode (), builder.npatterns (), > + 1); > + for (unsigned int i =3D 0; i < builder.npatterns (); ++i) > + new_builder.quick_push (builder.elt (i)); > + rtx new_vec =3D new_builder.build (); > + base =3D gen_reg_rtx (builder.mode ()); > + emit_move_insn (base, new_vec); > + } > + > + /* Step 2: Generate step =3D gen_int_mode (diff, mode). */ > + poly_int64 value1 =3D rtx_to_poly_int64 (builder.elt (0)); > + poly_int64 value2 > + =3D rtx_to_poly_int64 (builder.elt (builder.npatterns ())); > + poly_int64 diff =3D value2 - value1; > + step =3D gen_int_mode (diff, builder.inner_mode ()); > + > + /* Step 3: Generate vid =3D { 0, 1, 2, 3, 4, 5, 6, 7, ... }. 
*/ > + rtx vid =3D gen_reg_rtx (builder.mode ()); > + rtx op[] =3D {vid}; > + emit_vlmax_insn (code_for_pred_series (builder.mode ()), RVV_MISC_OP, > + op); > + > + /* Step 4: Generate factor =3D { 0, 0, 0, 0, 1, 1, 1, 1, ... }. */ > + rtx factor =3D gen_reg_rtx (builder.mode ()); > + rtx shift_ops[] > + =3D {factor, vid, > + gen_int_mode (exact_log2 (builder.npatterns ()), Pmode)}; > + emit_vlmax_insn (code_for_pred_scalar (LSHIFTRT, builder.mode ()), > + RVV_BINOP, shift_ops); > + > + /* Step 5: Generate adjusted step =3D { 0, 0, 0, 0, diff, diff, ... }= */ > + rtx adjusted_step =3D gen_reg_rtx (builder.mode ()); > + rtx mul_ops[] =3D {adjusted_step, factor, step}; > + emit_vlmax_insn (code_for_pred_scalar (MULT, builder.mode ()), > + RVV_BINOP, mul_ops); > + > + /* Step 6: Generate the final result. */ > + rtx add_ops[] =3D {target, base, adjusted_step}; > + emit_vlmax_insn (code_for_pred (PLUS, builder.mode ()), RVV_BINOP, > + add_ops); > + } > + else > + /* TODO: We will enable more variable-length vector in the future. */ > + gcc_unreachable (); > + } > + else > + gcc_unreachable (); > } > /* Expand a pre-RA RVV data move from SRC to DEST. > @@ -2029,14 +2210,13 @@ expand_vec_perm (rtx target, rtx op0, rtx op1, rt= x sel) > { > machine_mode data_mode =3D GET_MODE (target); > machine_mode sel_mode =3D GET_MODE (sel); > - > - /* Enforced by the pattern condition. */ > - int nunits =3D GET_MODE_NUNITS (sel_mode).to_constant (); > + poly_uint64 nunits =3D GET_MODE_NUNITS (sel_mode); > /* Check if the sel only references the first values vector. If each s= elect > index is in range of [0, nunits - 1]. A single vrgather instruction= s is > - enough. */ > - if (const_vec_all_in_range_p (sel, 0, nunits - 1)) > + enough. Since we will use vrgatherei16.vv for variable-length vecto= r, > + it is never out of range and we don't need to modulo the index. 
*/ > + if (!nunits.is_constant () || const_vec_all_in_range_p (sel, 0, nunits= - 1)) > { > emit_vlmax_gather_insn (target, op0, sel); > return; > @@ -2057,14 +2237,20 @@ expand_vec_perm (rtx target, rtx op0, rtx op1, rt= x sel) > return; > } > - /* Note: vec_perm indices are supposed to wrap when they go beyond the > - size of the two value vectors, i.e. the upper bits of the indices > - are effectively ignored. RVV vrgather instead produces 0 for any > - out-of-range indices, so we need to modulo all the vec_perm indices > - to ensure they are all in range of [0, 2 * nunits - 1]. */ > + rtx sel_mod =3D sel; > rtx max_sel =3D gen_const_vector_dup (sel_mode, 2 * nunits - 1); > - rtx sel_mod > - =3D expand_simple_binop (sel_mode, AND, sel, max_sel, NULL, 0, OPTAB= _DIRECT); > + /* We don't need to modulo indices for VLA vector. > + Since we should guarantee they aren't out of range before. */ > + if (nunits.is_constant ()) > + { > + /* Note: vec_perm indices are supposed to wrap when they go beyond= the > + size of the two value vectors, i.e. the upper bits of the indices > + are effectively ignored. RVV vrgather instead produces 0 for any > + out-of-range indices, so we need to modulo all the vec_perm indices > + to ensure they are all in range of [0, 2 * nunits - 1]. */ > + sel_mod =3D expand_simple_binop (sel_mode, AND, sel, max_sel, NULL= , 0, > + OPTAB_DIRECT); > + } > /* This following sequence is handling the case that: > __builtin_shufflevector (vec1, vec2, index...), the index can be an= y > @@ -2094,4 +2280,124 @@ expand_vec_perm (rtx target, rtx op0, rtx op1, rt= x sel) > emit_vlmax_masked_gather_mu_insn (target, op1, tmp, mask); > } > +/* Implement TARGET_VECTORIZE_VEC_PERM_CONST for RVV. */ > + > +/* vec_perm support.
*/ > + > +struct expand_vec_perm_d > +{ > + rtx target, op0, op1; > + vec_perm_indices perm; > + machine_mode vmode; > + machine_mode op_mode; > + bool one_vector_p; > + bool testing_p; > +}; > + > +/* Recognize the pattern that can be shuffled by generic approach. */ > + > +static bool > +shuffle_generic_patterns (struct expand_vec_perm_d *d) > +{ > + machine_mode sel_mode =3D related_int_vector_mode (d->vmode).require (= ); > + poly_uint64 nunits =3D GET_MODE_NUNITS (d->vmode); > + > + /* For constant size indices, we don't need to handle it here. > + Just leave it to vec_perm. */ > + if (d->perm.length ().is_constant ()) > + return false; > + > + /* Permuting two SEW8 variable-length vectors need vrgatherei16.vv. > + Otherwise, it could overflow the index range. */ > + if (GET_MODE_INNER (d->vmode) =3D=3D QImode > + && !get_vector_mode (HImode, nunits).exists (&sel_mode)) > + return false; > + > + /* Success! */ > + if (d->testing_p) > + return true; > + > + rtx sel =3D vec_perm_indices_to_rtx (sel_mode, d->perm); > + expand_vec_perm (d->target, d->op0, d->op1, force_reg (sel_mode, sel))= ; > + return true; > +} > + > +static bool > +expand_vec_perm_const_1 (struct expand_vec_perm_d *d) > +{ > + gcc_assert (d->op_mode !=3D E_VOIDmode); > + > + /* The pattern matching functions above are written to look for a smal= l > + number to begin the sequence (0, 1, N/2). If we begin with an inde= x > + from the second operand, we can swap the operands.
*/ > + poly_int64 nelt =3D d->perm.length (); > + if (known_ge (d->perm[0], nelt)) > + { > + d->perm.rotate_inputs (1); > + std::swap (d->op0, d->op1); > + } > + > + if (known_gt (nelt, 1)) > + { > + if (d->vmode =3D=3D d->op_mode) > + { > + if (shuffle_generic_patterns (d)) > + return true; > + return false; > + } > + else > + return false; > + } > + return false; > +} > + > +bool > +expand_vec_perm_const (machine_mode vmode, machine_mode op_mode, rtx tar= get, > + rtx op0, rtx op1, const vec_perm_indices &sel) > +{ > + /* RVV doesn't have Mask type pack/unpack instructions and we don't us= e > + mask to do the iteration loop control. Just disable it directly. *= / > + if (GET_MODE_CLASS (vmode) =3D=3D MODE_VECTOR_BOOL) > + return false; > + > + struct expand_vec_perm_d d; > + > + /* Check whether the mask can be applied to a single vector. */ > + if (sel.ninputs () =3D=3D 1 || (op0 && rtx_equal_p (op0, op1))) > + d.one_vector_p =3D true; > + else if (sel.all_from_input_p (0)) > + { > + d.one_vector_p =3D true; > + op1 =3D op0; > + } > + else if (sel.all_from_input_p (1)) > + { > + d.one_vector_p =3D true; > + op0 =3D op1; > + } > + else > + d.one_vector_p =3D false; > + > + d.perm.new_vector (sel.encoding (), d.one_vector_p ? 
1 : 2, > + sel.nelts_per_input ()); > + d.vmode =3D vmode; > + d.op_mode =3D op_mode; > + d.target =3D target; > + d.op0 =3D op0; > + if (op0 =3D=3D op1) > + d.op1 =3D d.op0; > + else > + d.op1 =3D op1; > + d.testing_p =3D !target; > + > + if (!d.testing_p) > + return expand_vec_perm_const_1 (&d); > + > + rtx_insn *last =3D get_last_insn (); > + bool ret =3D expand_vec_perm_const_1 (&d); > + gcc_assert (last =3D=3D get_last_insn ()); > + > + return ret; > +} > + > } // namespace riscv_vector > diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc > index caa7858b864..5d22012b591 100644 > --- a/gcc/config/riscv/riscv.cc > +++ b/gcc/config/riscv/riscv.cc > @@ -7631,6 +7631,19 @@ riscv_vectorize_related_mode (machine_mode vector_= mode, scalar_mode element_mode > return default_vectorize_related_mode (vector_mode, element_mode, nuni= ts); > } > +/* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */ > + > +static bool > +riscv_vectorize_vec_perm_const (machine_mode vmode, machine_mode op_mode= , > + rtx target, rtx op0, rtx op1, > + const vec_perm_indices &sel) > +{ > + if (TARGET_VECTOR && riscv_v_ext_vector_mode_p (vmode)) > + return riscv_vector::expand_vec_perm_const (vmode, op_mode, target, = op0, > + op1, sel); > + > + return false; > +} > /* Initialize the GCC target structure. 
*/ > #undef TARGET_ASM_ALIGNED_HI_OP > @@ -7930,6 +7943,9 @@ riscv_vectorize_related_mode (machine_mode vector_m= ode, scalar_mode element_mode > #undef TARGET_VECTORIZE_RELATED_MODE > #define TARGET_VECTORIZE_RELATED_MODE riscv_vectorize_related_mode > +#undef TARGET_VECTORIZE_VEC_PERM_CONST > +#define TARGET_VECTORIZE_VEC_PERM_CONST riscv_vectorize_vec_perm_const > + > struct gcc_target targetm =3D TARGET_INITIALIZER; > #include "gt-riscv.h" > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp-1.c b= /gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp-1.c > new file mode 100644 > index 00000000000..befb518e2dd > --- /dev/null > +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp-1.c > @@ -0,0 +1,22 @@ > +/* { dg-do compile } */ > +/* { dg-additional-options "-march=3Drv32gcv -mabi=3Dilp32d --param risc= v-autovec-preference=3Dscalable -fdump-tree-optimized-details" } */ > + > +#include > + > +void __attribute__ ((noipa)) > +f (int8_t *restrict a, int8_t *restrict b, int n) > +{ > + for (int i =3D 0; i < n; ++i) > + { > + a[i * 8 + 0] =3D b[i * 8 + 37] + 1; > + a[i * 8 + 1] =3D b[i * 8 + 37] + 2; > + a[i * 8 + 2] =3D b[i * 8 + 37] + 8; > + a[i * 8 + 3] =3D b[i * 8 + 37] + 4; > + a[i * 8 + 4] =3D b[i * 8 + 37] + 5; > + a[i * 8 + 5] =3D b[i * 8 + 37] + 6; > + a[i * 8 + 6] =3D b[i * 8 + 37] + 7; > + a[i * 8 + 7] =3D b[i * 8 + 37] + 3; > + } > +} > + > +/* { dg-final { scan-tree-dump-times "\.VEC_PERM" 1 "optimized" } } */ > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp-2.c b= /gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp-2.c > new file mode 100644 > index 00000000000..ac817451295 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp-2.c > @@ -0,0 +1,22 @@ > +/* { dg-do compile } */ > +/* { dg-additional-options "-march=3Drv32gcv -mabi=3Dilp32d --param risc= v-autovec-preference=3Dscalable -fdump-tree-optimized-details" } */ > + > +#include > + > +void __attribute__ ((noipa)) > +f 
(int16_t *restrict a, int16_t *restrict b, int n) > +{ > + for (int i =3D 0; i < n; ++i) > + { > + a[i * 8 + 0] =3D b[i * 8 + 37] + 1; > + a[i * 8 + 1] =3D b[i * 8 + 37] + 2; > + a[i * 8 + 2] =3D b[i * 8 + 37] + 8; > + a[i * 8 + 3] =3D b[i * 8 + 37] + 4; > + a[i * 8 + 4] =3D b[i * 8 + 37] + 5; > + a[i * 8 + 5] =3D b[i * 8 + 37] + 6; > + a[i * 8 + 6] =3D b[i * 8 + 37] + 7; > + a[i * 8 + 7] =3D b[i * 8 + 37] + 3; > + } > +} > + > +/* { dg-final { scan-tree-dump-times "\.VEC_PERM" 1 "optimized" } } */ > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp-3.c b= /gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp-3.c > new file mode 100644 > index 00000000000..73962055b03 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp-3.c > @@ -0,0 +1,22 @@ > +/* { dg-do compile } */ > +/* { dg-additional-options "-march=3Drv32gcv -mabi=3Dilp32d --param risc= v-autovec-preference=3Dscalable -fdump-tree-optimized-details" } */ > + > +#include > + > +void __attribute__ ((noipa)) > +f (int8_t *restrict a, int8_t *restrict b, int n) > +{ > + for (int i =3D 0; i < n; ++i) > + { > + a[i * 8] =3D b[i * 8 + 1] + 1; > + a[i * 8 + 1] =3D b[i * 8 + 7] + 2; > + a[i * 8 + 2] =3D b[i * 8 + 1] + 3; > + a[i * 8 + 3] =3D b[i * 8 + 7] + 4; > + a[i * 8 + 4] =3D b[i * 8 + 1] + 5; > + a[i * 8 + 5] =3D b[i * 8 + 7] + 6; > + a[i * 8 + 6] =3D b[i * 8 + 1] + 7; > + a[i * 8 + 7] =3D b[i * 8 + 7] + 8; > + } > +} > + > +/* { dg-final { scan-tree-dump-times "\.VEC_PERM" 1 "optimized" } } */ > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp-4.c b= /gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp-4.c > new file mode 100644 > index 00000000000..fa216fc8c40 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp-4.c > @@ -0,0 +1,22 @@ > +/* { dg-do compile } */ > +/* { dg-additional-options "-march=3Drv32gcv -mabi=3Dilp32d --param risc= v-autovec-preference=3Dscalable -fdump-tree-optimized-details" } */ > + > 
+#include > + > +void __attribute__ ((noipa)) > +f (int16_t *restrict a, int16_t *restrict b, int n) > +{ > + for (int i =3D 0; i < n; ++i) > + { > + a[i * 8 + 0] =3D b[i * 8 + 1] + 1; > + a[i * 8 + 1] =3D b[i * 8 + 7] + 2; > + a[i * 8 + 2] =3D b[i * 8 + 1] + 3; > + a[i * 8 + 3] =3D b[i * 8 + 7] + 4; > + a[i * 8 + 4] =3D b[i * 8 + 1] + 5; > + a[i * 8 + 5] =3D b[i * 8 + 7] + 6; > + a[i * 8 + 6] =3D b[i * 8 + 1] + 7; > + a[i * 8 + 7] =3D b[i * 8 + 7] + 8; > + } > +} > + > +/* { dg-final { scan-tree-dump-times "\.VEC_PERM" 1 "optimized" } } */ > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp-5.c b= /gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp-5.c > new file mode 100644 > index 00000000000..899ed9e310b > --- /dev/null > +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp-5.c > @@ -0,0 +1,22 @@ > +/* { dg-do compile } */ > +/* { dg-additional-options "-march=3Drv32gcv -mabi=3Dilp32d --param risc= v-autovec-preference=3Dscalable -fdump-tree-optimized-details" } */ > + > +#include > + > +void __attribute__ ((noipa)) > +f (int8_t *restrict a, int8_t *restrict b, int n) > +{ > + for (int i =3D 0; i < n; ++i) > + { > + a[i * 8 + 0] =3D b[i * 8 + 1] + 1; > + a[i * 8 + 1] =3D b[i * 8 + 7] + 2; > + a[i * 8 + 2] =3D b[i * 8 + 4] + 3; > + a[i * 8 + 3] =3D b[i * 8 + 8] + 4; > + a[i * 8 + 4] =3D b[i * 8 + 1] + 5; > + a[i * 8 + 5] =3D b[i * 8 + 7] + 6; > + a[i * 8 + 6] =3D b[i * 8 + 4] + 7; > + a[i * 8 + 7] =3D b[i * 8 + 8] + 8; > + } > +} > + > +/* { dg-final { scan-tree-dump-times "\.VEC_PERM" 1 "optimized" } } */ > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp-6.c b= /gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp-6.c > new file mode 100644 > index 00000000000..fb87cc00cea > --- /dev/null > +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp-6.c > @@ -0,0 +1,23 @@ > +/* { dg-do compile } */ > +/* { dg-additional-options "-march=3Drv32gcv -mabi=3Dilp32d --param risc= v-autovec-preference=3Dscalable 
-fdump-tree-optimized-details" } */ > + > +#include > + > +void __attribute__ ((noipa)) > +f (uint8_t *restrict a, uint8_t *restrict b, int n) > +{ > + for (int i =3D 0; i < n; ++i) > + { > + a[i * 8 + 0] =3D b[i * 8 + 1] + 1; > + a[i * 8 + 1] =3D b[i * 8 + 2] + 2; > + a[i * 8 + 2] =3D b[i * 8 + 6] + 8; > + a[i * 8 + 3] =3D b[i * 8 + 7] + 4; > + a[i * 8 + 4] =3D b[i * 8 + 3] + 5; > + a[i * 8 + 5] =3D b[i * 8 + 4] + 6; > + a[i * 8 + 6] =3D b[i * 8 + 5] + 7; > + a[i * 8 + 7] =3D b[i * 8 + 0] + 3; > + } > +} > + > +/* { dg-final { scan-tree-dump-times "\.VEC_PERM" 1 "optimized" } } */ > + > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp-7.c b= /gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp-7.c > new file mode 100644 > index 00000000000..3dd744b586e > --- /dev/null > +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp-7.c > @@ -0,0 +1,15 @@ > +/* { dg-do compile } */ > +/* { dg-additional-options "-march=3Drv32gcv -mabi=3Dilp32d --param risc= v-autovec-preference=3Dscalable -fdump-tree-optimized-details" } */ > + > +#include > + > +void __attribute__ ((noipa)) > +f (float *__restrict f, double *__restrict d, int n) > +{ > + for (int i =3D 0; i < n; ++i) > + { > + f[i * 2 + 0] =3D 1; > + f[i * 2 + 1] =3D 2; > + d[i] =3D 3; > + } > +} > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp_run-1= .c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp_run-1.c > new file mode 100644 > index 00000000000..16f078a0433 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp_run-1.c > @@ -0,0 +1,66 @@ > +/* { dg-do run { target { riscv_vector } } } */ > +/* { dg-additional-options "--param riscv-autovec-preference=3Dscalable"= } */ > + > +#include "slp-1.c" > + > +#define LIMIT 128 > +void __attribute__ ((optimize (0))) > +f_golden (int8_t *restrict a, int8_t *restrict b, int n) > +{ > + for (int i =3D 0; i < n; ++i) > + { > + a[i * 8 + 0] =3D b[i * 8 + 37] + 1; > + a[i * 8 + 1] =3D b[i * 8 + 37] + 
2; > + a[i * 8 + 2] =3D b[i * 8 + 37] + 8; > + a[i * 8 + 3] =3D b[i * 8 + 37] + 4; > + a[i * 8 + 4] =3D b[i * 8 + 37] + 5; > + a[i * 8 + 5] =3D b[i * 8 + 37] + 6; > + a[i * 8 + 6] =3D b[i * 8 + 37] + 7; > + a[i * 8 + 7] =3D b[i * 8 + 37] + 3; > + } > +} > + > +int > +main (void) > +{ > +#define RUN(NUM) = \ > + int8_t a_##NUM[NUM * 8 + 8] =3D {0}; = \ > + int8_t a_golden_##NUM[NUM * 8 + 8] =3D {0}; = \ > + int8_t b_##NUM[NUM * 8 + 37] =3D {0}; = \ > + for (int i =3D 0; i < NUM * 8 + 37; i++) = \ > + { = \ > + if (i % NUM =3D=3D 0) = \ > + b_##NUM[i] =3D (i + NUM) % LIMIT; = \ > + else = \ > + b_##NUM[i] =3D (i - NUM) % (-LIMIT); = \ > + } = \ > + f (a_##NUM, b_##NUM, NUM); = \ > + f_golden (a_golden_##NUM, b_##NUM, NUM); = \ > + for (int i =3D 0; i < NUM * 8 + 8; i++) = \ > + { = \ > + if (a_##NUM[i] !=3D a_golden_##NUM[i]) = \ > + __builtin_abort (); = \ > + } > + > + RUN (3); > + RUN (5); > + RUN (15); > + RUN (16); > + RUN (17); > + RUN (31); > + RUN (32); > + RUN (33); > + RUN (63); > + RUN (64); > + RUN (65); > + RUN (127); > + RUN (128); > + RUN (129); > + RUN (239); > + RUN (359); > + RUN (498); > + RUN (799); > + RUN (977); > + RUN (5789); > + return 0; > +} > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp_run-2= .c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp_run-2.c > new file mode 100644 > index 00000000000..41f688f628c > --- /dev/null > +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp_run-2.c > @@ -0,0 +1,67 @@ > +/* { dg-do run { target { riscv_vector } } } */ > +/* { dg-additional-options "--param riscv-autovec-preference=3Dscalable"= } */ > + > +#include "slp-2.c" > + > +#define LIMIT 32767 > + > +void __attribute__ ((optimize (0))) > +f_golden (int16_t *restrict a, int16_t *restrict b, int n) > +{ > + for (int i =3D 0; i < n; ++i) > + { > + a[i * 8 + 0] =3D b[i * 8 + 37] + 1; > + a[i * 8 + 1] =3D b[i * 8 + 37] + 2; > + a[i * 8 + 2] =3D b[i * 8 + 37] + 8; > + a[i * 8 + 3] =3D b[i * 8 + 37] + 4; > + a[i * 8 
+ 4] =3D b[i * 8 + 37] + 5; > + a[i * 8 + 5] =3D b[i * 8 + 37] + 6; > + a[i * 8 + 6] =3D b[i * 8 + 37] + 7; > + a[i * 8 + 7] =3D b[i * 8 + 37] + 3; > + } > +} > + > +int > +main (void) > +{ > +#define RUN(NUM) = \ > + int16_t a_##NUM[NUM * 8 + 8] =3D {0}; = \ > + int16_t a_golden_##NUM[NUM * 8 + 8] =3D {0}; = \ > + int16_t b_##NUM[NUM * 8 + 37] =3D {0}; = \ > + for (int i =3D 0; i < NUM * 8 + 37; i++) = \ > + { = \ > + if (i % NUM =3D=3D 0) = \ > + b_##NUM[i] =3D (i + NUM) % LIMIT; = \ > + else = \ > + b_##NUM[i] =3D (i - NUM) % (-LIMIT); = \ > + } = \ > + f (a_##NUM, b_##NUM, NUM); = \ > + f_golden (a_golden_##NUM, b_##NUM, NUM); = \ > + for (int i =3D 0; i < NUM * 8 + 8; i++) = \ > + { = \ > + if (a_##NUM[i] !=3D a_golden_##NUM[i]) = \ > + __builtin_abort (); = \ > + } > + > + RUN (3); > + RUN (5); > + RUN (15); > + RUN (16); > + RUN (17); > + RUN (31); > + RUN (32); > + RUN (33); > + RUN (63); > + RUN (64); > + RUN (65); > + RUN (127); > + RUN (128); > + RUN (129); > + RUN (239); > + RUN (359); > + RUN (498); > + RUN (799); > + RUN (977); > + RUN (5789); > + return 0; > +} > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp_run-3= .c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp_run-3.c > new file mode 100644 > index 00000000000..30996cb2c6e > --- /dev/null > +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp_run-3.c > @@ -0,0 +1,67 @@ > +/* { dg-do run { target { riscv_vector } } } */ > +/* { dg-additional-options "--param riscv-autovec-preference=3Dscalable"= } */ > + > +#include "slp-3.c" > + > +#define LIMIT 128 > + > +void __attribute__ ((optimize (0))) > +f_golden (int8_t *restrict a, int8_t *restrict b, int n) > +{ > + for (int i =3D 0; i < n; ++i) > + { > + a[i * 8] =3D b[i * 8 + 1] + 1; > + a[i * 8 + 1] =3D b[i * 8 + 7] + 2; > + a[i * 8 + 2] =3D b[i * 8 + 1] + 3; > + a[i * 8 + 3] =3D b[i * 8 + 7] + 4; > + a[i * 8 + 4] =3D b[i * 8 + 1] + 5; > + a[i * 8 + 5] =3D b[i * 8 + 7] + 6; > + a[i * 8 + 6] =3D b[i * 8 + 1] + 
7; > + a[i * 8 + 7] =3D b[i * 8 + 7] + 8; > + } > +} > + > +int > +main (void) > +{ > +#define RUN(NUM) = \ > + int8_t a_##NUM[NUM * 8 + 8] =3D {0}; = \ > + int8_t a_golden_##NUM[NUM * 8 + 8] =3D {0}; = \ > + int8_t b_##NUM[NUM * 8 + 8] =3D {0}; = \ > + for (int i =3D 0; i < NUM * 8 + 8; i++) = \ > + { = \ > + if (i % NUM =3D=3D 0) = \ > + b_##NUM[i] =3D (i + NUM) % LIMIT; = \ > + else = \ > + b_##NUM[i] =3D (i - NUM) % (-LIMIT); = \ > + } = \ > + f (a_##NUM, b_##NUM, NUM); = \ > + f_golden (a_golden_##NUM, b_##NUM, NUM); = \ > + for (int i =3D 0; i < NUM * 8 + 8; i++) = \ > + { = \ > + if (a_##NUM[i] !=3D a_golden_##NUM[i]) = \ > + __builtin_abort (); = \ > + } > + > + RUN (3); > + RUN (5); > + RUN (15); > + RUN (16); > + RUN (17); > + RUN (31); > + RUN (32); > + RUN (33); > + RUN (63); > + RUN (64); > + RUN (65); > + RUN (127); > + RUN (128); > + RUN (129); > + RUN (239); > + RUN (359); > + RUN (498); > + RUN (799); > + RUN (977); > + RUN (5789); > + return 0; > +} > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp_run-4= .c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp_run-4.c > new file mode 100644 > index 00000000000..3d43ef0890c > --- /dev/null > +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp_run-4.c > @@ -0,0 +1,67 @@ > +/* { dg-do run { target { riscv_vector } } } */ > +/* { dg-additional-options "--param riscv-autovec-preference=3Dscalable"= } */ > + > +#include "slp-4.c" > + > +#define LIMIT 32767 > + > +void __attribute__ ((optimize (0))) > +f_golden (int16_t *restrict a, int16_t *restrict b, int n) > +{ > + for (int i =3D 0; i < n; ++i) > + { > + a[i * 8] =3D b[i * 8 + 1] + 1; > + a[i * 8 + 1] =3D b[i * 8 + 7] + 2; > + a[i * 8 + 2] =3D b[i * 8 + 1] + 3; > + a[i * 8 + 3] =3D b[i * 8 + 7] + 4; > + a[i * 8 + 4] =3D b[i * 8 + 1] + 5; > + a[i * 8 + 5] =3D b[i * 8 + 7] + 6; > + a[i * 8 + 6] =3D b[i * 8 + 1] + 7; > + a[i * 8 + 7] =3D b[i * 8 + 7] + 8; > + } > +} > + > +int > +main (void) > +{ > +#define RUN(NUM) = \ 
> + int16_t a_##NUM[NUM * 8 + 8] =3D {0}; = \ > + int16_t a_golden_##NUM[NUM * 8 + 8] =3D {0}; = \ > + int16_t b_##NUM[NUM * 8 + 8] =3D {0}; = \ > + for (int i =3D 0; i < NUM * 8 + 8; i++) = \ > + { = \ > + if (i % NUM =3D=3D 0) = \ > + b_##NUM[i] =3D (i + NUM) % LIMIT; = \ > + else = \ > + b_##NUM[i] =3D (i - NUM) % (-LIMIT); = \ > + } = \ > + f (a_##NUM, b_##NUM, NUM); = \ > + f_golden (a_golden_##NUM, b_##NUM, NUM); = \ > + for (int i =3D 0; i < NUM * 8 + 8; i++) = \ > + { = \ > + if (a_##NUM[i] !=3D a_golden_##NUM[i]) = \ > + __builtin_abort (); = \ > + } > + > + RUN (3); > + RUN (5); > + RUN (15); > + RUN (16); > + RUN (17); > + RUN (31); > + RUN (32); > + RUN (33); > + RUN (63); > + RUN (64); > + RUN (65); > + RUN (127); > + RUN (128); > + RUN (129); > + RUN (239); > + RUN (359); > + RUN (498); > + RUN (799); > + RUN (977); > + RUN (5789); > + return 0; > +} > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp_run-5= .c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp_run-5.c > new file mode 100644 > index 00000000000..814308bd7af > --- /dev/null > +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp_run-5.c > @@ -0,0 +1,67 @@ > +/* { dg-do run { target { riscv_vector } } } */ > +/* { dg-additional-options "--param riscv-autovec-preference=3Dscalable"= } */ > + > +#include "slp-5.c" > + > +#define LIMIT 128 > + > +void __attribute__ ((optimize (0))) > +f_golden (int8_t *restrict a, int8_t *restrict b, int n) > +{ > + for (int i =3D 0; i < n; ++i) > + { > + a[i * 8 + 0] =3D b[i * 8 + 1] + 1; > + a[i * 8 + 1] =3D b[i * 8 + 7] + 2; > + a[i * 8 + 2] =3D b[i * 8 + 4] + 3; > + a[i * 8 + 3] =3D b[i * 8 + 8] + 4; > + a[i * 8 + 4] =3D b[i * 8 + 1] + 5; > + a[i * 8 + 5] =3D b[i * 8 + 7] + 6; > + a[i * 8 + 6] =3D b[i * 8 + 4] + 7; > + a[i * 8 + 7] =3D b[i * 8 + 8] + 8; > + } > +} > + > +int > +main (void) > +{ > +#define RUN(NUM) = \ > + int8_t a_##NUM[NUM * 8 + 8] =3D {0}; = \ > + int8_t a_golden_##NUM[NUM * 8 + 8] =3D {0}; = \ > + 
int8_t b_##NUM[NUM * 8 + 9] =3D {0}; = \ > + for (int i =3D 0; i < NUM * 8 + 9; i++) = \ > + { = \ > + if (i % NUM =3D=3D 0) = \ > + b_##NUM[i] =3D (i + NUM) % LIMIT; = \ > + else = \ > + b_##NUM[i] =3D (i - NUM) % (-LIMIT); = \ > + } = \ > + f (a_##NUM, b_##NUM, NUM); = \ > + f_golden (a_golden_##NUM, b_##NUM, NUM); = \ > + for (int i =3D 0; i < NUM * 8 + 8; i++) = \ > + { = \ > + if (a_##NUM[i] !=3D a_golden_##NUM[i]) = \ > + __builtin_abort (); = \ > + } > + > + RUN (3); > + RUN (5); > + RUN (15); > + RUN (16); > + RUN (17); > + RUN (31); > + RUN (32); > + RUN (33); > + RUN (63); > + RUN (64); > + RUN (65); > + RUN (127); > + RUN (128); > + RUN (129); > + RUN (239); > + RUN (359); > + RUN (498); > + RUN (799); > + RUN (977); > + RUN (5789); > + return 0; > +} > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp_run-6= .c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp_run-6.c > new file mode 100644 > index 00000000000..e317eeac2f2 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp_run-6.c > @@ -0,0 +1,67 @@ > +/* { dg-do run { target { riscv_vector } } } */ > +/* { dg-additional-options "--param riscv-autovec-preference=3Dscalable"= } */ > + > +#include "slp-6.c" > + > +#define LIMIT 128 > + > +void __attribute__ ((optimize (0))) > +f_golden (int8_t *restrict a, int8_t *restrict b, int n) > +{ > + for (int i =3D 0; i < n; ++i) > + { > + a[i * 8 + 0] =3D b[i * 8 + 1] + 1; > + a[i * 8 + 1] =3D b[i * 8 + 2] + 2; > + a[i * 8 + 2] =3D b[i * 8 + 6] + 8; > + a[i * 8 + 3] =3D b[i * 8 + 7] + 4; > + a[i * 8 + 4] =3D b[i * 8 + 3] + 5; > + a[i * 8 + 5] =3D b[i * 8 + 4] + 6; > + a[i * 8 + 6] =3D b[i * 8 + 5] + 7; > + a[i * 8 + 7] =3D b[i * 8 + 0] + 3; > + } > +} > + > +int > +main (void) > +{ > +#define RUN(NUM) = \ > + int8_t a_##NUM[NUM * 8 + 8] =3D {0}; = \ > + int8_t a_golden_##NUM[NUM * 8 + 8] =3D {0}; = \ > + int8_t b_##NUM[NUM * 8 + 9] =3D {0}; = \ > + for (int i =3D 0; i < NUM * 8 + 9; i++) = \ > + { = \ > + if (i 
% NUM =3D=3D 0) = \ > + b_##NUM[i] =3D (i + NUM) % LIMIT; = \ > + else = \ > + b_##NUM[i] =3D (i - NUM) % (-LIMIT); = \ > + } = \ > + f (a_##NUM, b_##NUM, NUM); = \ > + f_golden (a_golden_##NUM, b_##NUM, NUM); = \ > + for (int i =3D 0; i < NUM * 8 + 8; i++) = \ > + { = \ > + if (a_##NUM[i] !=3D a_golden_##NUM[i]) = \ > + __builtin_abort (); = \ > + } > + > + RUN (3); > + RUN (5); > + RUN (15); > + RUN (16); > + RUN (17); > + RUN (31); > + RUN (32); > + RUN (33); > + RUN (63); > + RUN (64); > + RUN (65); > + RUN (127); > + RUN (128); > + RUN (129); > + RUN (239); > + RUN (359); > + RUN (498); > + RUN (799); > + RUN (977); > + RUN (5789); > + return 0; > +} > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp_run-7= .c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp_run-7.c > new file mode 100644 > index 00000000000..a8e4781988e > --- /dev/null > +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp_run-7.c > @@ -0,0 +1,58 @@ > +/* { dg-do run { target { riscv_vector } } } */ > +/* { dg-additional-options "--param riscv-autovec-preference=3Dscalable"= } */ > + > +#include "slp-7.c" > + > +void > +f_golden (float *__restrict f, double *__restrict d, int n) > +{ > + for (int i =3D 0; i < n; ++i) > + { > + f[i * 2 + 0] =3D 1; > + f[i * 2 + 1] =3D 2; > + d[i] =3D 3; > + } > +} > + > +int > +main (void) > +{ > +#define RUN(NUM) = \ > + float a_##NUM[NUM * 2 + 2] =3D {0}; = \ > + float a_golden_##NUM[NUM * 2 + 2] =3D {0}; = \ > + double b_##NUM[NUM] =3D {0}; = \ > + double b_golden_##NUM[NUM] =3D {0}; = \ > + f (a_##NUM, b_##NUM, NUM); = \ > + f_golden (a_golden_##NUM, b_golden_##NUM, NUM); = \ > + for (int i =3D 0; i < NUM; i++) = \ > + { = \ > + if (a_##NUM[i * 2 + 0] !=3D a_golden_##NUM[i * 2 + 0]) = \ > + __builtin_abort (); = \ > + if (a_##NUM[i * 2 + 1] !=3D a_golden_##NUM[i * 2 + 1]) = \ > + __builtin_abort (); = \ > + if (b_##NUM[i] !=3D b_golden_##NUM[i]) = \ > + __builtin_abort (); = \ > + } > + > + RUN (3); > + RUN (5); > + RUN 
(15); > + RUN (16); > + RUN (17); > + RUN (31); > + RUN (32); > + RUN (33); > + RUN (63); > + RUN (64); > + RUN (65); > + RUN (127); > + RUN (128); > + RUN (129); > + RUN (239); > + RUN (359); > + RUN (498); > + RUN (799); > + RUN (977); > + RUN (5789); > + return 0; > +} > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/scalable-1.c b/gc= c/testsuite/gcc.target/riscv/rvv/autovec/scalable-1.c > index 500b0adce66..3c03a87377d 100644 > --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/scalable-1.c > +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/scalable-1.c > @@ -14,4 +14,4 @@ f (int32_t *__restrict f, int32_t *__restrict d, int n) > } > } > -/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 0 = "vect" } } */ > +/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 1 = "vect" } } */ > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/v-1.c b/gcc/tests= uite/gcc.target/riscv/rvv/autovec/v-1.c > index 383c82a3b7c..e68d05f5f48 100644 > --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/v-1.c > +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/v-1.c > @@ -3,9 +3,4 @@ > #include "template-1.h" > -/* Currently, we don't support SLP auto-vectorization for VLA. But it's > - necessary that we add this testcase here to make sure such unsupporte= d SLP > - auto-vectorization will not cause an ICE. We will enable "vect" check= ing when > - we support SLP auto-vectorization for VLA in the future. 
*/ > - > -/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 0 = "vect" } } */ > +/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 5 = "vect" } } */ > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve32f_zvl128b-1.= c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve32f_zvl128b-1.c > index 23cc1c8651f..ecfda79e19a 100644 > --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve32f_zvl128b-1.c > +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve32f_zvl128b-1.c > @@ -3,4 +3,4 @@ > #include "template-1.h" > -/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 0 = "vect" } } */ > +/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 3 = "vect" } } */ > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve32x_zvl128b-1.= c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve32x_zvl128b-1.c > index 4f130f02f67..1394f08f2b9 100644 > --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve32x_zvl128b-1.c > +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve32x_zvl128b-1.c > @@ -3,4 +3,4 @@ > #include "template-1.h" > -/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 0 = "vect" } } */ > +/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 2 = "vect" } } */ > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64d-1.c b/gcc/= testsuite/gcc.target/riscv/rvv/autovec/zve64d-1.c > index 823d51a03cb..c5e89996fa4 100644 > --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64d-1.c > +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64d-1.c > @@ -3,4 +3,4 @@ > #include "template-1.h" > -/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 2 = "vect" } } */ > +/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 3 = "vect" } } */ > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64d_zvl128b-1.= c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64d_zvl128b-1.c > index 5ead22746d3..6b320ca6f38 100644 > --- 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64d_zvl128b-1.c > +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64d_zvl128b-1.c > @@ -3,4 +3,4 @@ > #include "template-1.h" > -/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 0 = "vect" } } */ > +/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 5 = "vect" } } */ > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64f-1.c b/gcc/= testsuite/gcc.target/riscv/rvv/autovec/zve64f-1.c > index e03d1b44ca6..6c2a002de9c 100644 > --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64f-1.c > +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64f-1.c > @@ -3,4 +3,4 @@ > #include "template-1.h" > -/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 2 = "vect" } } */ > +/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 3 = "vect" } } */ > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64f_zvl128b-1.= c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64f_zvl128b-1.c > index 5bb2d9d96fa..ae3f066477c 100644 > --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64f_zvl128b-1.c > +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64f_zvl128b-1.c > @@ -3,4 +3,4 @@ > #include "template-1.h" > -/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 0 = "vect" } } */ > +/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 = "vect" } } */ > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64x_zvl128b-1.= c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64x_zvl128b-1.c > index 71820ece4b2..fc676a3865e 100644 > --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64x_zvl128b-1.c > +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64x_zvl128b-1.c > @@ -3,4 +3,4 @@ > #include "template-1.h" > -/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 0 = "vect" } } */ > +/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 3 = "vect" } } */ > -- > 2.36.1 >