From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from foss.arm.com (foss.arm.com [217.140.110.172]) by sourceware.org (Postfix) with ESMTP id 7D76D39874C7 for ; Thu, 8 Jul 2021 12:39:29 +0000 (GMT) DMARC-Filter: OpenDMARC Filter v1.4.1 sourceware.org 7D76D39874C7 Received: from usa-sjc-imap-foss1.foss.arm.com (unknown [10.121.207.14]) by usa-sjc-mx-foss1.foss.arm.com (Postfix) with ESMTP id 25219D6E for ; Thu, 8 Jul 2021 05:39:29 -0700 (PDT) Received: from localhost (e121540-lin.manchester.arm.com [10.32.98.126]) by usa-sjc-imap-foss1.foss.arm.com (Postfix) with ESMTPSA id C0EF03F73B for ; Thu, 8 Jul 2021 05:39:28 -0700 (PDT) From: Richard Sandiford To: gcc-patches@gcc.gnu.org Mail-Followup-To: gcc-patches@gcc.gnu.org, richard.sandiford@arm.com Subject: [PATCH 02/10] vect: Create array_slice of live-out stmts References: Date: Thu, 08 Jul 2021 13:39:27 +0100 In-Reply-To: (Richard Sandiford's message of "Thu, 08 Jul 2021 13:38:20 +0100") Message-ID: User-Agent: Gnus/5.13 (Gnus v5.13) Emacs/26.3 (gnu/linux) MIME-Version: 1.0 Content-Type: text/plain; charset=utf-8 Content-Transfer-Encoding: quoted-printable X-Spam-Status: No, score=-12.4 required=5.0 tests=BAYES_00, GIT_PATCH_0, KAM_DMARC_STATUS, SPF_HELO_NONE, SPF_PASS, TXREP autolearn=ham autolearn_force=no version=3.4.4 X-Spam-Checker-Version: SpamAssassin 3.4.4 (2020-01-24) on server2.sourceware.org X-BeenThere: gcc-patches@gcc.gnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Gcc-patches mailing list List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Thu, 08 Jul 2021 12:39:31 -0000 This patch constructs an array_slice of the scalar statements that produce live-out reduction results in the original unvectorised loop. 
There are three cases: - SLP reduction chains: the final SLP stmt is live-out - full SLP reductions: all SLP stmts are live-out - non-SLP reductions: the single scalar stmt is live-out This is a slight simplification on its own, mostly because it means =E2=80=9Cgroup_size=E2=80=9D has a consistent meaning throughout the functi= on. The main justification though is that it helps with later patches. gcc/ * tree-vect-loop.c (vect_create_epilog_for_reduction): Truncate scalar_results to group_size elements after reducing down from N*group_size elements. Construct an array_slice of the live-out stmts and assert that there is one stmt per scalar result. --- gcc/tree-vect-loop.c | 61 +++++++++++++++----------------------------- 1 file changed, 21 insertions(+), 40 deletions(-) diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c index 7c3e3352b43..8390ac80ca0 100644 --- a/gcc/tree-vect-loop.c +++ b/gcc/tree-vect-loop.c @@ -5010,7 +5010,12 @@ vect_create_epilog_for_reduction (loop_vec_info loop= _vinfo, auto_vec scalar_results; unsigned int group_size =3D 1, k; auto_vec phis; - bool slp_reduc =3D false; + /* SLP reduction without reduction chain, e.g., + # a1 =3D phi + # b1 =3D phi + a2 =3D operation (a1) + b2 =3D operation (b1) */ + bool slp_reduc =3D (slp_node && !REDUC_GROUP_FIRST_ELEMENT (stmt_info)); bool direct_slp_reduc; tree new_phi_result; tree induction_index =3D NULL_TREE; @@ -5050,6 +5055,16 @@ vect_create_epilog_for_reduction (loop_vec_info loop= _vinfo, adjustment_def =3D STMT_VINFO_REDUC_EPILOGUE_ADJUSTMENT (reduc_info); } =20 + stmt_vec_info single_live_out_stmt[] =3D { stmt_info }; + array_slice live_out_stmts =3D single_live_out_stmt; + if (slp_reduc) + /* All statements produce live-out values. */ + live_out_stmts =3D SLP_TREE_SCALAR_STMTS (slp_node); + else if (slp_node) + /* The last statement in the reduction chain produces the live-out + value. 
*/ + single_live_out_stmt[0] =3D SLP_TREE_SCALAR_STMTS (slp_node)[group_siz= e - 1]; + unsigned vec_num; int ncopies; if (slp_node) @@ -5248,13 +5263,6 @@ vect_create_epilog_for_reduction (loop_vec_info loop= _vinfo, new_scalar_dest =3D vect_create_destination_var (scalar_dest, NULL); bitsize =3D TYPE_SIZE (scalar_type); =20 - /* SLP reduction without reduction chain, e.g., - # a1 =3D phi - # b1 =3D phi - a2 =3D operation (a1) - b2 =3D operation (b1) */ - slp_reduc =3D (slp_node && !REDUC_GROUP_FIRST_ELEMENT (stmt_info)); - /* True if we should implement SLP_REDUC using native reduction operatio= ns instead of scalar operations. */ direct_slp_reduc =3D (reduc_fn !=3D IFN_LAST @@ -5877,6 +5885,7 @@ vect_create_epilog_for_reduction (loop_vec_info loop_= vinfo, first_res, res); scalar_results[j % group_size] =3D new_res; } + scalar_results.truncate (group_size); for (k =3D 0; k < group_size; k++) scalar_results[k] =3D gimple_convert (&stmts, scalar_type, scalar_results[k]); @@ -5969,39 +5978,11 @@ vect_create_epilog_for_reduction (loop_vec_info loo= p_vinfo, use =20=20 use */ =20 - - /* In SLP reduction chain we reduce vector results into one vector if - necessary, hence we set here REDUC_GROUP_SIZE to 1. SCALAR_DEST is t= he - LHS of the last stmt in the reduction chain, since we are looking for - the loop exit phi node. */ - if (REDUC_GROUP_FIRST_ELEMENT (stmt_info)) - { - stmt_vec_info dest_stmt_info - =3D vect_orig_stmt (SLP_TREE_SCALAR_STMTS (slp_node)[group_size - 1]); - scalar_dest =3D gimple_assign_lhs (dest_stmt_info->stmt); - group_size =3D 1; - } - - /* In SLP we may have several statements in NEW_PHIS and REDUCTION_PHIS = (in - case that REDUC_GROUP_SIZE is greater than vectorization factor). - Therefore, we need to match SCALAR_RESULTS with corresponding stateme= nts. - The first (REDUC_GROUP_SIZE / number of new vector stmts) scalar resu= lts - correspond to the first vector stmt, etc. 
- (RATIO is equal to (REDUC_GROUP_SIZE / number of new vector stmts)). = */ - if (group_size > new_phis.length ()) - gcc_assert (!(group_size % new_phis.length ())); - - for (k =3D 0; k < group_size; k++) + gcc_assert (live_out_stmts.size () =3D=3D scalar_results.length ()); + for (k =3D 0; k < live_out_stmts.size (); k++) { - if (slp_reduc) - { - stmt_vec_info scalar_stmt_info =3D SLP_TREE_SCALAR_STMTS (slp_node)[k]; - - orig_stmt_info =3D STMT_VINFO_RELATED_STMT (scalar_stmt_info); - /* SLP statements can't participate in patterns. */ - gcc_assert (!orig_stmt_info); - scalar_dest =3D gimple_assign_lhs (scalar_stmt_info->stmt); - } + stmt_vec_info scalar_stmt_info =3D vect_orig_stmt (live_out_stmts[k]= ); + scalar_dest =3D gimple_assign_lhs (scalar_stmt_info->stmt); =20 phis.create (3); /* Find the loop-closed-use at the loop exit of the original scalar