From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from foss.arm.com (foss.arm.com [217.140.110.172]) by sourceware.org (Postfix) with ESMTP id C8EE8398640E for ; Thu, 8 Jul 2021 12:39:49 +0000 (GMT) DMARC-Filter: OpenDMARC Filter v1.4.1 sourceware.org C8EE8398640E Received: from usa-sjc-imap-foss1.foss.arm.com (unknown [10.121.207.14]) by usa-sjc-mx-foss1.foss.arm.com (Postfix) with ESMTP id 77813D6E for ; Thu, 8 Jul 2021 05:39:49 -0700 (PDT) Received: from localhost (e121540-lin.manchester.arm.com [10.32.98.126]) by usa-sjc-imap-foss1.foss.arm.com (Postfix) with ESMTPSA id 1EBBA3F73B for ; Thu, 8 Jul 2021 05:39:49 -0700 (PDT) From: Richard Sandiford To: gcc-patches@gcc.gnu.org Mail-Followup-To: gcc-patches@gcc.gnu.org, richard.sandiford@arm.com Subject: [PATCH 03/10] vect: Remove new_phis from References: Date: Thu, 08 Jul 2021 13:39:47 +0100 In-Reply-To: (Richard Sandiford's message of "Thu, 08 Jul 2021 13:38:20 +0100") Message-ID: User-Agent: Gnus/5.13 (Gnus v5.13) Emacs/26.3 (gnu/linux) MIME-Version: 1.0 Content-Type: text/plain; charset=utf-8 Content-Transfer-Encoding: quoted-printable X-Spam-Status: No, score=-12.4 required=5.0 tests=BAYES_00, GIT_PATCH_0, KAM_DMARC_STATUS, SPF_HELO_NONE, SPF_PASS, TXREP autolearn=ham autolearn_force=no version=3.4.4 X-Spam-Checker-Version: SpamAssassin 3.4.4 (2020-01-24) on server2.sourceware.org X-BeenThere: gcc-patches@gcc.gnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Gcc-patches mailing list List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Thu, 08 Jul 2021 12:39:51 -0000 vect_create_epilog_for_reduction had a variable called new_phis. It collected the statements that produce the exit block definitions of the vector reduction accumulators. Although those statements are indeed phis initially, they are often replaced with normal statements later, leading to puzzling code like: FOR_EACH_VEC_ELT (new_phis, i, new_phi) { int bit_offset; if (gimple_code (new_phi) =3D=3D GIMPLE_PHI) vec_temp =3D PHI_RESULT (new_phi); else vec_temp =3D gimple_assign_lhs (new_phi); Also, although the array collects statements, in practice all users want the lhs instead. This patch therefore replaces new_phis with a vector of gimple values called =E2=80=9Creduc_inputs=E2=80=9D. Also, reduction chains and ncopies>1 were handled with identical code (and there was a comment saying so). The patch unites them into a single =E2=80=9Cif=E2=80=9D. gcc/ * tree-vect-loop.c (vect_create_epilog_for_reduction): Replace the new_phis vector with a reduc_inputs vector. Combine handling of reduction chains and ncopies > 1. --- gcc/tree-vect-loop.c | 113 ++++++++++++++++--------------------------- 1 file changed, 41 insertions(+), 72 deletions(-) diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c index 8390ac80ca0..b7f73ca52c7 100644 --- a/gcc/tree-vect-loop.c +++ b/gcc/tree-vect-loop.c @@ -5005,7 +5005,7 @@ vect_create_epilog_for_reduction (loop_vec_info loop_= vinfo, imm_use_iterator imm_iter, phi_imm_iter; use_operand_p use_p, phi_use_p; gimple *use_stmt; - auto_vec new_phis; + auto_vec reduc_inputs; int j, i; auto_vec scalar_results; unsigned int group_size =3D 1, k; @@ -5017,7 +5017,6 @@ vect_create_epilog_for_reduction (loop_vec_info loop_= vinfo, b2 =3D operation (b1) */ bool slp_reduc =3D (slp_node && !REDUC_GROUP_FIRST_ELEMENT (stmt_info)); bool direct_slp_reduc; - tree new_phi_result; tree induction_index =3D NULL_TREE; =20 if (slp_node) @@ -5215,7 +5214,7 @@ vect_create_epilog_for_reduction (loop_vec_info loop_= vinfo, if (double_reduc) loop =3D outer_loop; exit_bb =3D single_exit (loop)->dest; - new_phis.create (slp_node ? vec_num : ncopies); + reduc_inputs.create (slp_node ? vec_num : ncopies); for (unsigned i =3D 0; i < vec_num; i++) { if (slp_node) @@ -5223,19 +5222,14 @@ vect_create_epilog_for_reduction (loop_vec_info loo= p_vinfo, else def =3D gimple_get_lhs (STMT_VINFO_VEC_STMTS (rdef_info)[0]); for (j =3D 0; j < ncopies; j++) - { + { tree new_def =3D copy_ssa_name (def); - phi =3D create_phi_node (new_def, exit_bb); - if (j =3D=3D 0) - new_phis.quick_push (phi); - else - { - def =3D gimple_get_lhs (STMT_VINFO_VEC_STMTS (rdef_info)[j]); - new_phis.quick_push (phi); - } - - SET_PHI_ARG_DEF (phi, single_exit (loop)->dest_idx, def); - } + phi =3D create_phi_node (new_def, exit_bb); + if (j) + def =3D gimple_get_lhs (STMT_VINFO_VEC_STMTS (rdef_info)[j]); + SET_PHI_ARG_DEF (phi, single_exit (loop)->dest_idx, def); + reduc_inputs.quick_push (new_def); + } } =20 exit_gsi =3D gsi_after_labels (exit_bb); @@ -5274,52 +5268,32 @@ vect_create_epilog_for_reduction (loop_vec_info loo= p_vinfo, a2 =3D operation (a1) a3 =3D operation (a2), =20 - we may end up with more than one vector result. Here we reduce them = to - one vector. */ - if (REDUC_GROUP_FIRST_ELEMENT (stmt_info) || direct_slp_reduc) + we may end up with more than one vector result. Here we reduce them + to one vector. + + The same is true if we couldn't use a single defuse cycle. */ + if (REDUC_GROUP_FIRST_ELEMENT (stmt_info) + || direct_slp_reduc + || ncopies > 1) { gimple_seq stmts =3D NULL; - tree first_vect =3D PHI_RESULT (new_phis[0]); - first_vect =3D gimple_convert (&stmts, vectype, first_vect); - for (k =3D 1; k < new_phis.length (); k++) + tree first_vect =3D gimple_convert (&stmts, vectype, reduc_inputs[0]= ); + for (k =3D 1; k < reduc_inputs.length (); k++) { - gimple *next_phi =3D new_phis[k]; - tree second_vect =3D PHI_RESULT (next_phi); - second_vect =3D gimple_convert (&stmts, vectype, second_vect); + tree second_vect =3D gimple_convert (&stmts, vectype, reduc_inputs[k]); first_vect =3D gimple_build (&stmts, code, vectype, first_vect, second_vect); } gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT); =20 - new_phi_result =3D first_vect; - new_phis.truncate (0); - new_phis.safe_push (SSA_NAME_DEF_STMT (first_vect)); + reduc_inputs.truncate (0); + reduc_inputs.safe_push (first_vect); } - /* Likewise if we couldn't use a single defuse cycle. */ - else if (ncopies > 1) - { - gimple_seq stmts =3D NULL; - tree first_vect =3D PHI_RESULT (new_phis[0]); - first_vect =3D gimple_convert (&stmts, vectype, first_vect); - for (int k =3D 1; k < ncopies; ++k) - { - tree second_vect =3D PHI_RESULT (new_phis[k]); - second_vect =3D gimple_convert (&stmts, vectype, second_vect); - first_vect =3D gimple_build (&stmts, code, vectype, - first_vect, second_vect); - } - gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT); - new_phi_result =3D first_vect; - new_phis.truncate (0); - new_phis.safe_push (SSA_NAME_DEF_STMT (first_vect)); - } - else - new_phi_result =3D PHI_RESULT (new_phis[0]); =20 if (STMT_VINFO_REDUC_TYPE (reduc_info) =3D=3D COND_REDUCTION && reduc_fn !=3D IFN_LAST) { - /* For condition reductions, we have a vector (NEW_PHI_RESULT) conta= ining + /* For condition reductions, we have a vector (REDUC_INPUTS 0) conta= ining various data values where the condition matched and another vector (INDUCTION_INDEX) containing all the indexes of those matches. We need to extract the last matching index (which will be the index with @@ -5350,7 +5324,7 @@ vect_create_epilog_for_reduction (loop_vec_info loop_= vinfo, tree zero_vec =3D build_zero_cst (vectype); =20 gimple_seq stmts =3D NULL; - new_phi_result =3D gimple_convert (&stmts, vectype, new_phi_result); + reduc_inputs[0] =3D gimple_convert (&stmts, vectype, reduc_inputs[0]= ); gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT); =20 /* Find maximum value from the vector of found indexes. */ @@ -5370,7 +5344,7 @@ vect_create_epilog_for_reduction (loop_vec_info loop_= vinfo, =20 /* Next we compare the new vector (MAX_INDEX_VEC) full of max indexes with the vector (INDUCTION_INDEX) of found indexes, choosing values - from the data vector (NEW_PHI_RESULT) for matches, 0 (ZERO_VEC) + from the data vector (REDUC_INPUTS 0) for matches, 0 (ZERO_VEC) otherwise. Only one value should match, resulting in a vector (VEC_COND) with one data value and the rest zeros. In the case where the loop never made any matches, every index will @@ -5389,7 +5363,8 @@ vect_create_epilog_for_reduction (loop_vec_info loop_= vinfo, zero. */ tree vec_cond =3D make_ssa_name (vectype); gimple *vec_cond_stmt =3D gimple_build_assign (vec_cond, VEC_COND_EX= PR, - vec_compare, new_phi_result, + vec_compare, + reduc_inputs[0], zero_vec); gsi_insert_before (&exit_gsi, vec_cond_stmt, GSI_SAME_STMT); =20 @@ -5437,7 +5412,7 @@ vect_create_epilog_for_reduction (loop_vec_info loop_= vinfo, val =3D data_reduc[i], idx_val =3D induction_index[i]; return val; */ =20 - tree data_eltype =3D TREE_TYPE (TREE_TYPE (new_phi_result)); + tree data_eltype =3D TREE_TYPE (TREE_TYPE (reduc_inputs[0])); tree idx_eltype =3D TREE_TYPE (TREE_TYPE (induction_index)); unsigned HOST_WIDE_INT el_size =3D tree_to_uhwi (TYPE_SIZE (idx_elty= pe)); poly_uint64 nunits =3D TYPE_VECTOR_SUBPARTS (TREE_TYPE (induction_in= dex)); @@ -5461,7 +5436,7 @@ vect_create_epilog_for_reduction (loop_vec_info loop_= vinfo, epilog_stmt =3D gimple_build_assign (val, BIT_FIELD_REF, build3 (BIT_FIELD_REF, data_eltype, - new_phi_result, + reduc_inputs[0], bitsize_int (el_size), bitsize_int (off))); gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT); @@ -5513,10 +5488,10 @@ vect_create_epilog_for_reduction (loop_vec_info loo= p_vinfo, "Reduce using direct vector reduction.\n"); =20 gimple_seq stmts =3D NULL; - new_phi_result =3D gimple_convert (&stmts, vectype, new_phi_result); - vec_elem_type =3D TREE_TYPE (TREE_TYPE (new_phi_result)); + reduc_inputs[0] =3D gimple_convert (&stmts, vectype, reduc_inputs[0]= ); + vec_elem_type =3D TREE_TYPE (TREE_TYPE (reduc_inputs[0])); new_temp =3D gimple_build (&stmts, as_combined_fn (reduc_fn), - vec_elem_type, new_phi_result); + vec_elem_type, reduc_inputs[0]); new_temp =3D gimple_convert (&stmts, scalar_type, new_temp); gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT); =20 @@ -5546,7 +5521,7 @@ vect_create_epilog_for_reduction (loop_vec_info loop_= vinfo, neutral value. We can then do a normal reduction on each vector. */ =20 /* Enforced by vectorizable_reduction. */ - gcc_assert (new_phis.length () =3D=3D 1); + gcc_assert (reduc_inputs.length () =3D=3D 1); gcc_assert (pow2p_hwi (group_size)); =20 slp_tree orig_phis_slp_node =3D slp_node_instance->reduc_phis; @@ -5602,7 +5577,7 @@ vect_create_epilog_for_reduction (loop_vec_info loop_= vinfo, =20 sel[j] =3D (index[j] =3D=3D i); =20 - which selects the elements of NEW_PHI_RESULT that should + which selects the elements of REDUC_INPUTS[0] that should be included in the result. */ tree compare_val =3D build_int_cst (index_elt_type, i); compare_val =3D build_vector_from_val (index_type, compare_val); @@ -5611,11 +5586,11 @@ vect_create_epilog_for_reduction (loop_vec_info loo= p_vinfo, =20 /* Calculate the equivalent of: =20 - vec =3D seq ? new_phi_result : vector_identity; + vec =3D seq ? reduc_inputs[0] : vector_identity; =20 VEC is now suitable for a full vector reduction. */ tree vec =3D gimple_build (&seq, VEC_COND_EXPR, vectype, - sel, new_phi_result, vector_identity); + sel, reduc_inputs[0], vector_identity); =20 /* Do the reduction and convert it to the appropriate type. */ tree scalar =3D gimple_build (&seq, as_combined_fn (reduc_fn), @@ -5630,7 +5605,7 @@ vect_create_epilog_for_reduction (loop_vec_info loop_= vinfo, bool reduce_with_shift; tree vec_temp; =20 - gcc_assert (slp_reduc || new_phis.length () =3D=3D 1); + gcc_assert (slp_reduc || reduc_inputs.length () =3D=3D 1); =20 /* See if the target wants to do the final (shift) reduction in a vector mode of smaller size and first reduce upper/lower @@ -5640,7 +5615,7 @@ vect_create_epilog_for_reduction (loop_vec_info loop_= vinfo, unsigned nunits =3D TYPE_VECTOR_SUBPARTS (vectype).to_constant (); unsigned nunits1 =3D nunits; if ((mode1 =3D targetm.vectorize.split_reduction (mode)) !=3D mode - && new_phis.length () =3D=3D 1) + && reduc_inputs.length () =3D=3D 1) { nunits1 =3D GET_MODE_NUNITS (mode1).to_constant (); /* For SLP reductions we have to make sure lanes match up, but @@ -5672,7 +5647,7 @@ vect_create_epilog_for_reduction (loop_vec_info loop_= vinfo, =20 /* First reduce the vector to the desired vector size we should do shift reduction on by combining upper and lower halves. */ - new_temp =3D new_phi_result; + new_temp =3D reduc_inputs[0]; while (nunits > nunits1) { nunits /=3D 2; @@ -5751,7 +5726,7 @@ vect_create_epilog_for_reduction (loop_vec_info loop_= vinfo, new_temp =3D make_ssa_name (vectype1); epilog_stmt =3D gimple_build_assign (new_temp, code, dst1, dst2); gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT); - new_phis[0] =3D epilog_stmt; + reduc_inputs[0] =3D new_temp; } =20 if (reduce_with_shift && !slp_reduc) @@ -5832,13 +5807,9 @@ vect_create_epilog_for_reduction (loop_vec_info loop= _vinfo, int element_bitsize =3D tree_to_uhwi (bitsize); tree compute_type =3D TREE_TYPE (vectype); gimple_seq stmts =3D NULL; - FOR_EACH_VEC_ELT (new_phis, i, new_phi) + FOR_EACH_VEC_ELT (reduc_inputs, i, vec_temp) { int bit_offset; - if (gimple_code (new_phi) =3D=3D GIMPLE_PHI) - vec_temp =3D PHI_RESULT (new_phi); - else - vec_temp =3D gimple_assign_lhs (new_phi); new_temp =3D gimple_build (&stmts, BIT_FIELD_REF, compute_type, vec_temp, bitsize, bitsize_zero_node); =20 @@ -5929,11 +5900,10 @@ vect_create_epilog_for_reduction (loop_vec_info loo= p_vinfo, gimple_seq stmts =3D NULL; if (double_reduc) { - new_phi =3D new_phis[0]; gcc_assert (VECTOR_TYPE_P (TREE_TYPE (adjustment_def))); adjustment_def =3D gimple_convert (&stmts, vectype, adjustment_def); new_temp =3D gimple_build (&stmts, code, vectype, - PHI_RESULT (new_phi), adjustment_def); + reduc_inputs[0], adjustment_def); } else { @@ -5947,7 +5917,6 @@ vect_create_epilog_for_reduction (loop_vec_info loop_= vinfo, epilog_stmt =3D gimple_seq_last_stmt (stmts); gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT); scalar_results[0] =3D new_temp; - new_phis[0] =3D epilog_stmt; } =20 if (double_reduc)