From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 48) id 5A9AB3858CDA; Wed, 13 Sep 2023 21:25:52 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 5A9AB3858CDA DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gcc.gnu.org; s=default; t=1694640352; bh=nA7zdF5qdFUmrUhC6I7/BLOEB0zeMurLR//8dkAkyY8=; h=From:To:Subject:Date:In-Reply-To:References:From; b=iWz8U/Xh/dcVv0xnAiIQbpIKvD1PMvj92/l6GjfgZvfSqXeGsRjG+VQYpQaegDqNX uTV2i6p14W8rQpbSr6yxbzunKS4HbdsMStBqw5IxnGolqxV+guIdAQ0l4Q0H5VBKV2 PcExi0EiMUHz7gxPHNUErTQXPBQddJzoFcdDcUKU= From: "rdapp at gcc dot gnu.org" To: gcc-bugs@gcc.gnu.org Subject: [Bug middle-end/111401] Middle-end: Missed optimization of MASK_LEN_FOLD_LEFT_PLUS Date: Wed, 13 Sep 2023 21:25:51 +0000 X-Bugzilla-Reason: CC X-Bugzilla-Type: changed X-Bugzilla-Watch-Reason: None X-Bugzilla-Product: gcc X-Bugzilla-Component: middle-end X-Bugzilla-Version: 14.0 X-Bugzilla-Keywords: missed-optimization X-Bugzilla-Severity: normal X-Bugzilla-Who: rdapp at gcc dot gnu.org X-Bugzilla-Status: NEW X-Bugzilla-Resolution: X-Bugzilla-Priority: P3 X-Bugzilla-Assigned-To: unassigned at gcc dot gnu.org X-Bugzilla-Target-Milestone: --- X-Bugzilla-Flags: X-Bugzilla-Changed-Fields: Message-ID: In-Reply-To: References: Content-Type: text/plain; charset="UTF-8" Content-Transfer-Encoding: quoted-printable X-Bugzilla-URL: http://gcc.gnu.org/bugzilla/ Auto-Submitted: auto-generated MIME-Version: 1.0 List-Id: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111401 --- Comment #3 from Robin Dapp --- Several other things came up, so I'm just going to post the latest status here without having revised or tested it. Going to try fixing it and testing tomorrow. 
--- a/gcc/tree-vect-loop.cc +++ b/gcc/tree-vect-loop.cc @@ -3672,7 +3672,7 @@ vect_analyze_loop (class loop *loop, vec_info_shared *shared) static bool fold_left_reduction_fn (code_helper code, internal_fn *reduc_fn) { - if (code =3D=3D PLUS_EXPR) + if (code =3D=3D PLUS_EXPR || code =3D=3D IFN_COND_ADD) { *reduc_fn =3D IFN_FOLD_LEFT_PLUS; return true; @@ -4106,8 +4106,13 @@ vect_is_simple_reduction (loop_vec_info loop_info, stmt_vec_info phi_info, return NULL; } - nphi_def_loop_uses++; - phi_use_stmt =3D use_stmt; + /* We might have two uses in the same instruction, only count them as + one. */ + if (use_stmt !=3D phi_use_stmt) + { + nphi_def_loop_uses++; + phi_use_stmt =3D use_stmt; + } } tree latch_def =3D PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop)); @@ -6861,7 +6866,7 @@ vectorize_fold_left_reduction (loop_vec_info loop_vin= fo, gimple **vec_stmt, slp_tree slp_node, gimple *reduc_def_stmt, tree_code code, internal_fn reduc_fn, - tree ops[3], tree vectype_in, + tree *ops, int num_ops, tree vectype_in, int reduc_index, vec_loop_masks *masks, vec_loop_lens *lens) { @@ -6883,11 +6888,24 @@ vectorize_fold_left_reduction (loop_vec_info loop_vinfo, gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype_out), TYPE_VECTOR_SUBPARTS (vectype_in))); - tree op0 =3D ops[1 - reduc_index]; + /* The operands either come from a binary operation or a COND_ADD operat= ion. + The former is a gimple assign and the latter is a gimple call with fo= ur + arguments. 
*/ + gcc_assert (num_ops =3D=3D 2 || num_ops =3D=3D 4); + bool is_cond_add =3D num_ops =3D=3D 4; + tree op0, opmask; + if (!is_cond_add) + op0 =3D ops[1 - reduc_index]; + else + { + op0 =3D ops[2]; + opmask =3D ops[0]; + gcc_assert (!slp_node); + } int group_size =3D 1; stmt_vec_info scalar_dest_def_info; - auto_vec vec_oprnds0; + auto_vec vec_oprnds0, vec_opmask; if (slp_node) { auto_vec > vec_defs (2); @@ -6903,9 +6921,18 @@ vectorize_fold_left_reduction (loop_vec_info loop_vi= nfo, vect_get_vec_defs_for_operand (loop_vinfo, stmt_info, 1, op0, &vec_oprnds0); scalar_dest_def_info =3D stmt_info; + if (is_cond_add) + { + vect_get_vec_defs_for_operand (loop_vinfo, stmt_info, 1, + opmask, &vec_opmask); + gcc_assert (vec_opmask.length() =3D=3D 1); + } } - tree scalar_dest =3D gimple_assign_lhs (scalar_dest_def_info->stmt); + gimple *sdef =3D scalar_dest_def_info->stmt; + tree scalar_dest =3D is_gimple_call (sdef) + ? gimple_call_lhs (sdef) + : gimple_assign_lhs (scalar_dest_def_info->stmt); tree scalar_type =3D TREE_TYPE (scalar_dest); tree reduc_var =3D gimple_phi_result (reduc_def_stmt); @@ -6945,7 +6972,11 @@ vectorize_fold_left_reduction (loop_vec_info loop_vi= nfo, i, 1); signed char biasval =3D LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo); bias =3D build_int_cst (intQI_type_node, biasval); - mask =3D build_minus_one_cst (truth_type_for (vectype_in)); + /* If we have a COND_ADD take its mask. Otherwise use {-1, ...}.= */ + if (is_cond_add) + mask =3D vec_opmask[0]; + else + mask =3D build_minus_one_cst (truth_type_for (vectype_in)); } /* Handle MINUS by adding the negative. */ @@ -7440,6 +7471,9 @@ vectorizable_reduction (loop_vec_info loop_vinfo, if (i =3D=3D STMT_VINFO_REDUC_IDX (stmt_info)) continue; + if (op.ops[i] =3D=3D op.ops[STMT_VINFO_REDUC_IDX (stmt_info)]) + continue; + /* There should be only one cycle def in the stmt, the one leading to reduc_def. 
*/ if (VECTORIZABLE_CYCLE_DEF (dt)) @@ -8211,8 +8245,21 @@ vect_transform_reduction (loop_vec_info loop_vinfo, vec_num =3D 1; } - code_helper code =3D canonicalize_code (op.code, op.type); - internal_fn cond_fn =3D get_conditional_internal_fn (code, op.type); + code_helper code (op.code); + internal_fn cond_fn; + + if (code.is_internal_fn ()) + { + internal_fn ifn =3D internal_fn (op.code); + code =3D canonicalize_code (conditional_internal_fn_code (ifn), op.t= ype); + cond_fn =3D ifn; + } + else + { + code =3D canonicalize_code (op.code, op.type); + cond_fn =3D get_conditional_internal_fn (code, op.type); + } + vec_loop_masks *masks =3D &LOOP_VINFO_MASKS (loop_vinfo); vec_loop_lens *lens =3D &LOOP_VINFO_LENS (loop_vinfo); bool mask_by_cond_expr =3D use_mask_by_cond_expr_p (code, cond_fn, vectype_in); @@ -8240,8 +8287,8 @@ vect_transform_reduction (loop_vec_info loop_vinfo, gcc_assert (code.is_tree_code ()); return vectorize_fold_left_reduction (loop_vinfo, stmt_info, gsi, vec_stmt, slp_node, reduc_def_phi, - tree_code (code), reduc_fn, op.ops, vectype_in, reduc_index, mas= ks, - lens); + tree_code (code), reduc_fn, op.ops, op.num_ops, vectype_in, + reduc_index, masks, lens); } bool single_defuse_cycle =3D STMT_VINFO_FORCE_SINGLE_CYCLE (reduc_info);=