From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 48) id 9C9D03858D39; Fri, 10 Nov 2023 12:02:59 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 9C9D03858D39 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gcc.gnu.org; s=default; t=1699617779; bh=h/KD68Hvl3IkWRnHYGJAHBZlL69iNLgaYJZKoyHpB5c=; h=From:To:Subject:Date:In-Reply-To:References:From; b=auqUb5Y7QbI29dlBDF9Mc55h6XwNm9oEi3g8u6E6pzCcfgSK6R/EI/QVjD1t6q/aD rhdfQ3tidiZU1V+eT+yA6eBNv4yXjXFPR/mzc2X2IrUJeR4q+q+pEtsRyjFtu9AHEH 342eLUdo5vkoGHfgBypX07GjdMFMc0Qo8Z4HmlAA= From: "juzhe.zhong at rivai dot ai" To: gcc-bugs@gcc.gnu.org Subject: [Bug tree-optimization/109088] GCC does not always vectorize conditional reduction Date: Fri, 10 Nov 2023 12:02:58 +0000 X-Bugzilla-Reason: CC X-Bugzilla-Type: changed X-Bugzilla-Watch-Reason: None X-Bugzilla-Product: gcc X-Bugzilla-Component: tree-optimization X-Bugzilla-Version: 13.0 X-Bugzilla-Keywords: missed-optimization X-Bugzilla-Severity: enhancement X-Bugzilla-Who: juzhe.zhong at rivai dot ai X-Bugzilla-Status: NEW X-Bugzilla-Resolution: X-Bugzilla-Priority: P3 X-Bugzilla-Assigned-To: unassigned at gcc dot gnu.org X-Bugzilla-Target-Milestone: --- X-Bugzilla-Flags: X-Bugzilla-Changed-Fields: Message-ID: In-Reply-To: References: Content-Type: text/plain; charset="UTF-8" Content-Transfer-Encoding: quoted-printable X-Bugzilla-URL: http://gcc.gnu.org/bugzilla/ Auto-Submitted: auto-generated MIME-Version: 1.0 List-Id: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=3D109088 --- Comment #15 from JuzheZhong --- Hi,Richard. Confirmed Robin's patch doesn't help with this issue. The root cause of this issue is failed to recognize it as possible vectorization of conditional reduction which means is_cond_scalar_reduction is FALSE. I have this following patch which bootstrap on X86 and regtest passed also passed on aarch64. This following patch can enhance if-conv conditional reduction recognition. diff --git a/gcc/tree-if-conv.cc b/gcc/tree-if-conv.cc index a8c915913ae..2bdd3710a65 100644 --- a/gcc/tree-if-conv.cc +++ b/gcc/tree-if-conv.cc @@ -1784,14 +1784,119 @@ is_cond_scalar_reduction (gimple *phi, gimple **re= duc, tree arg_0, tree arg_1, r_nop2 =3D strip_nop_cond_scalar_reduction (*has_nop, r_op2); /* Make R_OP1 to hold reduction variable. */ + gimple *nonphi_use_stmt =3D NULL; if (r_nop2 =3D=3D PHI_RESULT (header_phi) && commutative_tree_code (reduction_op)) { std::swap (r_op1, r_op2); std::swap (r_nop1, r_nop2); } - else if (r_nop1 !=3D PHI_RESULT (header_phi)) - return false; + else if (r_nop1 =3D=3D PHI_RESULT (header_phi)) + ; + else + { + /* Analyze the statement chain of STMT so that we could teach genera= te + better if-converison code sequence. We are trying to catch this + following situation: + + loop-header: + reduc_1 =3D PHI <..., reduc_2> + ... + if (...) + tmp1 =3D reduc_1 + rhs1; + tmp2 =3D tmp1 + rhs2; + tmp3 =3D tmp2 + rhs3; + ... + reduc_3 =3D tmpN-1 + rhsN-1; + + reduc_2 =3D PHI + + and convert to + + reduc_2 =3D PHI <0, reduc_1> + tmp1 =3D rhs1; + tmp2 =3D tmp1 + rhs2; + tmp3 =3D tmp2 + rhs3; + ... + reduc_3 =3D tmpN-1 + rhsN-1; + ifcvt =3D cond_expr ? reduc_3 : 0; + reduc_1 =3D reduc_1 +/- ifcvt; */ + if (num_imm_uses (PHI_RESULT (header_phi)) !=3D 2) + return false; + if (!ANY_INTEGRAL_TYPE_P (TREE_TYPE (PHI_RESULT (phi))) + && !(FLOAT_TYPE_P (TREE_TYPE (PHI_RESULT (phi))) + && !HONOR_SIGNED_ZEROS (TREE_TYPE (PHI_RESULT (phi))) + && !HONOR_SIGN_DEPENDENT_ROUNDING (TREE_TYPE (PHI_RESULT (ph= i))) + && !HONOR_NANS (TREE_TYPE (PHI_RESULT (phi))))) + return false; + gimple *prev_use_stmt, *curr_use_stmt; + use_operand_p use; + FOR_EACH_IMM_USE_FAST (use_p, imm_iter, PHI_RESULT (header_phi)) + { + prev_use_stmt =3D curr_use_stmt =3D USE_STMT (use_p); + if (is_gimple_assign (curr_use_stmt)) + { + if (TREE_CODE (gimple_assign_lhs (curr_use_stmt)) !=3D SSA_NA= ME) + return false; + if (*has_nop) + { + if (!CONVERT_EXPR_CODE_P ( + gimple_assign_rhs_code (curr_use_stmt))) + return false; + } + else + { + if (gimple_assign_rhs_code (curr_use_stmt) !=3D reduction= _op) + return false; + } + + bool visited_p =3D false; + nonphi_use_stmt =3D curr_use_stmt; + while (!visited_p) + { + if (!single_imm_use (gimple_assign_lhs (prev_use_stmt), &= use, + &curr_use_stmt) + || gimple_bb (curr_use_stmt) !=3D gimple_bb (stmt) + || !is_gimple_assign (curr_use_stmt) + || TREE_CODE (gimple_assign_lhs (curr_use_stmt)) + !=3D SSA_NAME + || gimple_assign_rhs_code (curr_use_stmt) !=3D reduction_op) + return false; + if (curr_use_stmt =3D=3D stmt) + { + if (*has_nop) + { + if (!single_imm_use (gimple_assign_lhs ( + nonphi_use_stmt), + &use, &curr_use_stmt)) + return false; + r_op1 =3D gimple_assign_lhs (nonphi_use_stmt); + r_nop1 =3D PHI_RESULT (header_phi); + nonphi_use_stmt =3D curr_use_stmt; + } + else + r_op1 =3D PHI_RESULT (header_phi); + + if (*has_nop) + { + if (!single_imm_use (gimple_assign_lhs (stmt), &u= se, + &curr_use_stmt)) + return false; + r_op2 =3D gimple_assign_lhs (stmt); + r_nop2 =3D gimple_assign_lhs (curr_use_stmt); + } + else + r_op2 =3D gimple_assign_lhs (stmt); + visited_p =3D true; + } + else + prev_use_stmt =3D curr_use_stmt; + } + } + else if (curr_use_stmt !=3D phi) + return false; + } + } if (*has_nop) { @@ -1816,12 +1921,41 @@ is_cond_scalar_reduction (gimple *phi, gimple **red= uc, tree arg_0, tree arg_1, continue; if (use_stmt =3D=3D stmt) continue; + if (use_stmt =3D=3D nonphi_use_stmt) + continue; if (gimple_code (use_stmt) !=3D GIMPLE_PHI) return false; } *op0 =3D r_op1; *op1 =3D r_op2; *reduc =3D stmt; + + if (nonphi_use_stmt) + { + /* Transform: + + if (...) + tmp1 =3D reduc_1 + rhs1; + tmp2 =3D tmp1 + rhs2; + tmp3 =3D tmp2 + rhs3; + + into: + + tmp1 =3D rhs1 + 0; ---> We replace reduc_1 into '0' + tmp2 =3D tmp1 + rhs2; + tmp3 =3D tmp2 + rhs3; + ... + reduc_3 =3D tmpN-1 + rhsN-1; + ifcvt =3D cond_expr ? reduc_3 : 0; */ + gcc_assert (gimple_assign_rhs_code (nonphi_use_stmt) =3D=3D reductio= n_op); + if (gimple_assign_rhs1 (nonphi_use_stmt) =3D=3D r_op1) + gimple_assign_set_rhs1 (nonphi_use_stmt, + build_zero_cst (TREE_TYPE (r_op1))); + else if (gimple_assign_rhs2 (nonphi_use_stmt) =3D=3D r_op1) + gimple_assign_set_rhs2 (nonphi_use_stmt, + build_zero_cst (TREE_TYPE (r_op1))); + update_stmt (nonphi_use_stmt); + } return true; } @@ -1886,12 +2020,17 @@ convert_scalar_cond_reduction (gimple *reduc, gimple_stmt_iterator *gsi, gsi_remove (&stmt_it, true); release_defs (nop_reduc); } + gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT); /* Delete original reduction stmt. */ - stmt_it =3D gsi_for_stmt (reduc); - gsi_remove (&stmt_it, true); - release_defs (reduc); + if (op1 !=3D gimple_assign_lhs (reduc)) + { + stmt_it =3D gsi_for_stmt (reduc); + gsi_remove (&stmt_it, true); + release_defs (reduc); + } + return rhs; } I have fully tested it with different kinds of condtional reduction. All can be vectorized. I am not sure whether it is a feasible solution.=