From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from smtpbguseast2.qq.com (smtpbguseast2.qq.com [54.204.34.130]) by sourceware.org (Postfix) with ESMTPS id 84590385C6E6 for ; Thu, 19 Oct 2023 08:34:06 +0000 (GMT) DMARC-Filter: OpenDMARC Filter v1.4.2 sourceware.org 84590385C6E6 Authentication-Results: sourceware.org; dmarc=none (p=none dis=none) header.from=rivai.ai Authentication-Results: sourceware.org; spf=pass smtp.mailfrom=rivai.ai ARC-Filter: OpenARC Filter v1.0.0 sourceware.org 84590385C6E6 Authentication-Results: server2.sourceware.org; arc=none smtp.remote-ip=54.204.34.130 ARC-Seal: i=1; a=rsa-sha256; d=sourceware.org; s=key; t=1697704452; cv=none; b=fMvymXfXNqS9GOIHvNF1qUQZP5zcG5I/qxeHTdxzz3JCbBIyIPfi3DQEe4qKLHPB1yZJ+USXWD5ea8w374xJaRiqGkAK9w2w7U7qpFjNA/3yb7K+YLOwJyvSVXdcY7eYGNA3YFI3SGz19FMiXcv/37cGema5/kAVOE6hd+qSeoM= ARC-Message-Signature: i=1; a=rsa-sha256; d=sourceware.org; s=key; t=1697704452; c=relaxed/simple; bh=XDiDPCvo65DMmfxG/y3wbX0oI0Mn7B/rpjCBhgtV8J8=; h=From:To:Subject:Date:Message-Id:MIME-Version; b=NXIVgBWqgyAh6Hge22vN6u5+oUDD/6Z+z2udj/9lh0Vk50RiDlnzeyl8RXDfzKBlIYMj/lKYUV+5mthnXqOA9lYu4A//khIrZslDW7hkxZRBUktY/g3zEPlRLGBcLtUVuHPvGXpoRWmyvlhMQD/g5jFF8HRffHuWtpQOsxVm3Bc= ARC-Authentication-Results: i=1; server2.sourceware.org X-QQ-mid: bizesmtp65t1697704442td6154by Received: from rios-cad121.hadoop.rioslab.org ( [58.60.1.9]) by bizesmtp.qq.com (ESMTP) with id ; Thu, 19 Oct 2023 16:34:01 +0800 (CST) X-QQ-SSF: 01400000000000C0F000000A0000000 X-QQ-FEAT: kSiwI1dGg+zW+EsUfSUk4TLpihTvkSDbKJyqmR3k0xKAKkd+UDlkD5G8Ea4+s nWBdL58f/CdYV4sOI6QP/un57dZB8HNY15wMrKsmUOnnuOAOFRzAL2attiYpoU/PPba5rXy CmEdGq9uh+M8sb23rJ2P/C2LUxUn+Uzs4eZ7n85ySIjFO8QIFKCy4rL33yW/WKWk9RYaVub E/sgR5PMXsvZzl1KiChNZSLnp4erJuxMDXoRcS8moB3nuIbHgb89yC6HtOmfpEvdt7v32f1 8y4oTYANCiXBNTmAnHOw8r2XXmvy5nCPkioEd28adNU7EAKF15qSFGqy5WZ3BgJEcqpHzAB ZSLzSFCSL+J/egpDs9M+K3G3RjSd/chmBGVGQbGa9NcFudwk6q0d/eD4TNoQOpD22Y+ZrcX FsQVYS0S7wM= X-QQ-GoodBg: 2 X-BIZMAIL-ID: 10710535258031187367 From: Lehua Ding To: gcc-patches@gcc.gnu.org Cc: juzhe.zhong@rivai.ai, kito.cheng@gmail.com, rdapp.gcc@gmail.com, palmer@rivosinc.com, jeffreyalaw@gmail.com, lehua.ding@rivai.ai Subject: [PATCH V3 08/11] RISC-V: P8: Refactor emit-vsetvl phase and delete post optimization Date: Thu, 19 Oct 2023 16:33:30 +0800 Message-Id: <20231019083333.2052340-9-lehua.ding@rivai.ai> X-Mailer: git-send-email 2.36.3 In-Reply-To: <20231019083333.2052340-1-lehua.ding@rivai.ai> References: <20231019083333.2052340-1-lehua.ding@rivai.ai> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-QQ-SENDSIZE: 520 Feedback-ID: bizesmtp:rivai.ai:qybglogicsvrgz:qybglogicsvrgz6a-0 X-Spam-Status: No, score=-11.6 required=5.0 tests=BAYES_00,GIT_PATCH_0,KAM_DMARC_STATUS,RCVD_IN_DNSWL_NONE,RCVD_IN_MSPIKE_H3,RCVD_IN_MSPIKE_WL,SPF_HELO_PASS,SPF_PASS,TXREP autolearn=ham autolearn_force=no version=3.4.6 X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on server2.sourceware.org List-Id: gcc/ChangeLog: * config/riscv/riscv-vsetvl.cc (pre_vsetvl::emit_vsetvl): New. (pre_vsetvl::cleaup): New. (pre_vsetvl::remove_avl_operand): New. (pre_vsetvl::remove_unused_dest_operand): New. (pass_vsetvl::get_vsetvl_at_end): Removed. (local_avl_compatible_p): Removed. (pass_vsetvl::local_eliminate_vsetvl_insn): Removed. (get_first_vsetvl_before_rvv_insns): Removed. (pass_vsetvl::global_eliminate_vsetvl_insn): Removed. (pass_vsetvl::ssa_post_optimization): Removed. (has_no_uses): Removed. (pass_vsetvl::df_post_optimization): Removed. (pass_vsetvl::init): Removed. (pass_vsetvl::done): Removed. (pass_vsetvl::compute_probabilities): Removed. (pass_vsetvl::lazy_vsetvl): Removed. (pass_vsetvl::execute): Removed. (make_pass_vsetvl): Removed. --- gcc/config/riscv/riscv-vsetvl.cc | 878 +++++++------------------------ 1 file changed, 203 insertions(+), 675 deletions(-) diff --git a/gcc/config/riscv/riscv-vsetvl.cc b/gcc/config/riscv/riscv-vsetvl.cc index 855edd6d0f5..06d02d25cb3 100644 --- a/gcc/config/riscv/riscv-vsetvl.cc +++ b/gcc/config/riscv/riscv-vsetvl.cc @@ -3601,6 +3601,209 @@ pre_vsetvl::pre_global_vsetvl_info () } } +void +pre_vsetvl::emit_vsetvl () +{ + bool need_commit = false; + + for (const bb_info *bb : crtl->ssa->bbs ()) + { + for (const auto &curr_info : get_block_info (bb).infos) + { + insn_info *insn = curr_info.get_insn (); + if (curr_info.delete_p ()) + { + if (vsetvl_insn_p (insn->rtl ())) + remove_vsetvl_insn (curr_info); + continue; + } + else if (curr_info.valid_p ()) + { + if (vsetvl_insn_p (insn->rtl ())) + { + const vsetvl_info temp = vsetvl_info (insn); + if (!(curr_info == temp)) + { + if (dump_file) + { + fprintf (dump_file, "\n Change vsetvl info from: "); + temp.dump (dump_file, " "); + fprintf (dump_file, " to: "); + curr_info.dump (dump_file, " "); + } + change_vsetvl_insn (curr_info); + } + } + else + { + if (dump_file) + { + fprintf (dump_file, + "\n Insert vsetvl info before insn %d: ", + insn->uid ()); + curr_info.dump (dump_file, " "); + } + insert_vsetvl_insn (EMIT_BEFORE, curr_info); + } + } + } + } + + for (const vsetvl_info &item : m_delete_list) + { + gcc_assert (vsetvl_insn_p (item.get_insn ()->rtl ())); + remove_vsetvl_insn (item); + } + + /* m_insert vsetvl as LCM suggest. */ + for (int ed = 0; ed < NUM_EDGES (m_edges); ed++) + { + edge eg = INDEX_EDGE (m_edges, ed); + sbitmap i = m_insert[ed]; + if (bitmap_count_bits (i) < 1) + continue; + + if (bitmap_count_bits (i) > 1) + /* For code with infinite loop (e.g. pr61634.c), The data flow is + completely wrong. */ + continue; + + gcc_assert (bitmap_count_bits (i) == 1); + unsigned expr_index = bitmap_first_set_bit (i); + const vsetvl_info &info = *m_exprs[expr_index]; + gcc_assert (info.valid_p ()); + if (dump_file) + { + fprintf (dump_file, + "\n Insert vsetvl info at edge(bb %u -> bb %u): ", + eg->src->index, eg->dest->index); + info.dump (dump_file, " "); + } + rtl_profile_for_edge (eg); + start_sequence (); + + insert_vsetvl_insn (EMIT_DIRECT, info); + rtx_insn *rinsn = get_insns (); + end_sequence (); + default_rtl_profile (); + + /* We should not get an abnormal edge here. */ + gcc_assert (!(eg->flags & EDGE_ABNORMAL)); + need_commit = true; + insert_insn_on_edge (rinsn, eg); + } + + /* Insert vsetvl info that was not deleted after lift up. */ + for (const bb_info *bb : crtl->ssa->bbs ()) + { + const vsetvl_block_info &block_info = get_block_info (bb); + if (!block_info.has_info ()) + continue; + + const vsetvl_info &footer_info = block_info.get_exit_info (); + + if (footer_info.delete_p ()) + continue; + + edge eg; + edge_iterator eg_iterator; + FOR_EACH_EDGE (eg, eg_iterator, bb->cfg_bb ()->succs) + { + gcc_assert (!(eg->flags & EDGE_ABNORMAL)); + if (dump_file) + { + fprintf ( + dump_file, + "\n Insert missed vsetvl info at edge(bb %u -> bb %u): ", + eg->src->index, eg->dest->index); + footer_info.dump (dump_file, " "); + } + start_sequence (); + insert_vsetvl_insn (EMIT_DIRECT, footer_info); + rtx_insn *rinsn = get_insns (); + end_sequence (); + default_rtl_profile (); + insert_insn_on_edge (rinsn, eg); + need_commit = true; + } + } + + if (need_commit) + commit_edge_insertions (); +} + +void +pre_vsetvl::cleaup () +{ + remove_avl_operand (); + remove_unused_dest_operand (); +} + +void +pre_vsetvl::remove_avl_operand () +{ + basic_block cfg_bb; + rtx_insn *rinsn; + FOR_ALL_BB_FN (cfg_bb, cfun) + FOR_BB_INSNS (cfg_bb, rinsn) + if (NONDEBUG_INSN_P (rinsn) && has_vl_op (rinsn) + && REG_P (get_vl (rinsn))) + { + rtx avl = get_vl (rinsn); + if (count_regno_occurrences (rinsn, REGNO (avl)) == 1) + { + rtx new_pat; + if (fault_first_load_p (rinsn)) + new_pat + = simplify_replace_rtx (PATTERN (rinsn), avl, const0_rtx); + else + { + rtx set = single_set (rinsn); + rtx src + = simplify_replace_rtx (SET_SRC (set), avl, const0_rtx); + new_pat = gen_rtx_SET (SET_DEST (set), src); + } + if (dump_file) + { + fprintf (dump_file, " Cleanup insn %u's avl operand:\n", + INSN_UID (rinsn)); + print_rtl_single (dump_file, rinsn); + } + validate_change_or_fail (rinsn, &PATTERN (rinsn), new_pat, false); + } + } +} + +void +pre_vsetvl::remove_unused_dest_operand () +{ + df_analyze (); + basic_block cfg_bb; + rtx_insn *rinsn; + FOR_ALL_BB_FN (cfg_bb, cfun) + FOR_BB_INSNS (cfg_bb, rinsn) + if (NONDEBUG_INSN_P (rinsn) && vsetvl_insn_p (rinsn)) + { + rtx vl = get_vl (rinsn); + vsetvl_info info = vsetvl_info (rinsn); + if (has_no_uses (cfg_bb, rinsn, REGNO (vl))) + if (!info.has_vlmax_avl ()) + { + rtx new_pat = info.get_vsetvl_pat (true); + if (dump_file) + { + fprintf (dump_file, + " Remove vsetvl insn %u's dest(vl) operand since " + "it unused:\n", + INSN_UID (rinsn)); + print_rtl_single (dump_file, rinsn); + } + validate_change_or_fail (rinsn, &PATTERN (rinsn), new_pat, + false); + } + } +} + const pass_data pass_data_vsetvl = { RTL_PASS, /* type */ "vsetvl", /* name */ @@ -3738,678 +3941,3 @@ make_pass_vsetvl (gcc::context *ctxt) { return new pass_vsetvl (ctxt); } - -/* Some instruction can not be accessed in RTL_SSA when we don't re-init - the new RTL_SSA framework but it is definetely at the END of the block. - - Here we optimize the VSETVL is hoisted by LCM: - - Before LCM: - bb 1: - vsetvli a5,a2,e32,m1,ta,mu - bb 2: - vsetvli zero,a5,e32,m1,ta,mu - ... - - After LCM: - bb 1: - vsetvli a5,a2,e32,m1,ta,mu - LCM INSERTED: vsetvli zero,a5,e32,m1,ta,mu --> eliminate - bb 2: - ... - */ -rtx_insn * -pass_vsetvl::get_vsetvl_at_end (const bb_info *bb, vector_insn_info *dem) const -{ - rtx_insn *end_vsetvl = BB_END (bb->cfg_bb ()); - if (end_vsetvl && NONDEBUG_INSN_P (end_vsetvl)) - { - if (JUMP_P (end_vsetvl)) - end_vsetvl = PREV_INSN (end_vsetvl); - - if (NONDEBUG_INSN_P (end_vsetvl) - && vsetvl_discard_result_insn_p (end_vsetvl)) - { - /* Only handle single succ. here, multiple succ. is much - more complicated. */ - if (single_succ_p (bb->cfg_bb ())) - { - edge e = single_succ_edge (bb->cfg_bb ()); - *dem = get_block_info (e->dest).local_dem; - return end_vsetvl; - } - } - } - return nullptr; -} - -/* This predicator should only used within same basic block. */ -static bool -local_avl_compatible_p (rtx avl1, rtx avl2) -{ - if (!REG_P (avl1) || !REG_P (avl2)) - return false; - - return REGNO (avl1) == REGNO (avl2); -} - -/* Local user vsetvl optimizaiton: - - Case 1: - vsetvl a5,a4,e8,mf8 - ... - vsetvl zero,a5,e8,mf8 --> Eliminate directly. - - Case 2: - vsetvl a5,a4,e8,mf8 --> vsetvl a5,a4,e32,mf2 - ... - vsetvl zero,a5,e32,mf2 --> Eliminate directly. */ -void -pass_vsetvl::local_eliminate_vsetvl_insn (const bb_info *bb) const -{ - rtx_insn *prev_vsetvl = nullptr; - rtx_insn *curr_vsetvl = nullptr; - rtx vl_placeholder = RVV_VLMAX; - rtx prev_avl = vl_placeholder; - rtx curr_avl = vl_placeholder; - vector_insn_info prev_dem; - - /* Instruction inserted by LCM is not appeared in RTL-SSA yet, try to - found those instruciton. */ - if (rtx_insn *end_vsetvl = get_vsetvl_at_end (bb, &prev_dem)) - { - prev_avl = get_avl (end_vsetvl); - prev_vsetvl = end_vsetvl; - } - - bool skip_one = false; - /* Backward propgate vsetvl info, drop the later one (prev_vsetvl) if it's - compatible with current vsetvl (curr_avl), and merge the vtype and avl - info. into current vsetvl. */ - for (insn_info *insn : bb->reverse_real_nondebug_insns ()) - { - rtx_insn *rinsn = insn->rtl (); - const auto &curr_dem = get_vector_info (insn); - bool need_invalidate = false; - - /* Skip if this insn already handled in last iteration. */ - if (skip_one) - { - skip_one = false; - continue; - } - - if (vsetvl_insn_p (rinsn)) - { - curr_vsetvl = rinsn; - /* vsetvl are using vl rather than avl since it will try to merge - with other vsetvl_discard_result. - - v--- avl - vsetvl a5,a4,e8,mf8 # vsetvl - ... ^--- vl - vsetvl zero,a5,e8,mf8 # vsetvl_discard_result - ^--- avl - */ - curr_avl = get_vl (rinsn); - /* vsetvl is a cut point of local backward vsetvl elimination. */ - need_invalidate = true; - } - else if (has_vtype_op (rinsn) && NONDEBUG_INSN_P (PREV_INSN (rinsn)) - && (vsetvl_discard_result_insn_p (PREV_INSN (rinsn)) - || vsetvl_insn_p (PREV_INSN (rinsn)))) - { - curr_vsetvl = PREV_INSN (rinsn); - - if (vsetvl_insn_p (PREV_INSN (rinsn))) - { - /* Need invalidate and skip if it's vsetvl. */ - need_invalidate = true; - /* vsetvl_discard_result_insn_p won't appeared in RTL-SSA, - * so only need to skip for vsetvl. */ - skip_one = true; - } - - curr_avl = curr_dem.get_avl (); - - /* Some instrucion like pred_extract_first don't reqruie avl, so - the avl is null, use vl_placeholder for unify the handling - logic. */ - if (!curr_avl) - curr_avl = vl_placeholder; - } - else if (insn->is_call () || insn->is_asm () - || find_access (insn->defs (), VL_REGNUM) - || find_access (insn->defs (), VTYPE_REGNUM) - || (REG_P (prev_avl) - && find_access (insn->defs (), REGNO (prev_avl)))) - { - /* Invalidate if this insn can't propagate vl, vtype or avl. */ - need_invalidate = true; - prev_dem = vector_insn_info (); - } - else - /* Not interested instruction. */ - continue; - - /* Local AVL compatibility checking is simpler than global, we only - need to check the REGNO is same. */ - if (prev_dem.valid_or_dirty_p () - && prev_dem.skip_avl_compatible_p (curr_dem) - && local_avl_compatible_p (prev_avl, curr_avl)) - { - /* curr_dem and prev_dem is compatible! */ - /* Update avl info since we need to make sure they are fully - compatible before merge. */ - prev_dem.set_avl_info (curr_dem.get_avl_info ()); - /* Merge both and update into curr_vsetvl. */ - prev_dem = curr_dem.local_merge (prev_dem); - change_vsetvl_insn (curr_dem.get_insn (), prev_dem); - /* Then we can drop prev_vsetvl. */ - eliminate_insn (prev_vsetvl); - } - - if (need_invalidate) - { - prev_vsetvl = nullptr; - curr_vsetvl = nullptr; - prev_avl = vl_placeholder; - curr_avl = vl_placeholder; - prev_dem = vector_insn_info (); - } - else - { - prev_vsetvl = curr_vsetvl; - prev_avl = curr_avl; - prev_dem = curr_dem; - } - } -} - -/* Return the first vsetvl instruction in CFG_BB or NULL if - none exists or if a user RVV instruction is enountered - prior to any vsetvl. */ -static rtx_insn * -get_first_vsetvl_before_rvv_insns (basic_block cfg_bb, - enum vsetvl_type insn_type) -{ - gcc_assert (insn_type == VSETVL_DISCARD_RESULT - || insn_type == VSETVL_VTYPE_CHANGE_ONLY); - rtx_insn *rinsn; - FOR_BB_INSNS (cfg_bb, rinsn) - { - if (!NONDEBUG_INSN_P (rinsn)) - continue; - /* If we don't find any inserted vsetvli before user RVV instructions, - we don't need to optimize the vsetvls in this block. */ - if (has_vtype_op (rinsn) || vsetvl_insn_p (rinsn)) - return nullptr; - - if (insn_type == VSETVL_DISCARD_RESULT - && vsetvl_discard_result_insn_p (rinsn)) - return rinsn; - if (insn_type == VSETVL_VTYPE_CHANGE_ONLY - && vsetvl_vtype_change_only_p (rinsn)) - return rinsn; - } - return nullptr; -} - -/* Global user vsetvl optimizaiton: - - Case 1: - bb 1: - vsetvl a5,a4,e8,mf8 - ... - bb 2: - ... - vsetvl zero,a5,e8,mf8 --> Eliminate directly. - - Case 2: - bb 1: - vsetvl a5,a4,e8,mf8 --> vsetvl a5,a4,e32,mf2 - ... - bb 2: - ... - vsetvl zero,a5,e32,mf2 --> Eliminate directly. - - Case 3: - bb 1: - vsetvl a5,a4,e8,mf8 --> vsetvl a5,a4,e32,mf2 - ... - bb 2: - ... - vsetvl a5,a4,e8,mf8 --> vsetvl a5,a4,e32,mf2 - goto bb 3 - bb 3: - ... - vsetvl zero,a5,e32,mf2 --> Eliminate directly. -*/ -bool -pass_vsetvl::global_eliminate_vsetvl_insn (const bb_info *bb) const -{ - rtx_insn *vsetvl_rinsn = NULL; - vector_insn_info dem = vector_insn_info (); - const auto &block_info = get_block_info (bb); - basic_block cfg_bb = bb->cfg_bb (); - - if (block_info.local_dem.valid_or_dirty_p ()) - { - /* Optimize the local vsetvl. */ - dem = block_info.local_dem; - vsetvl_rinsn - = get_first_vsetvl_before_rvv_insns (cfg_bb, VSETVL_DISCARD_RESULT); - } - if (!vsetvl_rinsn) - /* Optimize the global vsetvl inserted by LCM. */ - vsetvl_rinsn = get_vsetvl_at_end (bb, &dem); - - /* No need to optimize if block doesn't have vsetvl instructions. */ - if (!dem.valid_or_dirty_p () || !vsetvl_rinsn || !dem.get_avl_source () - || !dem.has_avl_reg ()) - return false; - - /* Condition 1: Check it has preds. */ - if (EDGE_COUNT (cfg_bb->preds) == 0) - return false; - - /* If all preds has VL/VTYPE status setted by user vsetvls, and these - user vsetvls are all skip_avl_compatible_p with the vsetvl in this - block, we can eliminate this vsetvl instruction. */ - sbitmap avin = m_vector_manager->vector_avin[cfg_bb->index]; - - unsigned int bb_index; - sbitmap_iterator sbi; - rtx avl = dem.get_avl (); - hash_set sets - = get_all_sets (dem.get_avl_source (), true, false, false); - /* Condition 2: All VL/VTYPE available in are all compatible. */ - EXECUTE_IF_SET_IN_BITMAP (avin, 0, bb_index, sbi) - { - const auto &expr = m_vector_manager->vector_exprs[bb_index]; - const auto &insn = expr->get_insn (); - def_info *def = find_access (insn->defs (), REGNO (avl)); - set_info *set = safe_dyn_cast (def); - if (!vsetvl_insn_p (insn->rtl ()) || insn->bb () == bb - || !sets.contains (set)) - return false; - } - - /* Condition 3: We don't do the global optimization for the block - has a pred is entry block or exit block. */ - /* Condition 4: All preds have available VL/VTYPE out. */ - edge e; - edge_iterator ei; - FOR_EACH_EDGE (e, ei, cfg_bb->preds) - { - sbitmap avout = m_vector_manager->vector_avout[e->src->index]; - if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun) - || e->src == EXIT_BLOCK_PTR_FOR_FN (cfun) - || (unsigned int) e->src->index - >= m_vector_manager->vector_block_infos.length () - || bitmap_empty_p (avout)) - return false; - - EXECUTE_IF_SET_IN_BITMAP (avout, 0, bb_index, sbi) - { - const auto &expr = m_vector_manager->vector_exprs[bb_index]; - const auto &insn = expr->get_insn (); - def_info *def = find_access (insn->defs (), REGNO (avl)); - set_info *set = safe_dyn_cast (def); - if (!vsetvl_insn_p (insn->rtl ()) || insn->bb () == bb - || !sets.contains (set) || !expr->skip_avl_compatible_p (dem)) - return false; - } - } - - /* Step1: Reshape the VL/VTYPE status to make sure everything compatible. */ - auto_vec pred_cfg_bbs - = get_dominated_by (CDI_POST_DOMINATORS, cfg_bb); - FOR_EACH_EDGE (e, ei, cfg_bb->preds) - { - sbitmap avout = m_vector_manager->vector_avout[e->src->index]; - EXECUTE_IF_SET_IN_BITMAP (avout, 0, bb_index, sbi) - { - vector_insn_info prev_dem = *m_vector_manager->vector_exprs[bb_index]; - vector_insn_info curr_dem = dem; - insn_info *insn = prev_dem.get_insn (); - if (!pred_cfg_bbs.contains (insn->bb ()->cfg_bb ())) - continue; - /* Update avl info since we need to make sure they are fully - compatible before merge. */ - curr_dem.set_avl_info (prev_dem.get_avl_info ()); - /* Merge both and update into curr_vsetvl. */ - prev_dem = curr_dem.local_merge (prev_dem); - change_vsetvl_insn (insn, prev_dem); - } - } - - /* Step2: eliminate the vsetvl instruction. */ - eliminate_insn (vsetvl_rinsn); - return true; -} - -/* This function does the following post optimization base on RTL_SSA: - - 1. Local user vsetvl optimizations. - 2. Global user vsetvl optimizations. - 3. AVL dependencies removal: - Before VSETVL PASS, RVV instructions pattern is depending on AVL operand - implicitly. Since we will emit VSETVL instruction and make RVV - instructions depending on VL/VTYPE global status registers, we remove the - such AVL operand in the RVV instructions pattern here in order to remove - AVL dependencies when AVL operand is a register operand. - - Before the VSETVL PASS: - li a5,32 - ... - vadd.vv (..., a5) - After the VSETVL PASS: - li a5,32 - vsetvli zero, a5, ... - ... - vadd.vv (..., const_int 0). */ -void -pass_vsetvl::ssa_post_optimization (void) const -{ - for (const bb_info *bb : crtl->ssa->bbs ()) - { - local_eliminate_vsetvl_insn (bb); - bool changed_p = true; - while (changed_p) - { - changed_p = false; - changed_p |= global_eliminate_vsetvl_insn (bb); - } - for (insn_info *insn : bb->real_nondebug_insns ()) - { - rtx_insn *rinsn = insn->rtl (); - if (vlmax_avl_insn_p (rinsn)) - { - eliminate_insn (rinsn); - continue; - } - - /* Erase the AVL operand from the instruction. */ - if (!has_vl_op (rinsn) || !REG_P (get_vl (rinsn))) - continue; - rtx avl = get_vl (rinsn); - if (count_regno_occurrences (rinsn, REGNO (avl)) == 1) - { - /* Get the list of uses for the new instruction. */ - auto attempt = crtl->ssa->new_change_attempt (); - insn_change change (insn); - /* Remove the use of the substituted value. */ - access_array_builder uses_builder (attempt); - uses_builder.reserve (insn->num_uses () - 1); - for (use_info *use : insn->uses ()) - if (use != find_access (insn->uses (), REGNO (avl))) - uses_builder.quick_push (use); - use_array new_uses = use_array (uses_builder.finish ()); - change.new_uses = new_uses; - change.move_range = insn->ebb ()->insn_range (); - rtx pat; - if (fault_first_load_p (rinsn)) - pat = simplify_replace_rtx (PATTERN (rinsn), avl, const0_rtx); - else - { - rtx set = single_set (rinsn); - rtx src - = simplify_replace_rtx (SET_SRC (set), avl, const0_rtx); - pat = gen_rtx_SET (SET_DEST (set), src); - } - bool ok = change_insn (crtl->ssa, change, insn, pat); - gcc_assert (ok); - } - } - } -} - -/* Return true if the SET result is not used by any instructions. */ -static bool -has_no_uses (basic_block cfg_bb, rtx_insn *rinsn, int regno) -{ - /* Handle the following case that can not be detected in RTL_SSA. */ - /* E.g. - li a5, 100 - vsetvli a6, a5... - ... - vadd (use a6) - - The use of "a6" is removed from "vadd" but the information is - not updated in RTL_SSA framework. We don't want to re-new - a new RTL_SSA which is expensive, instead, we use data-flow - analysis to check whether "a6" has no uses. */ - if (bitmap_bit_p (df_get_live_out (cfg_bb), regno)) - return false; - - rtx_insn *iter; - for (iter = NEXT_INSN (rinsn); iter && iter != NEXT_INSN (BB_END (cfg_bb)); - iter = NEXT_INSN (iter)) - if (df_find_use (iter, regno_reg_rtx[regno])) - return false; - - return true; -} - -/* This function does the following post optimization base on dataflow - analysis: - - 1. Change vsetvl rd, rs1 --> vsevl zero, rs1, if rd is not used by any - nondebug instructions. Even though this PASS runs after RA and it doesn't - help for reduce register pressure, it can help instructions scheduling since - we remove the dependencies. - - 2. Remove redundant user vsetvls base on outcome of Phase 4 (LCM) && Phase 5 - (AVL dependencies removal). */ -void -pass_vsetvl::df_post_optimization (void) const -{ - df_analyze (); - hash_set to_delete; - basic_block cfg_bb; - rtx_insn *rinsn; - FOR_ALL_BB_FN (cfg_bb, cfun) - { - FOR_BB_INSNS (cfg_bb, rinsn) - { - if (NONDEBUG_INSN_P (rinsn) && vsetvl_insn_p (rinsn)) - { - rtx vl = get_vl (rinsn); - vector_insn_info info; - info.parse_insn (rinsn); - bool to_delete_p = m_vector_manager->to_delete_p (rinsn); - bool to_refine_p = m_vector_manager->to_refine_p (rinsn); - if (has_no_uses (cfg_bb, rinsn, REGNO (vl))) - { - if (to_delete_p) - to_delete.add (rinsn); - else if (to_refine_p) - { - rtx new_pat = gen_vsetvl_pat (VSETVL_VTYPE_CHANGE_ONLY, - info, NULL_RTX); - validate_change_or_fail (rinsn, &PATTERN (rinsn), new_pat, - false); - } - else if (!vlmax_avl_p (info.get_avl ())) - { - rtx new_pat = gen_vsetvl_pat (VSETVL_DISCARD_RESULT, info, - NULL_RTX); - validate_change_or_fail (rinsn, &PATTERN (rinsn), new_pat, - false); - } - } - } - } - } - for (rtx_insn *rinsn : to_delete) - eliminate_insn (rinsn); -} - -void -pass_vsetvl::init (void) -{ - if (optimize > 0) - { - /* Initialization of RTL_SSA. */ - calculate_dominance_info (CDI_DOMINATORS); - calculate_dominance_info (CDI_POST_DOMINATORS); - df_analyze (); - crtl->ssa = new function_info (cfun); - } - - m_vector_manager = new vector_infos_manager (); - compute_probabilities (); - - if (dump_file && (dump_flags & TDF_DETAILS)) - { - fprintf (dump_file, "\nPrologue: Initialize vector infos\n"); - m_vector_manager->dump (dump_file); - } -} - -void -pass_vsetvl::done (void) -{ - if (optimize > 0) - { - /* Finalization of RTL_SSA. */ - free_dominance_info (CDI_DOMINATORS); - free_dominance_info (CDI_POST_DOMINATORS); - if (crtl->ssa->perform_pending_updates ()) - cleanup_cfg (0); - delete crtl->ssa; - crtl->ssa = nullptr; - } - m_vector_manager->release (); - delete m_vector_manager; - m_vector_manager = nullptr; -} - -/* Compute probability for each block. */ -void -pass_vsetvl::compute_probabilities (void) -{ - /* Don't compute it in -O0 since we don't need it. */ - if (!optimize) - return; - edge e; - edge_iterator ei; - - for (const bb_info *bb : crtl->ssa->bbs ()) - { - basic_block cfg_bb = bb->cfg_bb (); - auto &curr_prob = get_block_info (cfg_bb).probability; - - /* GCC assume entry block (bb 0) are always so - executed so set its probability as "always". */ - if (ENTRY_BLOCK_PTR_FOR_FN (cfun) == cfg_bb) - curr_prob = profile_probability::always (); - /* Exit block (bb 1) is the block we don't need to process. */ - if (EXIT_BLOCK_PTR_FOR_FN (cfun) == cfg_bb) - continue; - - gcc_assert (curr_prob.initialized_p ()); - FOR_EACH_EDGE (e, ei, cfg_bb->succs) - { - auto &new_prob = get_block_info (e->dest).probability; - /* Normally, the edge probability should be initialized. - However, some special testing code which is written in - GIMPLE IR style force the edge probility uninitialized, - we conservatively set it as never so that it will not - affect PRE (Phase 3 && Phse 4). */ - if (!e->probability.initialized_p ()) - new_prob = profile_probability::never (); - else if (!new_prob.initialized_p ()) - new_prob = curr_prob * e->probability; - else if (new_prob == profile_probability::always ()) - continue; - else - new_prob += curr_prob * e->probability; - } - } -} - -/* Lazy vsetvl insertion for optimize > 0. */ -void -pass_vsetvl::lazy_vsetvl (void) -{ - if (dump_file) - fprintf (dump_file, - "\nEntering Lazy VSETVL PASS and Handling %d basic blocks for " - "function:%s\n", - n_basic_blocks_for_fn (cfun), function_name (cfun)); - - /* Phase 1 - Compute the local dems within each block. - The data-flow analysis within each block is backward analysis. */ - if (dump_file) - fprintf (dump_file, "\nPhase 1: Compute local backward vector infos\n"); - for (const bb_info *bb : crtl->ssa->bbs ()) - compute_local_backward_infos (bb); - if (dump_file && (dump_flags & TDF_DETAILS)) - m_vector_manager->dump (dump_file); - - /* Phase 2 - Emit vsetvl instructions within each basic block according to - demand, compute and save ANTLOC && AVLOC of each block. */ - if (dump_file) - fprintf (dump_file, - "\nPhase 2: Emit vsetvl instruction within each block\n"); - for (const bb_info *bb : crtl->ssa->bbs ()) - emit_local_forward_vsetvls (bb); - if (dump_file && (dump_flags & TDF_DETAILS)) - m_vector_manager->dump (dump_file); - - /* Phase 3 - Propagate demanded info across blocks. */ - if (dump_file) - fprintf (dump_file, "\nPhase 3: Demands propagation across blocks\n"); - vsetvl_fusion (); - - /* Phase 4 - Lazy code motion. */ - if (dump_file) - fprintf (dump_file, "\nPhase 4: PRE vsetvl by Lazy code motion (LCM)\n"); - pre_vsetvl (); - - /* Phase 5 - Post optimization base on RTL_SSA. */ - if (dump_file) - fprintf (dump_file, "\nPhase 5: Post optimization base on RTL_SSA\n"); - ssa_post_optimization (); - - /* Phase 6 - Post optimization base on data-flow analysis. */ - if (dump_file) - fprintf (dump_file, - "\nPhase 6: Post optimization base on data-flow analysis\n"); - df_post_optimization (); -} - -/* Main entry point for this pass. */ -unsigned int -pass_vsetvl::execute (function *) -{ - if (n_basic_blocks_for_fn (cfun) <= 0) - return 0; - - /* The RVV instruction may change after split which is not a stable - instruction. We need to split it here to avoid potential issue - since the VSETVL PASS is insert before split PASS. */ - split_all_insns (); - - /* Early return for there is no vector instructions. */ - if (!has_vector_insn (cfun)) - return 0; - - init (); - - if (!optimize) - simple_vsetvl (); - else - lazy_vsetvl (); - - done (); - return 0; -} - -rtl_opt_pass * -make_pass_vsetvl (gcc::context *ctxt) -{ - return new pass_vsetvl (ctxt); -} -- 2.36.3