From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from smtpbgbr2.qq.com (smtpbgbr2.qq.com [54.207.22.56]) by sourceware.org (Postfix) with ESMTPS id 0889A3857702 for ; Thu, 19 Oct 2023 08:34:05 +0000 (GMT) DMARC-Filter: OpenDMARC Filter v1.4.2 sourceware.org 0889A3857702 Authentication-Results: sourceware.org; dmarc=none (p=none dis=none) header.from=rivai.ai Authentication-Results: sourceware.org; spf=pass smtp.mailfrom=rivai.ai ARC-Filter: OpenARC Filter v1.0.0 sourceware.org 0889A3857702 Authentication-Results: server2.sourceware.org; arc=none smtp.remote-ip=54.207.22.56 ARC-Seal: i=1; a=rsa-sha256; d=sourceware.org; s=key; t=1697704452; cv=none; b=ATEfWTU/+Q98UfqMx4hJDDILDAjiKnQTccjo+ySub2TrmZXdtkHg/78f0Th1rQ/ZwLrPc8BsHBd3HZKjZV6+sVkeV5glFP32VYuRkaeJlparfzhZuUrRgDOm7HS8l9EaJqxZTVFqWcqCi08xjaBMYYHI9m7RTSnoXvsBxpO8424= ARC-Message-Signature: i=1; a=rsa-sha256; d=sourceware.org; s=key; t=1697704452; c=relaxed/simple; bh=yY0G5RwqmI+xWwywrRkgI5+XWY12ybOnjjzwQDwJvng=; h=From:To:Subject:Date:Message-Id:MIME-Version; b=kOKnqH8arj+RUcSHdiWgTDw/SgR224uWVdKx/DetwHc+MVN/IVmKbMKGVl70UJ4S9oXQn+Q/txEtb5MWR4cOC/bJzDkPemE6DELx/gW5+fb4rhJtFXFTB0/8iVFAswiAwuLVx1xT1O+lQQ4t5GgexDEBW3zn7pVqqqF+ppZjfEk= ARC-Authentication-Results: i=1; server2.sourceware.org X-QQ-mid: bizesmtp65t1697704438tczjd1pg Received: from rios-cad121.hadoop.rioslab.org ( [58.60.1.9]) by bizesmtp.qq.com (ESMTP) with id ; Thu, 19 Oct 2023 16:33:57 +0800 (CST) X-QQ-SSF: 01400000000000C0F000000A0000000 X-QQ-FEAT: Mxc3K7F63kylehL/Z2MOMFnv5JMn4CFss/a5ZQSaeTTwRQVC0Eb7PoSSS34tz lt5i/7TOh+BBFaXv1DiJXXpFL1nxytf7um/UdbkW2zc0N15xLFjuaFSqO8WbsQF63Fd9Aw5 AdDx7ZI0YVGw6ebABzsvdW4rem8l4k3tkskqIVJEL5fmwR402Ptpjo7rka8lat89Bn4eK+e VrRxkj0CvnTGzn6FJyxQESLPWCn/VTQvrhM/iyrccLo7oR8q5XDRo2hVQXc3hA/EELggMHZ +4neDzWTklYRk5LNYf6GS/1aK4eErBoaxPW/XmjI3I4s9feIBvXmyHG8XRI+MrcJWrcN1F1 4qEb5e8aml7CZYkJ7SVT3w1R5KQf37sbh+6YQ2IE+8ZaTPsBLpU4J++0qW3jYhonbau8tyx Yya1RcUvOVI= X-QQ-GoodBg: 2 X-BIZMAIL-ID: 9760710122705807349 From: Lehua Ding To: gcc-patches@gcc.gnu.org Cc: juzhe.zhong@rivai.ai, kito.cheng@gmail.com, rdapp.gcc@gmail.com, palmer@rivosinc.com, jeffreyalaw@gmail.com, lehua.ding@rivai.ai Subject: [PATCH V3 07/11] RISC-V: P7: Move earliest fuse and lcm code to pre_vsetvl class Date: Thu, 19 Oct 2023 16:33:29 +0800 Message-Id: <20231019083333.2052340-8-lehua.ding@rivai.ai> X-Mailer: git-send-email 2.36.3 In-Reply-To: <20231019083333.2052340-1-lehua.ding@rivai.ai> References: <20231019083333.2052340-1-lehua.ding@rivai.ai> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-QQ-SENDSIZE: 520 Feedback-ID: bizesmtp:rivai.ai:qybglogicsvrgz:qybglogicsvrgz6a-0 X-Spam-Status: No, score=-11.6 required=5.0 tests=BAYES_00,GIT_PATCH_0,KAM_DMARC_STATUS,RCVD_IN_DNSWL_NONE,RCVD_IN_MSPIKE_H2,SPF_HELO_PASS,SPF_PASS,TXREP autolearn=ham autolearn_force=no version=3.4.6 X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on server2.sourceware.org List-Id: gcc/ChangeLog: * config/riscv/riscv-vsetvl.cc (pre_vsetvl::earliest_fuse_vsetvl_info): New. (pre_vsetvl::pre_global_vsetvl_info): New. (pass_vsetvl::prune_expressions): Removed. (pass_vsetvl::compute_local_properties): Removed. (pass_vsetvl::earliest_fusion): Removed. (pass_vsetvl::vsetvl_fusion): Removed. (pass_vsetvl::can_refine_vsetvl_p): Removed. (pass_vsetvl::refine_vsetvls): Removed. (pass_vsetvl::cleanup_vsetvls): Removed. (pass_vsetvl::commit_vsetvls): Removed. (pass_vsetvl::pre_vsetvl): Removed. --- gcc/config/riscv/riscv-vsetvl.cc | 1004 +++++++++++------------------- 1 file changed, 361 insertions(+), 643 deletions(-) diff --git a/gcc/config/riscv/riscv-vsetvl.cc b/gcc/config/riscv/riscv-vsetvl.cc index 27d47d7c039..855edd6d0f5 100644 --- a/gcc/config/riscv/riscv-vsetvl.cc +++ b/gcc/config/riscv/riscv-vsetvl.cc @@ -2721,7 +2721,6 @@ public: } }; - void pre_vsetvl::compute_avl_def_data () { @@ -3241,6 +3240,367 @@ pre_vsetvl::fuse_local_vsetvl_info () } +bool +pre_vsetvl::earliest_fuse_vsetvl_info () +{ + compute_avl_def_data (); + compute_vsetvl_def_data (); + compute_lcm_local_properties (); + + unsigned num_exprs = m_exprs.length (); + struct edge_list *m_edges = create_edge_list (); + unsigned num_edges = NUM_EDGES (m_edges); + sbitmap *antin + = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), num_exprs); + sbitmap *antout + = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), num_exprs); + + sbitmap *earliest = sbitmap_vector_alloc (num_edges, num_exprs); + + compute_available (m_avloc, m_kill, m_avout, m_avin); + compute_antinout_edge (m_antloc, m_transp, antin, antout); + compute_earliest (m_edges, num_exprs, antin, antout, m_avout, m_kill, + earliest); + + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "\n Compute LCM earliest insert data:\n\n"); + fprintf (dump_file, " Expression List (%u):\n", num_exprs); + for (unsigned i = 0; i < num_exprs; i++) + { + const auto &info = *m_exprs[i]; + fprintf (dump_file, " Expr[%u]: ", i); + info.dump (dump_file, " "); + } + fprintf (dump_file, "\n bitmap data:\n"); + for (const bb_info *bb : crtl->ssa->bbs ()) + { + unsigned int i = bb->index (); + fprintf (dump_file, " BB %u:\n", i); + fprintf (dump_file, " avloc: "); + dump_bitmap_file (dump_file, m_avloc[i]); + fprintf (dump_file, " kill: "); + dump_bitmap_file (dump_file, m_kill[i]); + fprintf (dump_file, " antloc: "); + dump_bitmap_file (dump_file, m_antloc[i]); + fprintf (dump_file, " transp: "); + dump_bitmap_file (dump_file, m_transp[i]); + + fprintf (dump_file, " avin: "); + dump_bitmap_file (dump_file, m_avin[i]); + fprintf (dump_file, " avout: "); + dump_bitmap_file (dump_file, m_avout[i]); + fprintf (dump_file, " antin: "); + dump_bitmap_file (dump_file, antin[i]); + fprintf (dump_file, " antout: "); + dump_bitmap_file (dump_file, antout[i]); + } + fprintf (dump_file, "\n"); + fprintf (dump_file, " earliest:\n"); + for (unsigned ed = 0; ed < num_edges; ed++) + { + edge eg = INDEX_EDGE (m_edges, ed); + + if (bitmap_empty_p (earliest[ed])) + continue; + fprintf (dump_file, " Edge(bb %u -> bb %u): ", eg->src->index, + eg->dest->index); + dump_bitmap_file (dump_file, earliest[ed]); + } + fprintf (dump_file, "\n"); + } + + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, " Fused global info result:\n"); + } + + bool changed = false; + for (unsigned ed = 0; ed < num_edges; ed++) + { + sbitmap e = earliest[ed]; + if (bitmap_empty_p (e)) + continue; + + unsigned int expr_index; + sbitmap_iterator sbi; + EXECUTE_IF_SET_IN_BITMAP (e, 0, expr_index, sbi) + { + vsetvl_info &curr_info = *m_exprs[expr_index]; + if (!curr_info.valid_p ()) + continue; + + edge eg = INDEX_EDGE (m_edges, ed); + if (eg->probability == profile_probability::never ()) + continue; + if (eg->src == ENTRY_BLOCK_PTR_FOR_FN (cfun) + || eg->dest == EXIT_BLOCK_PTR_FOR_FN (cfun)) + continue; + + vsetvl_block_info &src_block_info = get_block_info (eg->src); + vsetvl_block_info &dest_block_info = get_block_info (eg->dest); + + if (src_block_info.probability + == profile_probability::uninitialized ()) + continue; + + if (src_block_info.empty_p ()) + { + vsetvl_info new_curr_info = curr_info; + new_curr_info.set_bb (crtl->ssa->bb (eg->dest)); + bool has_compatible_p = false; + unsigned int def_expr_index; + sbitmap_iterator sbi2; + EXECUTE_IF_SET_IN_BITMAP ( + m_vsetvl_def_in[new_curr_info.get_bb ()->index ()], 0, + def_expr_index, sbi2) + { + vsetvl_info &prev_info = *m_vsetvl_def_exprs[def_expr_index]; + if (!prev_info.valid_p ()) + continue; + if (m_dem.compatible_p (prev_info, new_curr_info)) + { + has_compatible_p = true; + break; + } + } + if (!has_compatible_p) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, + " Forbidden lift up vsetvl info into bb %u " + "since there is no vsetvl info that reaching in " + "is compatible with it:", + eg->src->index); + curr_info.dump (dump_file, " "); + } + continue; + } + + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, + " Set empty bb %u to info:", eg->src->index); + curr_info.dump (dump_file, " "); + } + src_block_info.set_info (curr_info); + src_block_info.probability = dest_block_info.probability; + changed = true; + } + else if (src_block_info.has_info ()) + { + vsetvl_info &prev_info = src_block_info.get_exit_info (); + gcc_assert (prev_info.valid_p ()); + + if (m_dem.compatible_p (prev_info, curr_info)) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, " Fuse curr info since prev info " + "compatible with it:\n"); + fprintf (dump_file, " prev_info: "); + prev_info.dump (dump_file, " "); + fprintf (dump_file, " curr_info: "); + curr_info.dump (dump_file, " "); + } + m_dem.merge (prev_info, curr_info); + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, " prev_info after fused: "); + prev_info.dump (dump_file, " "); + fprintf (dump_file, "\n"); + } + changed = true; + if (src_block_info.has_info ()) + src_block_info.probability += dest_block_info.probability; + } + else if (src_block_info.has_info () + && !m_dem.compatible_p (prev_info, curr_info)) + { + /* Cancel lift up if probabilities are equal. */ + if (successors_probability_equal_p (eg->src)) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, + " Change empty bb %u to from:", + eg->src->index); + prev_info.dump (dump_file, " "); + fprintf (dump_file, + " to (higher probability):"); + curr_info.dump (dump_file, " "); + } + src_block_info.set_empty_info (); + src_block_info.probability + = profile_probability::uninitialized (); + changed = true; + } + /* Choose the one with higher probability. */ + else if (dest_block_info.probability + > src_block_info.probability) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, + " Change empty bb %u to from:", + eg->src->index); + prev_info.dump (dump_file, " "); + fprintf (dump_file, + " to (higher probability):"); + curr_info.dump (dump_file, " "); + } + src_block_info.set_info (curr_info); + src_block_info.probability = dest_block_info.probability; + changed = true; + } + } + } + else + { + vsetvl_info &prev_info = src_block_info.get_exit_info (); + if (!prev_info.valid_p () + || m_dem.available_p (prev_info, curr_info)) + continue; + + if (m_dem.compatible_p (prev_info, curr_info)) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, " Fuse curr info since prev info " + "compatible with it:\n"); + fprintf (dump_file, " prev_info: "); + prev_info.dump (dump_file, " "); + fprintf (dump_file, " curr_info: "); + curr_info.dump (dump_file, " "); + } + m_dem.merge (prev_info, curr_info); + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, " prev_info after fused: "); + prev_info.dump (dump_file, " "); + fprintf (dump_file, "\n"); + } + changed = true; + } + } + } + } + + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "\n"); + } + + sbitmap_vector_free (antin); + sbitmap_vector_free (antout); + sbitmap_vector_free (earliest); + free_edge_list (m_edges); + + return changed; +} + +void +pre_vsetvl::pre_global_vsetvl_info () +{ + compute_avl_def_data (); + compute_vsetvl_def_data (); + compute_lcm_local_properties (); + + unsigned num_exprs = m_exprs.length (); + m_edges = pre_edge_lcm_avs (num_exprs, m_transp, m_avloc, m_antloc, m_kill, + m_avin, m_avout, &m_insert, &m_del); + unsigned num_edges = NUM_EDGES (m_edges); + + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "\n Compute LCM insert and delete data:\n\n"); + fprintf (dump_file, " Expression List (%u):\n", num_exprs); + for (unsigned i = 0; i < num_exprs; i++) + { + const auto &info = *m_exprs[i]; + fprintf (dump_file, " Expr[%u]: ", i); + info.dump (dump_file, " "); + } + fprintf (dump_file, "\n bitmap data:\n"); + for (const bb_info *bb : crtl->ssa->bbs ()) + { + unsigned i = bb->index (); + fprintf (dump_file, " BB %u:\n", i); + fprintf (dump_file, " avloc: "); + dump_bitmap_file (dump_file, m_avloc[i]); + fprintf (dump_file, " kill: "); + dump_bitmap_file (dump_file, m_kill[i]); + fprintf (dump_file, " antloc: "); + dump_bitmap_file (dump_file, m_antloc[i]); + fprintf (dump_file, " transp: "); + dump_bitmap_file (dump_file, m_transp[i]); + + fprintf (dump_file, " avin: "); + dump_bitmap_file (dump_file, m_avin[i]); + fprintf (dump_file, " avout: "); + dump_bitmap_file (dump_file, m_avout[i]); + fprintf (dump_file, " del: "); + dump_bitmap_file (dump_file, m_del[i]); + } + fprintf (dump_file, "\n"); + fprintf (dump_file, " insert:\n"); + for (unsigned ed = 0; ed < num_edges; ed++) + { + edge eg = INDEX_EDGE (m_edges, ed); + + if (bitmap_empty_p (m_insert[ed])) + continue; + fprintf (dump_file, " Edge(bb %u -> bb %u): ", eg->src->index, + eg->dest->index); + dump_bitmap_file (dump_file, m_insert[ed]); + } + } + + /* Remove vsetvl infos as LCM suggest */ + for (const bb_info *bb : crtl->ssa->bbs ()) + { + sbitmap d = m_del[bb->index ()]; + if (bitmap_count_bits (d) == 0) + continue; + gcc_assert (bitmap_count_bits (d) == 1); + unsigned expr_index = bitmap_first_set_bit (d); + vsetvl_info &info = *m_exprs[expr_index]; + gcc_assert (info.valid_p ()); + gcc_assert (info.get_bb () == bb); + const vsetvl_block_info &block_info = get_block_info (info.get_bb ()); + gcc_assert (block_info.get_entry_info () == info); + info.set_delete (); + } + + for (const bb_info *bb : crtl->ssa->bbs ()) + { + vsetvl_block_info &block_info = get_block_info (bb); + if (block_info.empty_p ()) + continue; + vsetvl_info &curr_info = block_info.get_entry_info (); + if (curr_info.delete_p ()) + { + if (block_info.infos.is_empty ()) + continue; + curr_info = block_info.infos[0]; + } + if (curr_info.valid_p () && !curr_info.vl_use_by_non_rvv_insn_p () + && preds_has_same_avl_p (curr_info)) + curr_info.set_change_vtype_only (); + + vsetvl_info prev_info = vsetvl_info (); + prev_info.set_empty (); + for (auto &curr_info : block_info.infos) + { + if (prev_info.valid_p () && curr_info.valid_p () + && m_dem.avl_available_p (prev_info, curr_info)) + curr_info.set_change_vtype_only (); + prev_info = curr_info; + } + } +} + const pass_data pass_data_vsetvl = { RTL_PASS, /* type */ "vsetvl", /* name */ @@ -3379,648 +3739,6 @@ make_pass_vsetvl (gcc::context *ctxt) return new pass_vsetvl (ctxt); } -/* Assemble the candidates expressions for LCM. */ -void -pass_vsetvl::prune_expressions (void) -{ - for (const bb_info *bb : crtl->ssa->bbs ()) - { - if (m_vector_manager->vector_block_infos[bb->index ()] - .local_dem.valid_or_dirty_p ()) - m_vector_manager->create_expr ( - m_vector_manager->vector_block_infos[bb->index ()].local_dem); - if (m_vector_manager->vector_block_infos[bb->index ()] - .reaching_out.valid_or_dirty_p ()) - m_vector_manager->create_expr ( - m_vector_manager->vector_block_infos[bb->index ()].reaching_out); - } - - if (dump_file) - { - fprintf (dump_file, "\nThe total VSETVL expression num = %d\n", - m_vector_manager->vector_exprs.length ()); - fprintf (dump_file, "Expression List:\n"); - for (size_t i = 0; i < m_vector_manager->vector_exprs.length (); i++) - { - fprintf (dump_file, "Expr[%ld]:\n", i); - m_vector_manager->vector_exprs[i]->dump (dump_file); - fprintf (dump_file, "\n"); - } - } -} - -/* Compute the local properties of each recorded expression. - - Local properties are those that are defined by the block, irrespective of - other blocks. - - An expression is transparent in a block if its operands are not modified - in the block. - - An expression is computed (locally available) in a block if it is computed - at least once and expression would contain the same value if the - computation was moved to the end of the block. - - An expression is locally anticipatable in a block if it is computed at - least once and expression would contain the same value if the computation - was moved to the beginning of the block. */ -void -pass_vsetvl::compute_local_properties (void) -{ - /* - If T is locally available at the end of a block, then T' must be - available at the end of the same block. Since some optimization has - occurred earlier, T' might not be locally available, however, it must - have been previously computed on all paths. As a formula, T at AVLOC(B) - implies that T' at AVOUT(B). - An "available occurrence" is one that is the last occurrence in the - basic block and the operands are not modified by following statements in - the basic block [including this insn]. - - - If T is locally anticipated at the beginning of a block, then either - T', is locally anticipated or it is already available from previous - blocks. As a formula, this means that T at ANTLOC(B) implies that T' at - ANTLOC(B) at AVIN(B). - An "anticipatable occurrence" is one that is the first occurrence in the - basic block, the operands are not modified in the basic block prior - to the occurrence and the output is not used between the start of - the block and the occurrence. */ - - basic_block cfg_bb; - for (const bb_info *bb : crtl->ssa->bbs ()) - { - unsigned int curr_bb_idx = bb->index (); - if (curr_bb_idx == ENTRY_BLOCK || curr_bb_idx == EXIT_BLOCK) - continue; - const auto local_dem - = m_vector_manager->vector_block_infos[curr_bb_idx].local_dem; - const auto reaching_out - = m_vector_manager->vector_block_infos[curr_bb_idx].reaching_out; - - /* Compute transparent. */ - for (size_t i = 0; i < m_vector_manager->vector_exprs.length (); i++) - { - const auto *expr = m_vector_manager->vector_exprs[i]; - if (local_dem.valid_or_dirty_p () || local_dem.unknown_p ()) - bitmap_clear_bit (m_vector_manager->vector_transp[curr_bb_idx], i); - else if (expr->has_avl_reg ()) - { - rtx reg = expr->get_avl_or_vl_reg (); - for (const insn_info *insn : bb->real_nondebug_insns ()) - { - if (find_access (insn->defs (), REGNO (reg))) - { - bitmap_clear_bit ( - m_vector_manager->vector_transp[curr_bb_idx], i); - break; - } - else if (vlmax_avl_p (expr->get_avl ()) - && find_access (insn->uses (), REGNO (reg))) - { - bitmap_clear_bit ( - m_vector_manager->vector_transp[curr_bb_idx], i); - break; - } - } - } - } - - /* Compute anticipatable occurrences. */ - if (local_dem.valid_or_dirty_p ()) - if (anticipatable_occurrence_p (bb, local_dem)) - bitmap_set_bit (m_vector_manager->vector_antic[curr_bb_idx], - m_vector_manager->get_expr_id (local_dem)); - - /* Compute available occurrences. */ - if (reaching_out.valid_or_dirty_p ()) - { - auto_vec available_list - = m_vector_manager->get_all_available_exprs (reaching_out); - for (size_t i = 0; i < available_list.length (); i++) - { - const vector_insn_info *expr - = m_vector_manager->vector_exprs[available_list[i]]; - if (available_occurrence_p (bb, *expr)) - bitmap_set_bit (m_vector_manager->vector_comp[curr_bb_idx], - available_list[i]); - } - } - - if (loop_basic_block_p (bb->cfg_bb ()) && local_dem.valid_or_dirty_p () - && reaching_out.valid_or_dirty_p () - && !local_dem.compatible_p (reaching_out)) - bitmap_clear_bit (m_vector_manager->vector_antic[curr_bb_idx], - m_vector_manager->get_expr_id (local_dem)); - } - - /* Compute kill for each basic block using: - - ~(TRANSP | COMP) - */ - - FOR_EACH_BB_FN (cfg_bb, cfun) - { - bitmap_ior (m_vector_manager->vector_kill[cfg_bb->index], - m_vector_manager->vector_transp[cfg_bb->index], - m_vector_manager->vector_comp[cfg_bb->index]); - bitmap_not (m_vector_manager->vector_kill[cfg_bb->index], - m_vector_manager->vector_kill[cfg_bb->index]); - } - - FOR_EACH_BB_FN (cfg_bb, cfun) - { - edge e; - edge_iterator ei; - - /* If the current block is the destination of an abnormal edge, we - kill all trapping (for PRE) and memory (for hoist) expressions - because we won't be able to properly place the instruction on - the edge. So make them neither anticipatable nor transparent. - This is fairly conservative. - - ??? For hoisting it may be necessary to check for set-and-jump - instructions here, not just for abnormal edges. The general problem - is that when an expression cannot not be placed right at the end of - a basic block we should account for any side-effects of a subsequent - jump instructions that could clobber the expression. It would - be best to implement this check along the lines of - should_hoist_expr_to_dom where the target block is already known - and, hence, there's no need to conservatively prune expressions on - "intermediate" set-and-jump instructions. */ - FOR_EACH_EDGE (e, ei, cfg_bb->preds) - if (e->flags & EDGE_COMPLEX) - { - bitmap_clear (m_vector_manager->vector_antic[cfg_bb->index]); - bitmap_clear (m_vector_manager->vector_transp[cfg_bb->index]); - } - } -} - -/* Fuse demand info for earliest edge. */ -bool -pass_vsetvl::earliest_fusion (void) -{ - bool changed_p = false; - for (int ed = 0; ed < NUM_EDGES (m_vector_manager->vector_edge_list); ed++) - { - for (size_t i = 0; i < m_vector_manager->vector_exprs.length (); i++) - { - auto &expr = *m_vector_manager->vector_exprs[i]; - if (expr.empty_p ()) - continue; - edge eg = INDEX_EDGE (m_vector_manager->vector_edge_list, ed); - /* If it is the edge that we never reach, skip its possible PRE - fusion conservatively. */ - if (eg->probability == profile_probability::never ()) - break; - if (eg->src == ENTRY_BLOCK_PTR_FOR_FN (cfun) - || eg->dest == EXIT_BLOCK_PTR_FOR_FN (cfun)) - break; - if (bitmap_bit_p (m_vector_manager->vector_earliest[ed], i)) - { - auto &src_block_info = get_block_info (eg->src); - auto &dest_block_info = get_block_info (eg->dest); - if (src_block_info.reaching_out.unknown_p ()) - break; - - gcc_assert (!(eg->flags & EDGE_ABNORMAL)); - vector_insn_info new_info = vector_insn_info (); - profile_probability prob = src_block_info.probability; - /* We don't fuse user vsetvl into EMPTY or - DIRTY (EMPTY but polluted) block for these - following reasons: - - - The user vsetvl instruction is configured as - no side effects that the previous passes - (GSCE, Loop-invariant, ..., etc) - should be able to do a good job on optimization - of user explicit vsetvls so we don't need to - PRE optimization (The user vsetvls should be - on the optimal local already before this pass) - again for user vsetvls in VSETVL PASS here - (Phase 3 && Phase 4). - - - Allowing user vsetvls be optimized in PRE - optimization here (Phase 3 && Phase 4) will - complicate the codes so much so we prefer user - vsetvls be optimized in post-optimization - (Phase 5 && Phase 6). */ - if (vsetvl_insn_p (expr.get_insn ()->rtl ())) - { - if (src_block_info.reaching_out.empty_p ()) - continue; - else if (src_block_info.reaching_out.dirty_p () - && !src_block_info.reaching_out.compatible_p (expr)) - { - new_info.set_empty (); - /* Update probability as uninitialized status so that - we won't try to fuse any demand info into such EMPTY - block any more. */ - prob = profile_probability::uninitialized (); - update_block_info (eg->src->index, prob, new_info); - continue; - } - } - - if (src_block_info.reaching_out.empty_p ()) - { - if (src_block_info.probability - == profile_probability::uninitialized ()) - continue; - new_info = expr.global_merge (expr, eg->src->index); - new_info.set_dirty (); - prob = dest_block_info.probability; - update_block_info (eg->src->index, prob, new_info); - changed_p = true; - } - else if (src_block_info.reaching_out.dirty_p ()) - { - /* DIRTY -> DIRTY or VALID -> DIRTY. */ - if (demands_can_be_fused_p (src_block_info.reaching_out, - expr)) - { - new_info = src_block_info.reaching_out.global_merge ( - expr, eg->src->index); - new_info.set_dirty (); - prob += dest_block_info.probability; - } - else if (!src_block_info.reaching_out.compatible_p (expr) - && !m_vector_manager->earliest_fusion_worthwhile_p ( - eg->src)) - { - new_info.set_empty (); - prob = profile_probability::uninitialized (); - } - else if (!src_block_info.reaching_out.compatible_p (expr) - && dest_block_info.probability - > src_block_info.probability) - { - new_info = expr; - new_info.set_dirty (); - prob = dest_block_info.probability; - } - else - continue; - update_block_info (eg->src->index, prob, new_info); - changed_p = true; - } - else - { - rtx vl = NULL_RTX; - if (vsetvl_insn_p ( - src_block_info.reaching_out.get_insn ()->rtl ()) - && vsetvl_dominated_by_p (eg->src, expr, - src_block_info.reaching_out, - true)) - ; - else if (!demands_can_be_fused_p (src_block_info.reaching_out, - expr)) - continue; - else if (!earliest_pred_can_be_fused_p ( - crtl->ssa->bb (eg->src), - src_block_info.reaching_out, expr, &vl)) - continue; - - vector_insn_info new_info - = src_block_info.reaching_out.global_merge (expr, - eg->src->index); - - prob = std::max (dest_block_info.probability, - src_block_info.probability); - change_vsetvl_insn (new_info.get_insn (), new_info, vl); - update_block_info (eg->src->index, prob, new_info); - changed_p = true; - } - } - } - } - return changed_p; -} - -/* Fuse VSETVL demand info according LCM computed location. */ -void -pass_vsetvl::vsetvl_fusion (void) -{ - /* Fuse VSETVL demand info until VSETVL CFG fixed. */ - bool changed_p = true; - int fusion_no = 0; - while (changed_p) - { - changed_p = false; - fusion_no++; - prune_expressions (); - m_vector_manager->create_bitmap_vectors (); - compute_local_properties (); - /* Compute global availability. */ - compute_available (m_vector_manager->vector_comp, - m_vector_manager->vector_kill, - m_vector_manager->vector_avout, - m_vector_manager->vector_avin); - /* Compute global anticipatability. */ - compute_antinout_edge (m_vector_manager->vector_antic, - m_vector_manager->vector_transp, - m_vector_manager->vector_antin, - m_vector_manager->vector_antout); - /* Compute earliestness. */ - compute_earliest (m_vector_manager->vector_edge_list, - m_vector_manager->vector_exprs.length (), - m_vector_manager->vector_antin, - m_vector_manager->vector_antout, - m_vector_manager->vector_avout, - m_vector_manager->vector_kill, - m_vector_manager->vector_earliest); - changed_p |= earliest_fusion (); - if (dump_file && (dump_flags & TDF_DETAILS)) - { - fprintf (dump_file, "\nEARLIEST fusion %d\n", fusion_no); - m_vector_manager->dump (dump_file); - } - m_vector_manager->free_bitmap_vectors (); - if (!m_vector_manager->vector_exprs.is_empty ()) - m_vector_manager->vector_exprs.release (); - } -} - -/* Return true if VSETVL in the block can be refined as vsetvl zero,zero. */ -bool -pass_vsetvl::can_refine_vsetvl_p (const basic_block cfg_bb, - const vector_insn_info &info) const -{ - if (!m_vector_manager->all_same_ratio_p ( - m_vector_manager->vector_avin[cfg_bb->index])) - return false; - - if (!m_vector_manager->all_same_avl_p ( - cfg_bb, m_vector_manager->vector_avin[cfg_bb->index])) - return false; - - size_t expr_id - = bitmap_first_set_bit (m_vector_manager->vector_avin[cfg_bb->index]); - if (!m_vector_manager->vector_exprs[expr_id]->same_vlmax_p (info)) - return false; - if (!m_vector_manager->vector_exprs[expr_id]->compatible_avl_p (info)) - return false; - - edge e; - edge_iterator ei; - bool all_valid_p = true; - FOR_EACH_EDGE (e, ei, cfg_bb->preds) - { - if (bitmap_empty_p (m_vector_manager->vector_avout[e->src->index])) - { - all_valid_p = false; - break; - } - } - - if (!all_valid_p) - return false; - return true; -} - -/* Optimize athe case like this: - - bb 0: - vsetvl 0 a5,zero,e8,mf8 - insn 0 (demand SEW + LMUL) - bb 1: - vsetvl 1 a5,zero,e16,mf4 - insn 1 (demand SEW + LMUL) - - In this case, we should be able to refine - vsetvl 1 into vsetvl zero, zero according AVIN. */ -void -pass_vsetvl::refine_vsetvls (void) const -{ - basic_block cfg_bb; - FOR_EACH_BB_FN (cfg_bb, cfun) - { - auto info = get_block_info (cfg_bb).local_dem; - insn_info *insn = info.get_insn (); - if (!info.valid_p ()) - continue; - - rtx_insn *rinsn = insn->rtl (); - if (!can_refine_vsetvl_p (cfg_bb, info)) - continue; - - /* We can't refine user vsetvl into vsetvl zero,zero since the dest - will be used by the following instructions. */ - if (vector_config_insn_p (rinsn)) - { - m_vector_manager->to_refine_vsetvls.add (rinsn); - continue; - } - - /* If all incoming edges to a block have a vector state that is compatbile - with the block. In such a case we need not emit a vsetvl in the current - block. */ - - gcc_assert (has_vtype_op (insn->rtl ())); - rinsn = PREV_INSN (insn->rtl ()); - gcc_assert (vector_config_insn_p (PREV_INSN (insn->rtl ()))); - if (m_vector_manager->all_avail_in_compatible_p (cfg_bb)) - { - size_t id = m_vector_manager->get_expr_id (info); - if (bitmap_bit_p (m_vector_manager->vector_del[cfg_bb->index], id)) - continue; - eliminate_insn (rinsn); - } - else - { - rtx new_pat - = gen_vsetvl_pat (VSETVL_VTYPE_CHANGE_ONLY, info, NULL_RTX); - change_insn (rinsn, new_pat); - } - } -} - -void -pass_vsetvl::cleanup_vsetvls () -{ - basic_block cfg_bb; - FOR_EACH_BB_FN (cfg_bb, cfun) - { - auto &info = get_block_info (cfg_bb).reaching_out; - gcc_assert (m_vector_manager->expr_set_num ( - m_vector_manager->vector_del[cfg_bb->index]) - <= 1); - for (size_t i = 0; i < m_vector_manager->vector_exprs.length (); i++) - { - if (bitmap_bit_p (m_vector_manager->vector_del[cfg_bb->index], i)) - { - if (info.dirty_p ()) - info.set_unknown (); - else - { - const auto dem = get_block_info (cfg_bb).local_dem; - gcc_assert (dem == *m_vector_manager->vector_exprs[i]); - insn_info *insn = dem.get_insn (); - gcc_assert (insn && insn->rtl ()); - rtx_insn *rinsn; - /* We can't eliminate user vsetvl since the dest will be used - * by the following instructions. */ - if (vector_config_insn_p (insn->rtl ())) - { - m_vector_manager->to_delete_vsetvls.add (insn->rtl ()); - continue; - } - - gcc_assert (has_vtype_op (insn->rtl ())); - rinsn = PREV_INSN (insn->rtl ()); - gcc_assert (vector_config_insn_p (PREV_INSN (insn->rtl ()))); - eliminate_insn (rinsn); - } - } - } - } -} - -bool -pass_vsetvl::commit_vsetvls (void) -{ - bool need_commit = false; - - for (int ed = 0; ed < NUM_EDGES (m_vector_manager->vector_edge_list); ed++) - { - for (size_t i = 0; i < m_vector_manager->vector_exprs.length (); i++) - { - edge eg = INDEX_EDGE (m_vector_manager->vector_edge_list, ed); - if (bitmap_bit_p (m_vector_manager->vector_insert[ed], i)) - { - const vector_insn_info *require - = m_vector_manager->vector_exprs[i]; - gcc_assert (require->valid_or_dirty_p ()); - rtl_profile_for_edge (eg); - start_sequence (); - - insn_info *insn = require->get_insn (); - vector_insn_info prev_info = vector_insn_info (); - sbitmap bitdata = m_vector_manager->vector_avout[eg->src->index]; - if (m_vector_manager->all_same_ratio_p (bitdata) - && m_vector_manager->all_same_avl_p (eg->dest, bitdata)) - { - size_t first = bitmap_first_set_bit (bitdata); - prev_info = *m_vector_manager->vector_exprs[first]; - } - - insert_vsetvl (EMIT_DIRECT, insn->rtl (), *require, prev_info); - rtx_insn *rinsn = get_insns (); - end_sequence (); - default_rtl_profile (); - - /* We should not get an abnormal edge here. */ - gcc_assert (!(eg->flags & EDGE_ABNORMAL)); - need_commit = true; - insert_insn_on_edge (rinsn, eg); - - if (dump_file) - { - fprintf (dump_file, - "\nInsert vsetvl insn %d at edge %d from to " - ":\n", - INSN_UID (rinsn), ed, eg->src->index, - eg->dest->index); - print_rtl_single (dump_file, rinsn); - } - } - } - } - - for (const bb_info *bb : crtl->ssa->bbs ()) - { - basic_block cfg_bb = bb->cfg_bb (); - const auto reaching_out = get_block_info (cfg_bb).reaching_out; - if (!reaching_out.dirty_p ()) - continue; - - rtx new_pat; - if (!reaching_out.demand_p (DEMAND_AVL)) - { - vl_vtype_info new_info = reaching_out; - new_info.set_avl_info (avl_info (const0_rtx, nullptr)); - new_pat = gen_vsetvl_pat (VSETVL_DISCARD_RESULT, new_info, NULL_RTX); - } - else if (can_refine_vsetvl_p (cfg_bb, reaching_out)) - new_pat - = gen_vsetvl_pat (VSETVL_VTYPE_CHANGE_ONLY, reaching_out, NULL_RTX); - else if (vlmax_avl_p (reaching_out.get_avl ())) - { - rtx vl = reaching_out.get_avl_or_vl_reg (); - new_pat = gen_vsetvl_pat (VSETVL_NORMAL, reaching_out, vl); - } - else - new_pat - = gen_vsetvl_pat (VSETVL_DISCARD_RESULT, reaching_out, NULL_RTX); - - edge eg; - edge_iterator eg_iterator; - FOR_EACH_EDGE (eg, eg_iterator, cfg_bb->succs) - { - /* We should not get an abnormal edge here. */ - gcc_assert (!(eg->flags & EDGE_ABNORMAL)); - /* We failed to optimize this case in Phase 3 (earliest fusion): - - bb 2: vsetvl a5, a3 ... - goto bb 4 - bb 3: vsetvl a5, a2 ... - goto bb 4 - bb 4: vsetvli zero, a5 ---> Redundant, should be elided. - - Since "a5" value can come from either bb 2 or bb 3, we can't make - it optimized in Phase 3 which will make phase 3 so complicated. - Now, we do post optimization here to elide the redundant VSETVL - insn in bb4. */ - if (m_vector_manager->vsetvl_dominated_by_all_preds_p (cfg_bb, - reaching_out)) - continue; - - start_sequence (); - emit_insn (copy_rtx (new_pat)); - rtx_insn *rinsn = get_insns (); - end_sequence (); - - insert_insn_on_edge (rinsn, eg); - need_commit = true; - if (dump_file) - { - fprintf (dump_file, - "\nInsert vsetvl insn %d from to :\n", - INSN_UID (rinsn), cfg_bb->index, eg->dest->index); - print_rtl_single (dump_file, rinsn); - } - } - } - - return need_commit; -} - -void -pass_vsetvl::pre_vsetvl (void) -{ - /* Compute entity list. */ - prune_expressions (); - - m_vector_manager->create_bitmap_vectors (); - compute_local_properties (); - m_vector_manager->vector_edge_list = pre_edge_lcm_avs ( - m_vector_manager->vector_exprs.length (), m_vector_manager->vector_transp, - m_vector_manager->vector_comp, m_vector_manager->vector_antic, - m_vector_manager->vector_kill, m_vector_manager->vector_avin, - m_vector_manager->vector_avout, &m_vector_manager->vector_insert, - &m_vector_manager->vector_del); - - /* We should dump the information before CFG is changed. Otherwise it will - produce ICE (internal compiler error). */ - if (dump_file && (dump_flags & TDF_DETAILS)) - m_vector_manager->dump (dump_file); - - refine_vsetvls (); - cleanup_vsetvls (); - bool need_commit = commit_vsetvls (); - if (need_commit) - commit_edge_insertions (); -} - /* Some instruction can not be accessed in RTL_SSA when we don't re-init the new RTL_SSA framework but it is definetely at the END of the block. -- 2.36.3