public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
From: Lehua Ding <lehua.ding@rivai.ai>
To: gcc-patches@gcc.gnu.org
Cc: juzhe.zhong@rivai.ai, kito.cheng@gmail.com, rdapp.gcc@gmail.com,
	palmer@rivosinc.com, jeffreyalaw@gmail.com, lehua.ding@rivai.ai
Subject: [PATCH V3 08/11] RISC-V: P8: Refactor emit-vsetvl phase and delete post optimization
Date: Thu, 19 Oct 2023 16:33:30 +0800	[thread overview]
Message-ID: <20231019083333.2052340-9-lehua.ding@rivai.ai> (raw)
In-Reply-To: <20231019083333.2052340-1-lehua.ding@rivai.ai>

gcc/ChangeLog:

	* config/riscv/riscv-vsetvl.cc (pre_vsetvl::emit_vsetvl): New.
	(pre_vsetvl::cleaup): New.
	(pre_vsetvl::remove_avl_operand): New.
	(pre_vsetvl::remove_unused_dest_operand): New.
	(pass_vsetvl::get_vsetvl_at_end): Removed.
	(local_avl_compatible_p): Removed.
	(pass_vsetvl::local_eliminate_vsetvl_insn): Removed.
	(get_first_vsetvl_before_rvv_insns): Removed.
	(pass_vsetvl::global_eliminate_vsetvl_insn): Removed.
	(pass_vsetvl::ssa_post_optimization): Removed.
	(has_no_uses): Removed.
	(pass_vsetvl::df_post_optimization): Removed.
	(pass_vsetvl::init): Removed.
	(pass_vsetvl::done): Removed.
	(pass_vsetvl::compute_probabilities): Removed.
	(pass_vsetvl::lazy_vsetvl): Removed.
	(pass_vsetvl::execute): Removed.
	(make_pass_vsetvl): Removed.

---
 gcc/config/riscv/riscv-vsetvl.cc | 878 +++++++------------------------
 1 file changed, 203 insertions(+), 675 deletions(-)

diff --git a/gcc/config/riscv/riscv-vsetvl.cc b/gcc/config/riscv/riscv-vsetvl.cc
index 855edd6d0f5..06d02d25cb3 100644
--- a/gcc/config/riscv/riscv-vsetvl.cc
+++ b/gcc/config/riscv/riscv-vsetvl.cc
@@ -3601,6 +3601,209 @@ pre_vsetvl::pre_global_vsetvl_info ()
     }
 }
 
+void
+pre_vsetvl::emit_vsetvl ()
+{
+  bool need_commit = false;
+
+  for (const bb_info *bb : crtl->ssa->bbs ())
+    {
+      for (const auto &curr_info : get_block_info (bb).infos)
+	{
+	  insn_info *insn = curr_info.get_insn ();
+	  if (curr_info.delete_p ())
+	    {
+	      if (vsetvl_insn_p (insn->rtl ()))
+		remove_vsetvl_insn (curr_info);
+	      continue;
+	    }
+	  else if (curr_info.valid_p ())
+	    {
+	      if (vsetvl_insn_p (insn->rtl ()))
+		{
+		  const vsetvl_info temp = vsetvl_info (insn);
+		  if (!(curr_info == temp))
+		    {
+		      if (dump_file)
+			{
+			  fprintf (dump_file, "\n  Change vsetvl info from: ");
+			  temp.dump (dump_file, "    ");
+			  fprintf (dump_file, "  to: ");
+			  curr_info.dump (dump_file, "    ");
+			}
+		      change_vsetvl_insn (curr_info);
+		    }
+		}
+	      else
+		{
+		  if (dump_file)
+		    {
+		      fprintf (dump_file,
+			       "\n  Insert vsetvl info before insn %d: ",
+			       insn->uid ());
+		      curr_info.dump (dump_file, "    ");
+		    }
+		  insert_vsetvl_insn (EMIT_BEFORE, curr_info);
+		}
+	    }
+	}
+    }
+
+  for (const vsetvl_info &item : m_delete_list)
+    {
+      gcc_assert (vsetvl_insn_p (item.get_insn ()->rtl ()));
+      remove_vsetvl_insn (item);
+    }
+
+  /* Insert vsetvl as LCM suggests.  */
+  for (int ed = 0; ed < NUM_EDGES (m_edges); ed++)
+    {
+      edge eg = INDEX_EDGE (m_edges, ed);
+      sbitmap i = m_insert[ed];
+      if (bitmap_count_bits (i) < 1)
+	continue;
+
+      if (bitmap_count_bits (i) > 1)
+	/* For code with infinite loop (e.g. pr61634.c), the data flow is
+	   completely wrong.  */
+	continue;
+
+      gcc_assert (bitmap_count_bits (i) == 1);
+      unsigned expr_index = bitmap_first_set_bit (i);
+      const vsetvl_info &info = *m_exprs[expr_index];
+      gcc_assert (info.valid_p ());
+      if (dump_file)
+	{
+	  fprintf (dump_file,
+		   "\n  Insert vsetvl info at edge(bb %u -> bb %u): ",
+		   eg->src->index, eg->dest->index);
+	  info.dump (dump_file, "    ");
+	}
+      rtl_profile_for_edge (eg);
+      start_sequence ();
+
+      insert_vsetvl_insn (EMIT_DIRECT, info);
+      rtx_insn *rinsn = get_insns ();
+      end_sequence ();
+      default_rtl_profile ();
+
+      /* We should not get an abnormal edge here.  */
+      gcc_assert (!(eg->flags & EDGE_ABNORMAL));
+      need_commit = true;
+      insert_insn_on_edge (rinsn, eg);
+    }
+
+  /* Insert vsetvl info that was not deleted after lift up.  */
+  for (const bb_info *bb : crtl->ssa->bbs ())
+    {
+      const vsetvl_block_info &block_info = get_block_info (bb);
+      if (!block_info.has_info ())
+	continue;
+
+      const vsetvl_info &footer_info = block_info.get_exit_info ();
+
+      if (footer_info.delete_p ())
+	continue;
+
+      edge eg;
+      edge_iterator eg_iterator;
+      FOR_EACH_EDGE (eg, eg_iterator, bb->cfg_bb ()->succs)
+	{
+	  gcc_assert (!(eg->flags & EDGE_ABNORMAL));
+	  if (dump_file)
+	    {
+	      fprintf (
+		dump_file,
+		"\n  Insert missed vsetvl info at edge(bb %u -> bb %u): ",
+		eg->src->index, eg->dest->index);
+	      footer_info.dump (dump_file, "    ");
+	    }
+	  start_sequence ();
+	  insert_vsetvl_insn (EMIT_DIRECT, footer_info);
+	  rtx_insn *rinsn = get_insns ();
+	  end_sequence ();
+	  default_rtl_profile ();
+	  insert_insn_on_edge (rinsn, eg);
+	  need_commit = true;
+	}
+    }
+
+  if (need_commit)
+    commit_edge_insertions ();
+}
+
+void
+pre_vsetvl::cleaup ()
+{
+  remove_avl_operand ();
+  remove_unused_dest_operand ();
+}
+
+void
+pre_vsetvl::remove_avl_operand ()
+{
+  basic_block cfg_bb;
+  rtx_insn *rinsn;
+  FOR_ALL_BB_FN (cfg_bb, cfun)
+    FOR_BB_INSNS (cfg_bb, rinsn)
+      if (NONDEBUG_INSN_P (rinsn) && has_vl_op (rinsn)
+	  && REG_P (get_vl (rinsn)))
+	{
+	  rtx avl = get_vl (rinsn);
+	  if (count_regno_occurrences (rinsn, REGNO (avl)) == 1)
+	    {
+	      rtx new_pat;
+	      if (fault_first_load_p (rinsn))
+		new_pat
+		  = simplify_replace_rtx (PATTERN (rinsn), avl, const0_rtx);
+	      else
+		{
+		  rtx set = single_set (rinsn);
+		  rtx src
+		    = simplify_replace_rtx (SET_SRC (set), avl, const0_rtx);
+		  new_pat = gen_rtx_SET (SET_DEST (set), src);
+		}
+	      if (dump_file)
+		{
+		  fprintf (dump_file, "  Cleanup insn %u's avl operand:\n",
+			   INSN_UID (rinsn));
+		  print_rtl_single (dump_file, rinsn);
+		}
+	      validate_change_or_fail (rinsn, &PATTERN (rinsn), new_pat, false);
+	    }
+	}
+}
+
+void
+pre_vsetvl::remove_unused_dest_operand ()
+{
+  df_analyze ();
+  basic_block cfg_bb;
+  rtx_insn *rinsn;
+  FOR_ALL_BB_FN (cfg_bb, cfun)
+    FOR_BB_INSNS (cfg_bb, rinsn)
+      if (NONDEBUG_INSN_P (rinsn) && vsetvl_insn_p (rinsn))
+	{
+	  rtx vl = get_vl (rinsn);
+	  vsetvl_info info = vsetvl_info (rinsn);
+	  if (has_no_uses (cfg_bb, rinsn, REGNO (vl)))
+	    if (!info.has_vlmax_avl ())
+	      {
+		rtx new_pat = info.get_vsetvl_pat (true);
+		if (dump_file)
+		  {
+		    fprintf (dump_file,
+			     "  Remove vsetvl insn %u's dest(vl) operand since "
+			     "it unused:\n",
+			     INSN_UID (rinsn));
+		    print_rtl_single (dump_file, rinsn);
+		  }
+		validate_change_or_fail (rinsn, &PATTERN (rinsn), new_pat,
+					 false);
+	      }
+	}
+}
+
 const pass_data pass_data_vsetvl = {
   RTL_PASS,	 /* type */
   "vsetvl",	 /* name */
@@ -3738,678 +3941,3 @@ make_pass_vsetvl (gcc::context *ctxt)
 {
   return new pass_vsetvl (ctxt);
 }
-
-/* Some instruction can not be accessed in RTL_SSA when we don't re-init
-   the new RTL_SSA framework but it is definetely at the END of the block.
-
-  Here we optimize the VSETVL is hoisted by LCM:
-
-   Before LCM:
-     bb 1:
-       vsetvli a5,a2,e32,m1,ta,mu
-     bb 2:
-       vsetvli zero,a5,e32,m1,ta,mu
-       ...
-
-   After LCM:
-     bb 1:
-       vsetvli a5,a2,e32,m1,ta,mu
-       LCM INSERTED: vsetvli zero,a5,e32,m1,ta,mu --> eliminate
-     bb 2:
-       ...
-   */
-rtx_insn *
-pass_vsetvl::get_vsetvl_at_end (const bb_info *bb, vector_insn_info *dem) const
-{
-  rtx_insn *end_vsetvl = BB_END (bb->cfg_bb ());
-  if (end_vsetvl && NONDEBUG_INSN_P (end_vsetvl))
-    {
-      if (JUMP_P (end_vsetvl))
-	end_vsetvl = PREV_INSN (end_vsetvl);
-
-      if (NONDEBUG_INSN_P (end_vsetvl)
-	  && vsetvl_discard_result_insn_p (end_vsetvl))
-	{
-	  /* Only handle single succ. here, multiple succ. is much
-	     more complicated.  */
-	  if (single_succ_p (bb->cfg_bb ()))
-	    {
-	      edge e = single_succ_edge (bb->cfg_bb ());
-	      *dem = get_block_info (e->dest).local_dem;
-	      return end_vsetvl;
-	    }
-	}
-    }
-  return nullptr;
-}
-
-/* This predicator should only used within same basic block.  */
-static bool
-local_avl_compatible_p (rtx avl1, rtx avl2)
-{
-  if (!REG_P (avl1) || !REG_P (avl2))
-    return false;
-
-  return REGNO (avl1) == REGNO (avl2);
-}
-
-/* Local user vsetvl optimizaiton:
-
-     Case 1:
-       vsetvl a5,a4,e8,mf8
-       ...
-       vsetvl zero,a5,e8,mf8 --> Eliminate directly.
-
-     Case 2:
-       vsetvl a5,a4,e8,mf8    --> vsetvl a5,a4,e32,mf2
-       ...
-       vsetvl zero,a5,e32,mf2 --> Eliminate directly.  */
-void
-pass_vsetvl::local_eliminate_vsetvl_insn (const bb_info *bb) const
-{
-  rtx_insn *prev_vsetvl = nullptr;
-  rtx_insn *curr_vsetvl = nullptr;
-  rtx vl_placeholder = RVV_VLMAX;
-  rtx prev_avl = vl_placeholder;
-  rtx curr_avl = vl_placeholder;
-  vector_insn_info prev_dem;
-
-  /* Instruction inserted by LCM is not appeared in RTL-SSA yet, try to
-     found those instruciton.   */
-  if (rtx_insn *end_vsetvl = get_vsetvl_at_end (bb, &prev_dem))
-    {
-      prev_avl = get_avl (end_vsetvl);
-      prev_vsetvl = end_vsetvl;
-    }
-
-  bool skip_one = false;
-  /* Backward propgate vsetvl info, drop the later one (prev_vsetvl) if it's
-     compatible with current vsetvl (curr_avl), and merge the vtype and avl
-     info. into current vsetvl.  */
-  for (insn_info *insn : bb->reverse_real_nondebug_insns ())
-    {
-      rtx_insn *rinsn = insn->rtl ();
-      const auto &curr_dem = get_vector_info (insn);
-      bool need_invalidate = false;
-
-      /* Skip if this insn already handled in last iteration.  */
-      if (skip_one)
-	{
-	  skip_one = false;
-	  continue;
-	}
-
-      if (vsetvl_insn_p (rinsn))
-	{
-	  curr_vsetvl = rinsn;
-	  /* vsetvl are using vl rather than avl since it will try to merge
-	     with other vsetvl_discard_result.
-
-			v--- avl
-	     vsetvl a5,a4,e8,mf8   # vsetvl
-	     ...    ^--- vl
-	     vsetvl zero,a5,e8,mf8 # vsetvl_discard_result
-			 ^--- avl
-	     */
-	  curr_avl = get_vl (rinsn);
-	  /* vsetvl is a cut point of local backward vsetvl elimination.  */
-	  need_invalidate = true;
-	}
-      else if (has_vtype_op (rinsn) && NONDEBUG_INSN_P (PREV_INSN (rinsn))
-	       && (vsetvl_discard_result_insn_p (PREV_INSN (rinsn))
-		   || vsetvl_insn_p (PREV_INSN (rinsn))))
-	{
-	  curr_vsetvl = PREV_INSN (rinsn);
-
-	  if (vsetvl_insn_p (PREV_INSN (rinsn)))
-	    {
-	      /* Need invalidate and skip if it's vsetvl.  */
-	      need_invalidate = true;
-	      /* vsetvl_discard_result_insn_p won't appeared in RTL-SSA,
-	       * so only need to skip for vsetvl.  */
-	      skip_one = true;
-	    }
-
-	  curr_avl = curr_dem.get_avl ();
-
-	  /* Some instrucion like pred_extract_first<mode> don't reqruie avl, so
-	     the avl is null, use vl_placeholder for unify the handling
-	     logic. */
-	  if (!curr_avl)
-	    curr_avl = vl_placeholder;
-	}
-      else if (insn->is_call () || insn->is_asm ()
-	       || find_access (insn->defs (), VL_REGNUM)
-	       || find_access (insn->defs (), VTYPE_REGNUM)
-	       || (REG_P (prev_avl)
-		   && find_access (insn->defs (), REGNO (prev_avl))))
-	{
-	  /* Invalidate if this insn can't propagate vl, vtype or avl.  */
-	  need_invalidate = true;
-	  prev_dem = vector_insn_info ();
-	}
-      else
-	/* Not interested instruction.  */
-	continue;
-
-      /* Local AVL compatibility checking is simpler than global, we only
-	 need to check the REGNO is same.  */
-      if (prev_dem.valid_or_dirty_p ()
-	  && prev_dem.skip_avl_compatible_p (curr_dem)
-	  && local_avl_compatible_p (prev_avl, curr_avl))
-	{
-	  /* curr_dem and prev_dem is compatible!  */
-	  /* Update avl info since we need to make sure they are fully
-	     compatible before merge.  */
-	  prev_dem.set_avl_info (curr_dem.get_avl_info ());
-	  /* Merge both and update into curr_vsetvl.  */
-	  prev_dem = curr_dem.local_merge (prev_dem);
-	  change_vsetvl_insn (curr_dem.get_insn (), prev_dem);
-	  /* Then we can drop prev_vsetvl.  */
-	  eliminate_insn (prev_vsetvl);
-	}
-
-      if (need_invalidate)
-	{
-	  prev_vsetvl = nullptr;
-	  curr_vsetvl = nullptr;
-	  prev_avl = vl_placeholder;
-	  curr_avl = vl_placeholder;
-	  prev_dem = vector_insn_info ();
-	}
-      else
-	{
-	  prev_vsetvl = curr_vsetvl;
-	  prev_avl = curr_avl;
-	  prev_dem = curr_dem;
-	}
-    }
-}
-
-/* Return the first vsetvl instruction in CFG_BB or NULL if
-   none exists or if a user RVV instruction is enountered
-   prior to any vsetvl.  */
-static rtx_insn *
-get_first_vsetvl_before_rvv_insns (basic_block cfg_bb,
-				   enum vsetvl_type insn_type)
-{
-  gcc_assert (insn_type == VSETVL_DISCARD_RESULT
-	      || insn_type == VSETVL_VTYPE_CHANGE_ONLY);
-  rtx_insn *rinsn;
-  FOR_BB_INSNS (cfg_bb, rinsn)
-    {
-      if (!NONDEBUG_INSN_P (rinsn))
-	continue;
-      /* If we don't find any inserted vsetvli before user RVV instructions,
-	 we don't need to optimize the vsetvls in this block.  */
-      if (has_vtype_op (rinsn) || vsetvl_insn_p (rinsn))
-	return nullptr;
-
-      if (insn_type == VSETVL_DISCARD_RESULT
-	  && vsetvl_discard_result_insn_p (rinsn))
-	return rinsn;
-      if (insn_type == VSETVL_VTYPE_CHANGE_ONLY
-	  && vsetvl_vtype_change_only_p (rinsn))
-	return rinsn;
-    }
-  return nullptr;
-}
-
-/* Global user vsetvl optimizaiton:
-
-     Case 1:
-     bb 1:
-       vsetvl a5,a4,e8,mf8
-       ...
-     bb 2:
-       ...
-       vsetvl zero,a5,e8,mf8 --> Eliminate directly.
-
-     Case 2:
-      bb 1:
-       vsetvl a5,a4,e8,mf8    --> vsetvl a5,a4,e32,mf2
-       ...
-      bb 2:
-       ...
-       vsetvl zero,a5,e32,mf2 --> Eliminate directly.
-
-     Case 3:
-      bb 1:
-       vsetvl a5,a4,e8,mf8    --> vsetvl a5,a4,e32,mf2
-       ...
-      bb 2:
-       ...
-       vsetvl a5,a4,e8,mf8    --> vsetvl a5,a4,e32,mf2
-       goto bb 3
-      bb 3:
-       ...
-       vsetvl zero,a5,e32,mf2 --> Eliminate directly.
-*/
-bool
-pass_vsetvl::global_eliminate_vsetvl_insn (const bb_info *bb) const
-{
-  rtx_insn *vsetvl_rinsn = NULL;
-  vector_insn_info dem = vector_insn_info ();
-  const auto &block_info = get_block_info (bb);
-  basic_block cfg_bb = bb->cfg_bb ();
-
-  if (block_info.local_dem.valid_or_dirty_p ())
-    {
-      /* Optimize the local vsetvl.  */
-      dem = block_info.local_dem;
-      vsetvl_rinsn
-	= get_first_vsetvl_before_rvv_insns (cfg_bb, VSETVL_DISCARD_RESULT);
-    }
-  if (!vsetvl_rinsn)
-    /* Optimize the global vsetvl inserted by LCM.  */
-    vsetvl_rinsn = get_vsetvl_at_end (bb, &dem);
-
-  /* No need to optimize if block doesn't have vsetvl instructions.  */
-  if (!dem.valid_or_dirty_p () || !vsetvl_rinsn || !dem.get_avl_source ()
-      || !dem.has_avl_reg ())
-    return false;
-
-  /* Condition 1: Check it has preds.  */
-  if (EDGE_COUNT (cfg_bb->preds) == 0)
-    return false;
-
-  /* If all preds has VL/VTYPE status setted by user vsetvls, and these
-     user vsetvls are all skip_avl_compatible_p with the vsetvl in this
-     block, we can eliminate this vsetvl instruction.  */
-  sbitmap avin = m_vector_manager->vector_avin[cfg_bb->index];
-
-  unsigned int bb_index;
-  sbitmap_iterator sbi;
-  rtx avl = dem.get_avl ();
-  hash_set<set_info *> sets
-    = get_all_sets (dem.get_avl_source (), true, false, false);
-  /* Condition 2: All VL/VTYPE available in are all compatible.  */
-  EXECUTE_IF_SET_IN_BITMAP (avin, 0, bb_index, sbi)
-    {
-      const auto &expr = m_vector_manager->vector_exprs[bb_index];
-      const auto &insn = expr->get_insn ();
-      def_info *def = find_access (insn->defs (), REGNO (avl));
-      set_info *set = safe_dyn_cast<set_info *> (def);
-      if (!vsetvl_insn_p (insn->rtl ()) || insn->bb () == bb
-	  || !sets.contains (set))
-	return false;
-    }
-
-  /* Condition 3: We don't do the global optimization for the block
-     has a pred is entry block or exit block.  */
-  /* Condition 4: All preds have available VL/VTYPE out.  */
-  edge e;
-  edge_iterator ei;
-  FOR_EACH_EDGE (e, ei, cfg_bb->preds)
-    {
-      sbitmap avout = m_vector_manager->vector_avout[e->src->index];
-      if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun)
-	  || e->src == EXIT_BLOCK_PTR_FOR_FN (cfun)
-	  || (unsigned int) e->src->index
-	       >= m_vector_manager->vector_block_infos.length ()
-	  || bitmap_empty_p (avout))
-	return false;
-
-      EXECUTE_IF_SET_IN_BITMAP (avout, 0, bb_index, sbi)
-	{
-	  const auto &expr = m_vector_manager->vector_exprs[bb_index];
-	  const auto &insn = expr->get_insn ();
-	  def_info *def = find_access (insn->defs (), REGNO (avl));
-	  set_info *set = safe_dyn_cast<set_info *> (def);
-	  if (!vsetvl_insn_p (insn->rtl ()) || insn->bb () == bb
-	      || !sets.contains (set) || !expr->skip_avl_compatible_p (dem))
-	    return false;
-	}
-    }
-
-  /* Step1: Reshape the VL/VTYPE status to make sure everything compatible.  */
-  auto_vec<basic_block> pred_cfg_bbs
-    = get_dominated_by (CDI_POST_DOMINATORS, cfg_bb);
-  FOR_EACH_EDGE (e, ei, cfg_bb->preds)
-    {
-      sbitmap avout = m_vector_manager->vector_avout[e->src->index];
-      EXECUTE_IF_SET_IN_BITMAP (avout, 0, bb_index, sbi)
-	{
-	  vector_insn_info prev_dem = *m_vector_manager->vector_exprs[bb_index];
-	  vector_insn_info curr_dem = dem;
-	  insn_info *insn = prev_dem.get_insn ();
-	  if (!pred_cfg_bbs.contains (insn->bb ()->cfg_bb ()))
-	    continue;
-	  /* Update avl info since we need to make sure they are fully
-	     compatible before merge.  */
-	  curr_dem.set_avl_info (prev_dem.get_avl_info ());
-	  /* Merge both and update into curr_vsetvl.  */
-	  prev_dem = curr_dem.local_merge (prev_dem);
-	  change_vsetvl_insn (insn, prev_dem);
-	}
-    }
-
-  /* Step2: eliminate the vsetvl instruction.  */
-  eliminate_insn (vsetvl_rinsn);
-  return true;
-}
-
-/* This function does the following post optimization base on RTL_SSA:
-
-   1. Local user vsetvl optimizations.
-   2. Global user vsetvl optimizations.
-   3. AVL dependencies removal:
-      Before VSETVL PASS, RVV instructions pattern is depending on AVL operand
-      implicitly. Since we will emit VSETVL instruction and make RVV
-      instructions depending on VL/VTYPE global status registers, we remove the
-      such AVL operand in the RVV instructions pattern here in order to remove
-      AVL dependencies when AVL operand is a register operand.
-
-      Before the VSETVL PASS:
-	li a5,32
-	...
-	vadd.vv (..., a5)
-      After the VSETVL PASS:
-	li a5,32
-	vsetvli zero, a5, ...
-	...
-	vadd.vv (..., const_int 0).  */
-void
-pass_vsetvl::ssa_post_optimization (void) const
-{
-  for (const bb_info *bb : crtl->ssa->bbs ())
-    {
-      local_eliminate_vsetvl_insn (bb);
-      bool changed_p = true;
-      while (changed_p)
-	{
-	  changed_p = false;
-	  changed_p |= global_eliminate_vsetvl_insn (bb);
-	}
-      for (insn_info *insn : bb->real_nondebug_insns ())
-	{
-	  rtx_insn *rinsn = insn->rtl ();
-	  if (vlmax_avl_insn_p (rinsn))
-	    {
-	      eliminate_insn (rinsn);
-	      continue;
-	    }
-
-	  /* Erase the AVL operand from the instruction.  */
-	  if (!has_vl_op (rinsn) || !REG_P (get_vl (rinsn)))
-	    continue;
-	  rtx avl = get_vl (rinsn);
-	  if (count_regno_occurrences (rinsn, REGNO (avl)) == 1)
-	    {
-	      /* Get the list of uses for the new instruction.  */
-	      auto attempt = crtl->ssa->new_change_attempt ();
-	      insn_change change (insn);
-	      /* Remove the use of the substituted value.  */
-	      access_array_builder uses_builder (attempt);
-	      uses_builder.reserve (insn->num_uses () - 1);
-	      for (use_info *use : insn->uses ())
-		if (use != find_access (insn->uses (), REGNO (avl)))
-		  uses_builder.quick_push (use);
-	      use_array new_uses = use_array (uses_builder.finish ());
-	      change.new_uses = new_uses;
-	      change.move_range = insn->ebb ()->insn_range ();
-	      rtx pat;
-	      if (fault_first_load_p (rinsn))
-		pat = simplify_replace_rtx (PATTERN (rinsn), avl, const0_rtx);
-	      else
-		{
-		  rtx set = single_set (rinsn);
-		  rtx src
-		    = simplify_replace_rtx (SET_SRC (set), avl, const0_rtx);
-		  pat = gen_rtx_SET (SET_DEST (set), src);
-		}
-	      bool ok = change_insn (crtl->ssa, change, insn, pat);
-	      gcc_assert (ok);
-	    }
-	}
-    }
-}
-
-/* Return true if the SET result is not used by any instructions.  */
-static bool
-has_no_uses (basic_block cfg_bb, rtx_insn *rinsn, int regno)
-{
-  /* Handle the following case that can not be detected in RTL_SSA.  */
-  /* E.g.
-	  li a5, 100
-	  vsetvli a6, a5...
-	  ...
-	  vadd (use a6)
-
-	The use of "a6" is removed from "vadd" but the information is
-	not updated in RTL_SSA framework. We don't want to re-new
-	a new RTL_SSA which is expensive, instead, we use data-flow
-	analysis to check whether "a6" has no uses.  */
-  if (bitmap_bit_p (df_get_live_out (cfg_bb), regno))
-    return false;
-
-  rtx_insn *iter;
-  for (iter = NEXT_INSN (rinsn); iter && iter != NEXT_INSN (BB_END (cfg_bb));
-       iter = NEXT_INSN (iter))
-    if (df_find_use (iter, regno_reg_rtx[regno]))
-      return false;
-
-  return true;
-}
-
-/* This function does the following post optimization base on dataflow
-   analysis:
-
-   1. Change vsetvl rd, rs1 --> vsevl zero, rs1, if rd is not used by any
-   nondebug instructions. Even though this PASS runs after RA and it doesn't
-   help for reduce register pressure, it can help instructions scheduling since
-   we remove the dependencies.
-
-   2. Remove redundant user vsetvls base on outcome of Phase 4 (LCM) && Phase 5
-   (AVL dependencies removal).  */
-void
-pass_vsetvl::df_post_optimization (void) const
-{
-  df_analyze ();
-  hash_set<rtx_insn *> to_delete;
-  basic_block cfg_bb;
-  rtx_insn *rinsn;
-  FOR_ALL_BB_FN (cfg_bb, cfun)
-    {
-      FOR_BB_INSNS (cfg_bb, rinsn)
-	{
-	  if (NONDEBUG_INSN_P (rinsn) && vsetvl_insn_p (rinsn))
-	    {
-	      rtx vl = get_vl (rinsn);
-	      vector_insn_info info;
-	      info.parse_insn (rinsn);
-	      bool to_delete_p = m_vector_manager->to_delete_p (rinsn);
-	      bool to_refine_p = m_vector_manager->to_refine_p (rinsn);
-	      if (has_no_uses (cfg_bb, rinsn, REGNO (vl)))
-		{
-		  if (to_delete_p)
-		    to_delete.add (rinsn);
-		  else if (to_refine_p)
-		    {
-		      rtx new_pat = gen_vsetvl_pat (VSETVL_VTYPE_CHANGE_ONLY,
-						    info, NULL_RTX);
-		      validate_change_or_fail (rinsn, &PATTERN (rinsn), new_pat,
-					       false);
-		    }
-		  else if (!vlmax_avl_p (info.get_avl ()))
-		    {
-		      rtx new_pat = gen_vsetvl_pat (VSETVL_DISCARD_RESULT, info,
-						    NULL_RTX);
-		      validate_change_or_fail (rinsn, &PATTERN (rinsn), new_pat,
-					       false);
-		    }
-		}
-	    }
-	}
-    }
-  for (rtx_insn *rinsn : to_delete)
-    eliminate_insn (rinsn);
-}
-
-void
-pass_vsetvl::init (void)
-{
-  if (optimize > 0)
-    {
-      /* Initialization of RTL_SSA.  */
-      calculate_dominance_info (CDI_DOMINATORS);
-      calculate_dominance_info (CDI_POST_DOMINATORS);
-      df_analyze ();
-      crtl->ssa = new function_info (cfun);
-    }
-
-  m_vector_manager = new vector_infos_manager ();
-  compute_probabilities ();
-
-  if (dump_file && (dump_flags & TDF_DETAILS))
-    {
-      fprintf (dump_file, "\nPrologue: Initialize vector infos\n");
-      m_vector_manager->dump (dump_file);
-    }
-}
-
-void
-pass_vsetvl::done (void)
-{
-  if (optimize > 0)
-    {
-      /* Finalization of RTL_SSA.  */
-      free_dominance_info (CDI_DOMINATORS);
-      free_dominance_info (CDI_POST_DOMINATORS);
-      if (crtl->ssa->perform_pending_updates ())
-	cleanup_cfg (0);
-      delete crtl->ssa;
-      crtl->ssa = nullptr;
-    }
-  m_vector_manager->release ();
-  delete m_vector_manager;
-  m_vector_manager = nullptr;
-}
-
-/* Compute probability for each block.  */
-void
-pass_vsetvl::compute_probabilities (void)
-{
-  /* Don't compute it in -O0 since we don't need it.  */
-  if (!optimize)
-    return;
-  edge e;
-  edge_iterator ei;
-
-  for (const bb_info *bb : crtl->ssa->bbs ())
-    {
-      basic_block cfg_bb = bb->cfg_bb ();
-      auto &curr_prob = get_block_info (cfg_bb).probability;
-
-      /* GCC assume entry block (bb 0) are always so
-	 executed so set its probability as "always".  */
-      if (ENTRY_BLOCK_PTR_FOR_FN (cfun) == cfg_bb)
-	curr_prob = profile_probability::always ();
-      /* Exit block (bb 1) is the block we don't need to process.  */
-      if (EXIT_BLOCK_PTR_FOR_FN (cfun) == cfg_bb)
-	continue;
-
-      gcc_assert (curr_prob.initialized_p ());
-      FOR_EACH_EDGE (e, ei, cfg_bb->succs)
-	{
-	  auto &new_prob = get_block_info (e->dest).probability;
-	  /* Normally, the edge probability should be initialized.
-	     However, some special testing code which is written in
-	     GIMPLE IR style force the edge probility uninitialized,
-	     we conservatively set it as never so that it will not
-	     affect PRE (Phase 3 && Phse 4).  */
-	  if (!e->probability.initialized_p ())
-	    new_prob = profile_probability::never ();
-	  else if (!new_prob.initialized_p ())
-	    new_prob = curr_prob * e->probability;
-	  else if (new_prob == profile_probability::always ())
-	    continue;
-	  else
-	    new_prob += curr_prob * e->probability;
-	}
-    }
-}
-
-/* Lazy vsetvl insertion for optimize > 0. */
-void
-pass_vsetvl::lazy_vsetvl (void)
-{
-  if (dump_file)
-    fprintf (dump_file,
-	     "\nEntering Lazy VSETVL PASS and Handling %d basic blocks for "
-	     "function:%s\n",
-	     n_basic_blocks_for_fn (cfun), function_name (cfun));
-
-  /* Phase 1 - Compute the local dems within each block.
-     The data-flow analysis within each block is backward analysis.  */
-  if (dump_file)
-    fprintf (dump_file, "\nPhase 1: Compute local backward vector infos\n");
-  for (const bb_info *bb : crtl->ssa->bbs ())
-    compute_local_backward_infos (bb);
-  if (dump_file && (dump_flags & TDF_DETAILS))
-    m_vector_manager->dump (dump_file);
-
-  /* Phase 2 - Emit vsetvl instructions within each basic block according to
-     demand, compute and save ANTLOC && AVLOC of each block.  */
-  if (dump_file)
-    fprintf (dump_file,
-	     "\nPhase 2: Emit vsetvl instruction within each block\n");
-  for (const bb_info *bb : crtl->ssa->bbs ())
-    emit_local_forward_vsetvls (bb);
-  if (dump_file && (dump_flags & TDF_DETAILS))
-    m_vector_manager->dump (dump_file);
-
-  /* Phase 3 - Propagate demanded info across blocks.  */
-  if (dump_file)
-    fprintf (dump_file, "\nPhase 3: Demands propagation across blocks\n");
-  vsetvl_fusion ();
-
-  /* Phase 4 - Lazy code motion.  */
-  if (dump_file)
-    fprintf (dump_file, "\nPhase 4: PRE vsetvl by Lazy code motion (LCM)\n");
-  pre_vsetvl ();
-
-  /* Phase 5 - Post optimization base on RTL_SSA.  */
-  if (dump_file)
-    fprintf (dump_file, "\nPhase 5: Post optimization base on RTL_SSA\n");
-  ssa_post_optimization ();
-
-  /* Phase 6 - Post optimization base on data-flow analysis.  */
-  if (dump_file)
-    fprintf (dump_file,
-	     "\nPhase 6: Post optimization base on data-flow analysis\n");
-  df_post_optimization ();
-}
-
-/* Main entry point for this pass.  */
-unsigned int
-pass_vsetvl::execute (function *)
-{
-  if (n_basic_blocks_for_fn (cfun) <= 0)
-    return 0;
-
-  /* The RVV instruction may change after split which is not a stable
-     instruction. We need to split it here to avoid potential issue
-     since the VSETVL PASS is insert before split PASS.  */
-  split_all_insns ();
-
-  /* Early return for there is no vector instructions.  */
-  if (!has_vector_insn (cfun))
-    return 0;
-
-  init ();
-
-  if (!optimize)
-    simple_vsetvl ();
-  else
-    lazy_vsetvl ();
-
-  done ();
-  return 0;
-}
-
-rtl_opt_pass *
-make_pass_vsetvl (gcc::context *ctxt)
-{
-  return new pass_vsetvl (ctxt);
-}
-- 
2.36.3



  parent reply	other threads:[~2023-10-19  8:34 UTC|newest]

Thread overview: 26+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-10-19  8:33 [PATCH V3 00/11] Refactor and cleanup vsetvl pass Lehua Ding
2023-10-19  8:33 ` [PATCH V3 01/11] RISC-V: P1: Refactor avl_info/vl_vtype_info/vector_insn_info/vector_block_info Lehua Ding
2023-10-19  8:33 ` [PATCH V3 02/11] RISC-V: P2: Refactor and cleanup demand system Lehua Ding
2023-10-19  8:33 ` [PATCH V3 03/11] RISC-V: P3: Refactor vector_infos_manager Lehua Ding
2023-10-19  8:33 ` [PATCH V3 04/11] RISC-V: P4: move method from pass_vsetvl to pre_vsetvl Lehua Ding
2023-10-19  8:33 ` [PATCH V3 05/11] RISC-V: P5: Combine phase 1 and 2 Lehua Ding
2023-10-19  8:33 ` [PATCH V3 06/11] RISC-V: P6: Add computing reaching definition data flow Lehua Ding
2023-10-19  8:33 ` [PATCH V3 07/11] RISC-V: P7: Move earliest fuse and lcm code to pre_vsetvl class Lehua Ding
2023-10-19  8:33 ` Lehua Ding [this message]
2023-10-19  8:33 ` [PATCH V3 09/11] RISC-V: P9: Cleanup and reorganize helper functions Lehua Ding
2023-10-19  8:33 ` [PATCH V3 10/11] RISC-V: P10: Delete riscv-vsetvl.h and adjust riscv-vsetvl.def Lehua Ding
2023-10-19  8:33 ` [PATCH V3 11/11] RISC-V: P11: Adjust and add testcases Lehua Ding
2023-10-19  8:38 ` [PATCH V3 00/11] Refactor and cleanup vsetvl pass Robin Dapp
2023-10-19  8:43   ` Lehua Ding
2023-10-19  8:50 ` 钟居哲
2023-10-19 18:04   ` Patrick O'Neill
2023-10-20  2:20     ` Lehua Ding
2023-10-20  3:58     ` Lehua Ding
2023-10-23 18:30       ` Patrick O'Neill
2023-10-23 21:41         ` 钟居哲
2023-10-23 22:46           ` Patrick O'Neill
2023-10-23 22:50             ` 钟居哲
2023-10-23 23:42               ` Patrick O'Neill
2023-10-24  0:51                 ` juzhe.zhong
2023-10-24  1:01                   ` Patrick O'Neill
2023-10-24  2:27                     ` juzhe.zhong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20231019083333.2052340-9-lehua.ding@rivai.ai \
    --to=lehua.ding@rivai.ai \
    --cc=gcc-patches@gcc.gnu.org \
    --cc=jeffreyalaw@gmail.com \
    --cc=juzhe.zhong@rivai.ai \
    --cc=kito.cheng@gmail.com \
    --cc=palmer@rivosinc.com \
    --cc=rdapp.gcc@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).