public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] RISC-V: Fix VSETLV PASS compile-time issue
@ 2024-01-29 11:32 Juzhe-Zhong
  2024-01-30 21:19 ` Robin Dapp
  0 siblings, 1 reply; 2+ messages in thread
From: Juzhe-Zhong @ 2024-01-29 11:32 UTC (permalink / raw)
  To: gcc-patches; +Cc: kito.cheng, kito.cheng, jeffreyalaw, rdapp.gcc, Juzhe-Zhong

The compile time issue was discovered in SPEC 2017 wrf:

Use time and -ftime-report to analyze the profile data of the SPEC 2017 wrf compilation.

Before this patch (Lazy vsetvl):

scheduling                         : 121.89 ( 15%)   0.53 ( 11%) 122.72 ( 15%)    13M (  1%)
machine dep reorg                  : 424.61 ( 53%)   1.84 ( 37%) 427.44 ( 53%)  5290k (  0%)
real    13m27.074s
user    13m19.539s
sys     0m5.180s

Simple vsetvl:

machine dep reorg                  :   0.10 (  0%)   0.00 (  0%)   0.11 (  0%)  4138k (  0%)
real    6m5.780s
user    6m2.396s
sys     0m2.373s

The machine dep reorg time is the compile time of the VSETVL pass (424 seconds), which accounts for 53% of
the compilation time, much more than scheduling.

After investigation, the critical path of the VSETVL pass is compute_lcm_local_properties, which
is called in every iteration of phase 2 (earliest fusion) and phase 3 (global lcm).

This patch optimizes the code of compute_lcm_local_properties to reduce the compilation time.

After this patch:

scheduling                         : 117.51 ( 27%)   0.21 (  6%) 118.04 ( 27%)    13M (  1%)
machine dep reorg                  :  80.13 ( 18%)   0.91 ( 26%)  81.26 ( 18%)  5290k (  0%)
real    7m25.374s
user    7m20.116s
sys     0m3.795s

The improvement from this patch is significant: the lazy VSETVL pass goes from 424s (53%) to 80s (18%), which
is now less time than scheduling.

Tested on both RV32 and RV64 with no regressions.  OK for trunk?
 
	PR target/113495

gcc/ChangeLog:

	* config/riscv/riscv-vsetvl.cc (extract_single_source): Remove.
	(pre_vsetvl::compute_vsetvl_def_data): Fix compile time issue.
	(pre_vsetvl::compute_transparent): New function.
	(pre_vsetvl::compute_lcm_local_properties): Fix compile time issue.

---
 gcc/config/riscv/riscv-vsetvl.cc | 184 ++++++++++---------------------
 1 file changed, 60 insertions(+), 124 deletions(-)

diff --git a/gcc/config/riscv/riscv-vsetvl.cc b/gcc/config/riscv/riscv-vsetvl.cc
index d7b40a5c813..cec862329c5 100644
--- a/gcc/config/riscv/riscv-vsetvl.cc
+++ b/gcc/config/riscv/riscv-vsetvl.cc
@@ -599,14 +599,6 @@ extract_single_source (set_info *set)
   return first_insn;
 }
 
-static insn_info *
-extract_single_source (def_info *def)
-{
-  if (!def)
-    return nullptr;
-  return extract_single_source (dyn_cast<set_info *> (def));
-}
-
 static bool
 same_equiv_note_p (set_info *set1, set_info *set2)
 {
@@ -2374,6 +2366,7 @@ public:
   }
 
   void compute_vsetvl_def_data ();
+  void compute_transparent (const bb_info *);
   void compute_lcm_local_properties ();
 
   void fuse_local_vsetvl_info ();
@@ -2452,20 +2445,16 @@ pre_vsetvl::compute_vsetvl_def_data ()
 	{
 	  for (unsigned i = 0; i < m_vsetvl_def_exprs.length (); i += 1)
 	    {
-	      const vsetvl_info &info = *m_vsetvl_def_exprs[i];
-	      if (!info.has_nonvlmax_reg_avl ())
-		continue;
-	      unsigned int regno;
-	      sbitmap_iterator sbi;
-	      EXECUTE_IF_SET_IN_BITMAP (m_reg_def_loc[bb->index ()], 0, regno,
-					sbi)
-		if (regno == REGNO (info.get_avl ()))
-		  {
-		    bitmap_set_bit (m_kill[bb->index ()], i);
-		    bitmap_set_bit (def_loc[bb->index ()],
-				    get_expr_index (m_vsetvl_def_exprs,
-						    m_unknow_info));
-		  }
+	      auto *info = m_vsetvl_def_exprs[i];
+	      if (info->has_nonvlmax_reg_avl ()
+		  && bitmap_bit_p (m_reg_def_loc[bb->index ()],
+				   REGNO (info->get_avl ())))
+		{
+		  bitmap_set_bit (m_kill[bb->index ()], i);
+		  bitmap_set_bit (def_loc[bb->index ()],
+				  get_expr_index (m_vsetvl_def_exprs,
+						  m_unknow_info));
+		}
 	    }
 	  continue;
 	}
@@ -2516,6 +2505,36 @@ pre_vsetvl::compute_vsetvl_def_data ()
   sbitmap_vector_free (m_kill);
 }
 
+/* Subroutine of compute_lcm_local_properties which computes the local
+   transparent property of BB.  Note that the compile time is very sensitive
+   to compute_transparent and compute_lcm_local_properties; any change to
+   these 2 functions should take into account the compile-time impact on
+   programs which have a large number of blocks, e.g. SPEC 2017 wrf.
+
+   Current compile time profile of SPEC 2017 wrf:
+
+     1. scheduling - 27%
+     2. machine dep reorg (VSETVL PASS) - 18%
+
+   VSETVL pass should not spend more time than scheduling in compilation.  */
+void
+pre_vsetvl::compute_transparent (const bb_info *bb)
+{
+  int num_exprs = m_exprs.length ();
+  unsigned bb_index = bb->index ();
+  for (int i = 0; i < num_exprs; i++)
+    {
+      auto *info = m_exprs[i];
+      if (info->has_nonvlmax_reg_avl ()
+	  && bitmap_bit_p (m_reg_def_loc[bb_index], REGNO (info->get_avl ())))
+	bitmap_clear_bit (m_transp[bb_index], i);
+      else if (info->has_vl ()
+	       && bitmap_bit_p (m_reg_def_loc[bb_index],
+				REGNO (info->get_vl ())))
+	bitmap_clear_bit (m_transp[bb_index], i);
+    }
+}
+
 /* Compute the local properties of each recorded expression.
 
    Local properties are those that are defined by the block, irrespective of
@@ -2572,7 +2591,7 @@ pre_vsetvl::compute_lcm_local_properties ()
 
   bitmap_vector_clear (m_avloc, last_basic_block_for_fn (cfun));
   bitmap_vector_clear (m_antloc, last_basic_block_for_fn (cfun));
-  bitmap_vector_clear (m_transp, last_basic_block_for_fn (cfun));
+  bitmap_vector_ones (m_transp, last_basic_block_for_fn (cfun));
 
   /* -  If T is locally available at the end of a block, then T' must be
 	available at the end of the same block. Since some optimization has
@@ -2598,117 +2617,34 @@ pre_vsetvl::compute_lcm_local_properties ()
 
       /* Compute m_transp */
       if (block_info.empty_p ())
+	compute_transparent (bb);
+      else
 	{
-	  bitmap_ones (m_transp[bb_index]);
-	  for (int i = 0; i < num_exprs; i += 1)
-	    {
-	      const vsetvl_info &info = *m_exprs[i];
-	      if (!info.has_nonvlmax_reg_avl () && !info.has_vl ())
-		continue;
-
-	      if (info.has_nonvlmax_reg_avl ())
-		{
-		  unsigned int regno;
-		  sbitmap_iterator sbi;
-		  EXECUTE_IF_SET_IN_BITMAP (m_reg_def_loc[bb->index ()], 0,
-					    regno, sbi)
-		    {
-		      if (regno == REGNO (info.get_avl ()))
-			bitmap_clear_bit (m_transp[bb->index ()], i);
-		    }
-		}
-
-	      for (insn_info *insn : bb->real_nondebug_insns ())
-		{
-		  if (info.has_nonvlmax_reg_avl ()
-		      && find_access (insn->defs (), REGNO (info.get_avl ())))
-		    {
-		      bitmap_clear_bit (m_transp[bb_index], i);
-		      break;
-		    }
-
-		  if (info.has_vl ()
-		      && reg_mentioned_p (info.get_vl (), insn->rtl ()))
-		    {
-		      if (find_access (insn->defs (), REGNO (info.get_vl ())))
-			/* We can't fuse vsetvl into the blocks that modify the
-			   VL operand since successors of such blocks will need
-			   the value of those blocks are defining.
-
-					  bb 4: def a5
-					  /   \
-				  bb 5:use a5  bb 6:vsetvl a5, 5
-
-			   The example above shows that we can't fuse vsetvl
-			   from bb 6 into bb 4 since the successor bb 5 is using
-			   the value defined in bb 4.  */
-			;
-		      else
-			{
-			  /* We can't fuse vsetvl into the blocks that use the
-			     VL operand which has different value from the
-			     vsetvl info.
-
-					    bb 4: def a5
-					      |
-					    bb 5: use a5
-					      |
-					    bb 6: def a5
-					      |
-					    bb 7: use a5
-
-			     The example above shows that we can't fuse vsetvl
-			     from bb 6 into bb 5 since their value is different.
-			   */
-			  resource_info resource
-			    = full_register (REGNO (info.get_vl ()));
-			  def_lookup dl = crtl->ssa->find_def (resource, insn);
-			  def_info *def
-			    = dl.matching_set_or_last_def_of_prev_group ();
-			  insn_info *def_insn = extract_single_source (def);
-			  if (def_insn && vsetvl_insn_p (def_insn->rtl ()))
-			    {
-			      vsetvl_info def_info = vsetvl_info (def_insn);
-			      if (m_dem.compatible_p (def_info, info))
-				continue;
-			    }
-			}
+	  bitmap_clear (m_transp[bb_index]);
+	  vsetvl_info &header_info = block_info.get_entry_info ();
+	  vsetvl_info &footer_info = block_info.get_exit_info ();
 
-		      bitmap_clear_bit (m_transp[bb_index], i);
-		      break;
-		    }
-		}
-	    }
+	  if (header_info.valid_p () && anticipated_exp_p (header_info))
+	    bitmap_set_bit (m_antloc[bb_index],
+			    get_expr_index (m_exprs, header_info));
 
-	  continue;
+	  if (footer_info.valid_p ())
+	    for (int i = 0; i < num_exprs; i += 1)
+	      {
+		const vsetvl_info &info = *m_exprs[i];
+		if (!info.valid_p ())
+		  continue;
+		if (available_exp_p (footer_info, info))
+		  bitmap_set_bit (m_avloc[bb_index], i);
+	      }
 	}
 
-      vsetvl_info &header_info = block_info.get_entry_info ();
-      vsetvl_info &footer_info = block_info.get_exit_info ();
-
-      if (header_info.valid_p () && anticipated_exp_p (header_info))
-	bitmap_set_bit (m_antloc[bb_index],
-			get_expr_index (m_exprs, header_info));
-
-      if (footer_info.valid_p ())
-	for (int i = 0; i < num_exprs; i += 1)
-	  {
-	    const vsetvl_info &info = *m_exprs[i];
-	    if (!info.valid_p ())
-	      continue;
-	    if (available_exp_p (footer_info, info))
-	      bitmap_set_bit (m_avloc[bb_index], i);
-	  }
-    }
-
-  for (const bb_info *bb : crtl->ssa->bbs ())
-    {
-      unsigned bb_index = bb->index ();
       if (invalid_opt_bb_p (bb->cfg_bb ()))
 	{
 	  bitmap_clear (m_antloc[bb_index]);
 	  bitmap_clear (m_transp[bb_index]);
 	}
+
       /* Compute ae_kill for each basic block using:
 
 	 ~(TRANSP | COMP)
-- 
2.36.1


^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: [PATCH] RISC-V: Fix VSETLV PASS compile-time issue
  2024-01-29 11:32 [PATCH] RISC-V: Fix VSETLV PASS compile-time issue Juzhe-Zhong
@ 2024-01-30 21:19 ` Robin Dapp
  0 siblings, 0 replies; 2+ messages in thread
From: Robin Dapp @ 2024-01-30 21:19 UTC (permalink / raw)
  To: Juzhe-Zhong, gcc-patches; +Cc: rdapp.gcc, kito.cheng, kito.cheng, jeffreyalaw

LGTM.

Regards
 Robin


^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2024-01-30 21:19 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-01-29 11:32 [PATCH] RISC-V: Fix VSETLV PASS compile-time issue Juzhe-Zhong
2024-01-30 21:19 ` Robin Dapp

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).