public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] Speed up jump table switch detection.
@ 2021-08-16  8:28 Martin Liška
  2021-08-16 11:27 ` Richard Biener
  0 siblings, 1 reply; 2+ messages in thread
From: Martin Liška @ 2021-08-16  8:28 UTC (permalink / raw)
  To: gcc-patches

Hi.

As mentioned in the PR, this patch speeds up rapidly jump table detection
in switch lowering.

Patch can bootstrap on x86_64-linux-gnu and survives regression tests.

Ready to be installed?
Thanks,
Martin

	PR tree-optimization/100393

gcc/ChangeLog:

	* tree-switch-conversion.c (group_cluster::dump): Use
	  get_comparison_count.
	(jump_table_cluster::find_jump_tables): Pre-compute number of
	comparisons and then decrement it. Cache also max_ratio.
	(jump_table_cluster::can_be_handled): Change signature.
	* tree-switch-conversion.h (get_comparison_count): New.
---
  gcc/tree-switch-conversion.c | 42 ++++++++++++++++++++----------------
  gcc/tree-switch-conversion.h | 14 ++++++++++--
  2 files changed, 35 insertions(+), 21 deletions(-)

diff --git a/gcc/tree-switch-conversion.c b/gcc/tree-switch-conversion.c
index 294b5457008..244cf4be010 100644
--- a/gcc/tree-switch-conversion.c
+++ b/gcc/tree-switch-conversion.c
@@ -1091,7 +1091,7 @@ group_cluster::dump (FILE *f, bool details)
    for (unsigned i = 0; i < m_cases.length (); i++)
      {
        simple_cluster *sc = static_cast<simple_cluster *> (m_cases[i]);
-      comparison_count += sc->m_range_p ? 2 : 1;
+      comparison_count += sc->get_comparison_count ();
      }
  
    unsigned HOST_WIDE_INT range = get_range (get_low (), get_high ());
@@ -1186,11 +1186,24 @@ jump_table_cluster::find_jump_tables (vec<cluster *> &clusters)
  
    min.quick_push (min_cluster_item (0, 0, 0));
  
+  unsigned HOST_WIDE_INT max_ratio
+    = (optimize_insn_for_size_p ()
+       ? param_jump_table_max_growth_ratio_for_size
+       : param_jump_table_max_growth_ratio_for_speed);
+
    for (unsigned i = 1; i <= l; i++)
      {
        /* Set minimal # of clusters with i-th item to infinite.  */
        min.quick_push (min_cluster_item (INT_MAX, INT_MAX, INT_MAX));
  
+      /* Pre-calculate number of comparisons for the clusters.  */
+      HOST_WIDE_INT comparison_count = 0;
+      for (unsigned k = 0; k <= i - 1; k++)
+	{
+	  simple_cluster *sc = static_cast<simple_cluster *> (clusters[k]);
+	  comparison_count += sc->get_comparison_count ();
+	}
+
        for (unsigned j = 0; j < i; j++)
  	{
  	  unsigned HOST_WIDE_INT s = min[j].m_non_jt_cases;
@@ -1201,10 +1214,15 @@ jump_table_cluster::find_jump_tables (vec<cluster *> &clusters)
  	  if ((min[j].m_count + 1 < min[i].m_count
  	       || (min[j].m_count + 1 == min[i].m_count
  		   && s < min[i].m_non_jt_cases))
-	      && can_be_handled (clusters, j, i - 1))
+	      && can_be_handled (clusters, j, i - 1, max_ratio,
+				 comparison_count))
  	    min[i] = min_cluster_item (min[j].m_count + 1, j, s);
+
+	  simple_cluster *sc = static_cast<simple_cluster *> (clusters[j]);
+	  comparison_count -= sc->get_comparison_count ();
  	}
  
+      gcc_checking_assert (comparison_count == 0);
        gcc_checking_assert (min[i].m_count != INT_MAX);
      }
  
@@ -1242,7 +1260,9 @@ jump_table_cluster::find_jump_tables (vec<cluster *> &clusters)
  
  bool
  jump_table_cluster::can_be_handled (const vec<cluster *> &clusters,
-				    unsigned start, unsigned end)
+				    unsigned start, unsigned end,
+				    unsigned HOST_WIDE_INT max_ratio,
+				    unsigned HOST_WIDE_INT comparison_count)
  {
    /* If the switch is relatively small such that the cost of one
       indirect jump on the target are higher than the cost of a
@@ -1261,10 +1281,6 @@ jump_table_cluster::can_be_handled (const vec<cluster *> &clusters,
    if (start == end)
      return true;
  
-  unsigned HOST_WIDE_INT max_ratio
-    = (optimize_insn_for_size_p ()
-       ? param_jump_table_max_growth_ratio_for_size
-       : param_jump_table_max_growth_ratio_for_speed);
    unsigned HOST_WIDE_INT range = get_range (clusters[start]->get_low (),
  					    clusters[end]->get_high ());
    /* Check overflow.  */
@@ -1278,18 +1294,6 @@ jump_table_cluster::can_be_handled (const vec<cluster *> &clusters,
    if (lhs < range)
      return false;
  
-  /* First make quick guess as each cluster
-     can add at maximum 2 to the comparison_count.  */
-  if (lhs > 2 * max_ratio * (end - start + 1))
-    return false;
-
-  unsigned HOST_WIDE_INT comparison_count = 0;
-  for (unsigned i = start; i <= end; i++)
-    {
-      simple_cluster *sc = static_cast<simple_cluster *> (clusters[i]);
-      comparison_count += sc->m_range_p ? 2 : 1;
-    }
-
    return lhs <= max_ratio * comparison_count;
  }
  
diff --git a/gcc/tree-switch-conversion.h b/gcc/tree-switch-conversion.h
index d76f19b57f6..a375e52636e 100644
--- a/gcc/tree-switch-conversion.h
+++ b/gcc/tree-switch-conversion.h
@@ -180,6 +180,13 @@ public:
      return tree_int_cst_equal (get_low (), get_high ());
    }
  
+  /* Return number of comparisons needed for the case.  */
+  unsigned
+  get_comparison_count ()
+  {
+    return m_range_p ? 2 : 1;
+  }
+
    /* Low value of the case.  */
    tree m_low;
  
@@ -267,9 +274,12 @@ public:
    static vec<cluster *> find_jump_tables (vec<cluster *> &clusters);
  
    /* Return true when cluster starting at START and ending at END (inclusive)
-     can build a jump-table.  */
+     can build a jump-table.  COMPARISON_COUNT is number of comparison
+     operations needed if the clusters are expanded as decision tree.
+     MAX_RATIO tells about the maximum code growth (in percent).  */
    static bool can_be_handled (const vec<cluster *> &clusters, unsigned start,
-			      unsigned end);
+			      unsigned end, unsigned HOST_WIDE_INT max_ratio,
+			      unsigned HOST_WIDE_INT comparison_count);
  
    /* Return true if cluster starting at START and ending at END (inclusive)
       is profitable transformation.  */
-- 
2.32.0


^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: [PATCH] Speed up jump table switch detection.
  2021-08-16  8:28 [PATCH] Speed up jump table switch detection Martin Liška
@ 2021-08-16 11:27 ` Richard Biener
  0 siblings, 0 replies; 2+ messages in thread
From: Richard Biener @ 2021-08-16 11:27 UTC (permalink / raw)
  To: Martin Liška; +Cc: GCC Patches

On Mon, Aug 16, 2021 at 10:28 AM Martin Liška <mliska@suse.cz> wrote:
>
> Hi.
>
> As mentioned in the PR, this patch speeds up rapidly jump table detection
> in switch lowering.
>
> Patch can bootstrap on x86_64-linux-gnu and survives regression tests.
>
> Ready to be installed?

OK.

> Thanks,
> Martin
>
>         PR tree-optimization/100393
>
> gcc/ChangeLog:
>
>         * tree-switch-conversion.c (group_cluster::dump): Use
>           get_comparison_count.
>         (jump_table_cluster::find_jump_tables): Pre-compute number of
>         comparisons and then decrement it. Cache also max_ratio.
>         (jump_table_cluster::can_be_handled): Change signature.
>         * tree-switch-conversion.h (get_comparison_count): New.
> ---
>   gcc/tree-switch-conversion.c | 42 ++++++++++++++++++++----------------
>   gcc/tree-switch-conversion.h | 14 ++++++++++--
>   2 files changed, 35 insertions(+), 21 deletions(-)
>
> diff --git a/gcc/tree-switch-conversion.c b/gcc/tree-switch-conversion.c
> index 294b5457008..244cf4be010 100644
> --- a/gcc/tree-switch-conversion.c
> +++ b/gcc/tree-switch-conversion.c
> @@ -1091,7 +1091,7 @@ group_cluster::dump (FILE *f, bool details)
>     for (unsigned i = 0; i < m_cases.length (); i++)
>       {
>         simple_cluster *sc = static_cast<simple_cluster *> (m_cases[i]);
> -      comparison_count += sc->m_range_p ? 2 : 1;
> +      comparison_count += sc->get_comparison_count ();
>       }
>
>     unsigned HOST_WIDE_INT range = get_range (get_low (), get_high ());
> @@ -1186,11 +1186,24 @@ jump_table_cluster::find_jump_tables (vec<cluster *> &clusters)
>
>     min.quick_push (min_cluster_item (0, 0, 0));
>
> +  unsigned HOST_WIDE_INT max_ratio
> +    = (optimize_insn_for_size_p ()
> +       ? param_jump_table_max_growth_ratio_for_size
> +       : param_jump_table_max_growth_ratio_for_speed);
> +
>     for (unsigned i = 1; i <= l; i++)
>       {
>         /* Set minimal # of clusters with i-th item to infinite.  */
>         min.quick_push (min_cluster_item (INT_MAX, INT_MAX, INT_MAX));
>
> +      /* Pre-calculate number of comparisons for the clusters.  */
> +      HOST_WIDE_INT comparison_count = 0;
> +      for (unsigned k = 0; k <= i - 1; k++)
> +       {
> +         simple_cluster *sc = static_cast<simple_cluster *> (clusters[k]);
> +         comparison_count += sc->get_comparison_count ();
> +       }
> +
>         for (unsigned j = 0; j < i; j++)
>         {
>           unsigned HOST_WIDE_INT s = min[j].m_non_jt_cases;
> @@ -1201,10 +1214,15 @@ jump_table_cluster::find_jump_tables (vec<cluster *> &clusters)
>           if ((min[j].m_count + 1 < min[i].m_count
>                || (min[j].m_count + 1 == min[i].m_count
>                    && s < min[i].m_non_jt_cases))
> -             && can_be_handled (clusters, j, i - 1))
> +             && can_be_handled (clusters, j, i - 1, max_ratio,
> +                                comparison_count))
>             min[i] = min_cluster_item (min[j].m_count + 1, j, s);
> +
> +         simple_cluster *sc = static_cast<simple_cluster *> (clusters[j]);
> +         comparison_count -= sc->get_comparison_count ();
>         }
>
> +      gcc_checking_assert (comparison_count == 0);
>         gcc_checking_assert (min[i].m_count != INT_MAX);
>       }
>
> @@ -1242,7 +1260,9 @@ jump_table_cluster::find_jump_tables (vec<cluster *> &clusters)
>
>   bool
>   jump_table_cluster::can_be_handled (const vec<cluster *> &clusters,
> -                                   unsigned start, unsigned end)
> +                                   unsigned start, unsigned end,
> +                                   unsigned HOST_WIDE_INT max_ratio,
> +                                   unsigned HOST_WIDE_INT comparison_count)
>   {
>     /* If the switch is relatively small such that the cost of one
>        indirect jump on the target are higher than the cost of a
> @@ -1261,10 +1281,6 @@ jump_table_cluster::can_be_handled (const vec<cluster *> &clusters,
>     if (start == end)
>       return true;
>
> -  unsigned HOST_WIDE_INT max_ratio
> -    = (optimize_insn_for_size_p ()
> -       ? param_jump_table_max_growth_ratio_for_size
> -       : param_jump_table_max_growth_ratio_for_speed);
>     unsigned HOST_WIDE_INT range = get_range (clusters[start]->get_low (),
>                                             clusters[end]->get_high ());
>     /* Check overflow.  */
> @@ -1278,18 +1294,6 @@ jump_table_cluster::can_be_handled (const vec<cluster *> &clusters,
>     if (lhs < range)
>       return false;
>
> -  /* First make quick guess as each cluster
> -     can add at maximum 2 to the comparison_count.  */
> -  if (lhs > 2 * max_ratio * (end - start + 1))
> -    return false;
> -
> -  unsigned HOST_WIDE_INT comparison_count = 0;
> -  for (unsigned i = start; i <= end; i++)
> -    {
> -      simple_cluster *sc = static_cast<simple_cluster *> (clusters[i]);
> -      comparison_count += sc->m_range_p ? 2 : 1;
> -    }
> -
>     return lhs <= max_ratio * comparison_count;
>   }
>
> diff --git a/gcc/tree-switch-conversion.h b/gcc/tree-switch-conversion.h
> index d76f19b57f6..a375e52636e 100644
> --- a/gcc/tree-switch-conversion.h
> +++ b/gcc/tree-switch-conversion.h
> @@ -180,6 +180,13 @@ public:
>       return tree_int_cst_equal (get_low (), get_high ());
>     }
>
> +  /* Return number of comparisons needed for the case.  */
> +  unsigned
> +  get_comparison_count ()
> +  {
> +    return m_range_p ? 2 : 1;
> +  }
> +
>     /* Low value of the case.  */
>     tree m_low;
>
> @@ -267,9 +274,12 @@ public:
>     static vec<cluster *> find_jump_tables (vec<cluster *> &clusters);
>
>     /* Return true when cluster starting at START and ending at END (inclusive)
> -     can build a jump-table.  */
> +     can build a jump-table.  COMPARISON_COUNT is number of comparison
> +     operations needed if the clusters are expanded as decision tree.
> +     MAX_RATIO tells about the maximum code growth (in percent).  */
>     static bool can_be_handled (const vec<cluster *> &clusters, unsigned start,
> -                             unsigned end);
> +                             unsigned end, unsigned HOST_WIDE_INT max_ratio,
> +                             unsigned HOST_WIDE_INT comparison_count);
>
>     /* Return true if cluster starting at START and ending at END (inclusive)
>        is profitable transformation.  */
> --
> 2.32.0
>

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2021-08-16 11:28 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-08-16  8:28 [PATCH] Speed up jump table switch detection Martin Liška
2021-08-16 11:27 ` Richard Biener

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).