public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc(refs/users/marxin/heads/backport-9-v6)] switch conversion: make a rapid speed up
@ 2020-10-02  8:17 Martin Liska
  0 siblings, 0 replies; 3+ messages in thread
From: Martin Liska @ 2020-10-02  8:17 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:d14f9d989a2b2ccda91c062ad8daf7dca47fbb72

commit d14f9d989a2b2ccda91c062ad8daf7dca47fbb72
Author: Martin Liska <mliska@suse.cz>
Date:   Thu Oct 1 21:26:37 2020 +0200

    switch conversion: make a rapid speed up
    
    gcc/ChangeLog:
    
            PR tree-optimization/96979
            * tree-switch-conversion.c (jump_table_cluster::can_be_handled):
            Make a fast bail out.
            (bit_test_cluster::can_be_handled): Likewise here.
            * tree-switch-conversion.h (get_range): Use wi::to_wide instead
            of a folding.
    
    gcc/testsuite/ChangeLog:
    
            PR tree-optimization/96979
            * g++.dg/tree-ssa/pr96979.C: New test.
    
    (cherry picked from commit e46858e4eeee45d35ca4a7df1996186fe884879b)

Diff:
---
 gcc/testsuite/g++.dg/tree-ssa/pr96979.C | 48 +++++++++++++++++++++++++++++++++
 gcc/tree-switch-conversion.c            | 34 +++++++++++++++++++----
 gcc/tree-switch-conversion.h            |  7 +++--
 3 files changed, 80 insertions(+), 9 deletions(-)

diff --git a/gcc/testsuite/g++.dg/tree-ssa/pr96979.C b/gcc/testsuite/g++.dg/tree-ssa/pr96979.C
new file mode 100644
index 00000000000..ec0f57a8548
--- /dev/null
+++ b/gcc/testsuite/g++.dg/tree-ssa/pr96979.C
@@ -0,0 +1,48 @@
+/* PR tree-optimization/96979 */
+/* { dg-do compile } */
+/* { dg-options "-std=c++17 -O2" } */
+
+using u64 = unsigned long long;
+
+constexpr inline u64
+foo (const char *str) noexcept
+{
+  u64 value = 0xcbf29ce484222325ULL;
+  for (u64 i = 0; str[i]; i++)
+    value = (value ^ u64(str[i])) * 0x100000001b3ULL;
+  return value;
+}
+
+struct V
+{
+  enum W
+  {
+#define A(n) n,
+#define B(n) A(n##0) A(n##1) A(n##2) A(n##3) A(n##4) A(n##5) A(n##6) A(n##7) A(n##8) A(n##9)
+#define C(n) B(n##0) B(n##1) B(n##2) B(n##3) B(n##4) B(n##5) B(n##6) B(n##7) B(n##8) B(n##9)
+#define D(n) C(n##0) C(n##1) C(n##2) C(n##3) C(n##4) C(n##5) C(n##6) C(n##7) C(n##8) C(n##9)
+#define E D(foo1) D(foo2) D(foo3)
+    E
+    last
+  };
+
+  constexpr static W
+  bar (const u64 h) noexcept
+  {
+    switch (h)
+      {
+#undef A
+#define F(n) #n
+#define A(n) case foo (F(n)): return n;
+        E
+      }
+    return last;
+  }
+};
+
+int
+baz (const char *s)
+{
+  const u64 h = foo (s);
+  return V::bar (h);
+}
diff --git a/gcc/tree-switch-conversion.c b/gcc/tree-switch-conversion.c
index dd6dd8064de..c15594c4e7a 100644
--- a/gcc/tree-switch-conversion.c
+++ b/gcc/tree-switch-conversion.c
@@ -1270,12 +1270,25 @@ jump_table_cluster::can_be_handled (const vec<cluster *> &clusters,
 
   unsigned HOST_WIDE_INT max_ratio
     = optimize_insn_for_size_p () ? max_ratio_for_size : max_ratio_for_speed;
+  max_ratio *= 100;
   unsigned HOST_WIDE_INT range = get_range (clusters[start]->get_low (),
 					    clusters[end]->get_high ());
   /* Check overflow.  */
   if (range == 0)
     return false;
 
+  if (range > HOST_WIDE_INT_M1U / 100)
+    return false;
+
+  unsigned HOST_WIDE_INT lhs = 100 * range;
+  if (lhs < range)
+    return false;
+
+  /* First make quick guess as each cluster
+     can add at maximum 2 to the comparison_count.  */
+  if (lhs > 2 * max_ratio * (end - start + 1))
+    return false;
+
   unsigned HOST_WIDE_INT comparison_count = 0;
   for (unsigned i = start; i <= end; i++)
     {
@@ -1373,12 +1386,12 @@ bit_test_cluster::can_be_handled (unsigned HOST_WIDE_INT range,
 {
   /* Check overflow.  */
   if (range == 0)
-    return 0;
+    return false;
 
   if (range >= GET_MODE_BITSIZE (word_mode))
     return false;
 
-  return uniq <= 3;
+  return uniq <= m_max_case_bit_tests;
 }
 
 /* Return true when cluster starting at START and ending at END (inclusive)
@@ -1388,6 +1401,7 @@ bool
 bit_test_cluster::can_be_handled (const vec<cluster *> &clusters,
 				  unsigned start, unsigned end)
 {
+  auto_vec<int, m_max_case_bit_tests> dest_bbs;
   /* For algorithm correctness, bit test for a single case must return
      true.  We bail out in is_beneficial if it's called just for
      a single case.  */
@@ -1396,15 +1410,25 @@ bit_test_cluster::can_be_handled (const vec<cluster *> &clusters,
 
   unsigned HOST_WIDE_INT range = get_range (clusters[start]->get_low (),
 					    clusters[end]->get_high ());
-  auto_bitmap dest_bbs;
+
+  /* Make a guess first.  */
+  if (!can_be_handled (range, m_max_case_bit_tests))
+    return false;
 
   for (unsigned i = start; i <= end; i++)
     {
       simple_cluster *sc = static_cast<simple_cluster *> (clusters[i]);
-      bitmap_set_bit (dest_bbs, sc->m_case_bb->index);
+      /* m_max_case_bit_tests is very small integer, thus the operation
+	 is constant. */
+      if (!dest_bbs.contains (sc->m_case_bb->index))
+	{
+	  if (dest_bbs.length () >= m_max_case_bit_tests)
+	    return false;
+	  dest_bbs.quick_push (sc->m_case_bb->index);
+	}
     }
 
-  return can_be_handled (range, bitmap_count_bits (dest_bbs));
+  return true;
 }
 
 /* Return true when COUNT of cases of UNIQ labels is beneficial for bit test
diff --git a/gcc/tree-switch-conversion.h b/gcc/tree-switch-conversion.h
index b3bc4b9ddf7..dffd1ee6c4b 100644
--- a/gcc/tree-switch-conversion.h
+++ b/gcc/tree-switch-conversion.h
@@ -83,11 +83,10 @@ struct cluster
      then return 0.  */
   static unsigned HOST_WIDE_INT get_range (tree low, tree high)
   {
-    tree r = fold_build2 (MINUS_EXPR, TREE_TYPE (low), high, low);
-    if (!tree_fits_uhwi_p (r))
+    wide_int w = wi::to_wide (high) - wi::to_wide (low);
+    if (wi::neg_p (w, TYPE_SIGN (TREE_TYPE (low))) || !wi::fits_uhwi_p (w))
       return 0;
-
-    return tree_to_uhwi (r) + 1;
+    return w.to_uhwi () + 1;
   }
 
   /* Case label.  */


^ permalink raw reply	[flat|nested] 3+ messages in thread

* [gcc(refs/users/marxin/heads/backport-9-v6)] switch conversion: make a rapid speed up
@ 2020-10-01 19:40 Martin Liska
  0 siblings, 0 replies; 3+ messages in thread
From: Martin Liska @ 2020-10-01 19:40 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:336e3f2e9a0df68634e13ff374384906400d6b7b

commit 336e3f2e9a0df68634e13ff374384906400d6b7b
Author: Martin Liska <mliska@suse.cz>
Date:   Thu Oct 1 21:26:37 2020 +0200

    switch conversion: make a rapid speed up
    
    gcc/ChangeLog:
    
            PR tree-optimization/96979
            * tree-switch-conversion.c (jump_table_cluster::can_be_handled):
            Make a fast bail out.
            (bit_test_cluster::can_be_handled): Likewise here.
            * tree-switch-conversion.h (get_range): Use wi::to_wide instead
            of a folding.
    
    gcc/testsuite/ChangeLog:
    
            PR tree-optimization/96979
            * g++.dg/tree-ssa/pr96979.C: New test.
    
    (cherry picked from commit e46858e4eeee45d35ca4a7df1996186fe884879b)

Diff:
---
 gcc/testsuite/g++.dg/tree-ssa/pr96979.C | 48 +++++++++++++++++++++++++++++++++
 gcc/tree-switch-conversion.c            | 33 +++++++++++++++++++----
 gcc/tree-switch-conversion.h            |  7 +++--
 3 files changed, 79 insertions(+), 9 deletions(-)

diff --git a/gcc/testsuite/g++.dg/tree-ssa/pr96979.C b/gcc/testsuite/g++.dg/tree-ssa/pr96979.C
new file mode 100644
index 00000000000..ec0f57a8548
--- /dev/null
+++ b/gcc/testsuite/g++.dg/tree-ssa/pr96979.C
@@ -0,0 +1,48 @@
+/* PR tree-optimization/96979 */
+/* { dg-do compile } */
+/* { dg-options "-std=c++17 -O2" } */
+
+using u64 = unsigned long long;
+
+constexpr inline u64
+foo (const char *str) noexcept
+{
+  u64 value = 0xcbf29ce484222325ULL;
+  for (u64 i = 0; str[i]; i++)
+    value = (value ^ u64(str[i])) * 0x100000001b3ULL;
+  return value;
+}
+
+struct V
+{
+  enum W
+  {
+#define A(n) n,
+#define B(n) A(n##0) A(n##1) A(n##2) A(n##3) A(n##4) A(n##5) A(n##6) A(n##7) A(n##8) A(n##9)
+#define C(n) B(n##0) B(n##1) B(n##2) B(n##3) B(n##4) B(n##5) B(n##6) B(n##7) B(n##8) B(n##9)
+#define D(n) C(n##0) C(n##1) C(n##2) C(n##3) C(n##4) C(n##5) C(n##6) C(n##7) C(n##8) C(n##9)
+#define E D(foo1) D(foo2) D(foo3)
+    E
+    last
+  };
+
+  constexpr static W
+  bar (const u64 h) noexcept
+  {
+    switch (h)
+      {
+#undef A
+#define F(n) #n
+#define A(n) case foo (F(n)): return n;
+        E
+      }
+    return last;
+  }
+};
+
+int
+baz (const char *s)
+{
+  const u64 h = foo (s);
+  return V::bar (h);
+}
diff --git a/gcc/tree-switch-conversion.c b/gcc/tree-switch-conversion.c
index dd6dd8064de..32b76d8ec70 100644
--- a/gcc/tree-switch-conversion.c
+++ b/gcc/tree-switch-conversion.c
@@ -1276,6 +1276,18 @@ jump_table_cluster::can_be_handled (const vec<cluster *> &clusters,
   if (range == 0)
     return false;
 
+  if (range > HOST_WIDE_INT_M1U / 100)
+    return false;
+
+  unsigned HOST_WIDE_INT lhs = 100 * range;
+  if (lhs < range)
+    return false;
+
+  /* First make quick guess as each cluster
+     can add at maximum 2 to the comparison_count.  */
+  if (lhs > 2 * max_ratio * (end - start + 1))
+    return false;
+
   unsigned HOST_WIDE_INT comparison_count = 0;
   for (unsigned i = start; i <= end; i++)
     {
@@ -1373,12 +1385,12 @@ bit_test_cluster::can_be_handled (unsigned HOST_WIDE_INT range,
 {
   /* Check overflow.  */
   if (range == 0)
-    return 0;
+    return false;
 
   if (range >= GET_MODE_BITSIZE (word_mode))
     return false;
 
-  return uniq <= 3;
+  return uniq <= m_max_case_bit_tests;
 }
 
 /* Return true when cluster starting at START and ending at END (inclusive)
@@ -1388,6 +1400,7 @@ bool
 bit_test_cluster::can_be_handled (const vec<cluster *> &clusters,
 				  unsigned start, unsigned end)
 {
+  auto_vec<int, m_max_case_bit_tests> dest_bbs;
   /* For algorithm correctness, bit test for a single case must return
      true.  We bail out in is_beneficial if it's called just for
      a single case.  */
@@ -1396,15 +1409,25 @@ bit_test_cluster::can_be_handled (const vec<cluster *> &clusters,
 
   unsigned HOST_WIDE_INT range = get_range (clusters[start]->get_low (),
 					    clusters[end]->get_high ());
-  auto_bitmap dest_bbs;
+
+  /* Make a guess first.  */
+  if (!can_be_handled (range, m_max_case_bit_tests))
+    return false;
 
   for (unsigned i = start; i <= end; i++)
     {
       simple_cluster *sc = static_cast<simple_cluster *> (clusters[i]);
-      bitmap_set_bit (dest_bbs, sc->m_case_bb->index);
+      /* m_max_case_bit_tests is very small integer, thus the operation
+	 is constant. */
+      if (!dest_bbs.contains (sc->m_case_bb->index))
+	{
+	  if (dest_bbs.length () >= m_max_case_bit_tests)
+	    return false;
+	  dest_bbs.quick_push (sc->m_case_bb->index);
+	}
     }
 
-  return can_be_handled (range, bitmap_count_bits (dest_bbs));
+  return true;
 }
 
 /* Return true when COUNT of cases of UNIQ labels is beneficial for bit test
diff --git a/gcc/tree-switch-conversion.h b/gcc/tree-switch-conversion.h
index b3bc4b9ddf7..dffd1ee6c4b 100644
--- a/gcc/tree-switch-conversion.h
+++ b/gcc/tree-switch-conversion.h
@@ -83,11 +83,10 @@ struct cluster
      then return 0.  */
   static unsigned HOST_WIDE_INT get_range (tree low, tree high)
   {
-    tree r = fold_build2 (MINUS_EXPR, TREE_TYPE (low), high, low);
-    if (!tree_fits_uhwi_p (r))
+    wide_int w = wi::to_wide (high) - wi::to_wide (low);
+    if (wi::neg_p (w, TYPE_SIGN (TREE_TYPE (low))) || !wi::fits_uhwi_p (w))
       return 0;
-
-    return tree_to_uhwi (r) + 1;
+    return w.to_uhwi () + 1;
   }
 
   /* Case label.  */


^ permalink raw reply	[flat|nested] 3+ messages in thread

* [gcc(refs/users/marxin/heads/backport-9-v6)] switch conversion: make a rapid speed up
@ 2020-10-01 19:29 Martin Liska
  0 siblings, 0 replies; 3+ messages in thread
From: Martin Liska @ 2020-10-01 19:29 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:3ad2ba3c484ebdd759d26fb30af82e1c8e389e6d

commit 3ad2ba3c484ebdd759d26fb30af82e1c8e389e6d
Author: Martin Liska <mliska@suse.cz>
Date:   Thu Oct 1 21:26:37 2020 +0200

    switch conversion: make a rapid speed up
    
    gcc/ChangeLog:
    
            PR tree-optimization/96979
            * tree-switch-conversion.c (jump_table_cluster::can_be_handled):
            Make a fast bail out.
            (bit_test_cluster::can_be_handled): Likewise here.
            * tree-switch-conversion.h (get_range): Use wi::to_wide instead
            of a folding.
    
    gcc/testsuite/ChangeLog:
    
            PR tree-optimization/96979
            * g++.dg/tree-ssa/pr96979.C: New test.
    
    (cherry picked from commit e46858e4eeee45d35ca4a7df1996186fe884879b)

Diff:
---
 gcc/testsuite/g++.dg/tree-ssa/pr96979.C | 48 +++++++++++++++++++++++++++++++++
 gcc/tree-switch-conversion.c            | 29 ++++++++++++++++----
 gcc/tree-switch-conversion.h            |  7 +++--
 3 files changed, 75 insertions(+), 9 deletions(-)

diff --git a/gcc/testsuite/g++.dg/tree-ssa/pr96979.C b/gcc/testsuite/g++.dg/tree-ssa/pr96979.C
new file mode 100644
index 00000000000..ec0f57a8548
--- /dev/null
+++ b/gcc/testsuite/g++.dg/tree-ssa/pr96979.C
@@ -0,0 +1,48 @@
+/* PR tree-optimization/96979 */
+/* { dg-do compile } */
+/* { dg-options "-std=c++17 -O2" } */
+
+using u64 = unsigned long long;
+
+constexpr inline u64
+foo (const char *str) noexcept
+{
+  u64 value = 0xcbf29ce484222325ULL;
+  for (u64 i = 0; str[i]; i++)
+    value = (value ^ u64(str[i])) * 0x100000001b3ULL;
+  return value;
+}
+
+struct V
+{
+  enum W
+  {
+#define A(n) n,
+#define B(n) A(n##0) A(n##1) A(n##2) A(n##3) A(n##4) A(n##5) A(n##6) A(n##7) A(n##8) A(n##9)
+#define C(n) B(n##0) B(n##1) B(n##2) B(n##3) B(n##4) B(n##5) B(n##6) B(n##7) B(n##8) B(n##9)
+#define D(n) C(n##0) C(n##1) C(n##2) C(n##3) C(n##4) C(n##5) C(n##6) C(n##7) C(n##8) C(n##9)
+#define E D(foo1) D(foo2) D(foo3)
+    E
+    last
+  };
+
+  constexpr static W
+  bar (const u64 h) noexcept
+  {
+    switch (h)
+      {
+#undef A
+#define F(n) #n
+#define A(n) case foo (F(n)): return n;
+        E
+      }
+    return last;
+  }
+};
+
+int
+baz (const char *s)
+{
+  const u64 h = foo (s);
+  return V::bar (h);
+}
diff --git a/gcc/tree-switch-conversion.c b/gcc/tree-switch-conversion.c
index dd6dd8064de..f13d549865f 100644
--- a/gcc/tree-switch-conversion.c
+++ b/gcc/tree-switch-conversion.c
@@ -1276,6 +1276,14 @@ jump_table_cluster::can_be_handled (const vec<cluster *> &clusters,
   if (range == 0)
     return false;
 
+  if (range > HOST_WIDE_INT_M1U / 100)
+    return false;
+
+  /* First make quick guess as each cluster
+     can add at maximum 2 to the comparison_count.  */
+  if (lhs > 2 * max_ratio * (end - start + 1))
+    return false;
+
   unsigned HOST_WIDE_INT comparison_count = 0;
   for (unsigned i = start; i <= end; i++)
     {
@@ -1373,12 +1381,12 @@ bit_test_cluster::can_be_handled (unsigned HOST_WIDE_INT range,
 {
   /* Check overflow.  */
   if (range == 0)
-    return 0;
+    return false;
 
   if (range >= GET_MODE_BITSIZE (word_mode))
     return false;
 
-  return uniq <= 3;
+  return uniq <= m_max_case_bit_tests;
 }
 
 /* Return true when cluster starting at START and ending at END (inclusive)
@@ -1388,6 +1396,7 @@ bool
 bit_test_cluster::can_be_handled (const vec<cluster *> &clusters,
 				  unsigned start, unsigned end)
 {
+  auto_vec<int, m_max_case_bit_tests> dest_bbs;
   /* For algorithm correctness, bit test for a single case must return
      true.  We bail out in is_beneficial if it's called just for
      a single case.  */
@@ -1396,15 +1405,25 @@ bit_test_cluster::can_be_handled (const vec<cluster *> &clusters,
 
   unsigned HOST_WIDE_INT range = get_range (clusters[start]->get_low (),
 					    clusters[end]->get_high ());
-  auto_bitmap dest_bbs;
+
+  /* Make a guess first.  */
+  if (!can_be_handled (range, m_max_case_bit_tests))
+    return false;
 
   for (unsigned i = start; i <= end; i++)
     {
       simple_cluster *sc = static_cast<simple_cluster *> (clusters[i]);
-      bitmap_set_bit (dest_bbs, sc->m_case_bb->index);
+      /* m_max_case_bit_tests is very small integer, thus the operation
+	 is constant. */
+      if (!dest_bbs.contains (sc->m_case_bb->index))
+	{
+	  if (dest_bbs.length () >= m_max_case_bit_tests)
+	    return false;
+	  dest_bbs.quick_push (sc->m_case_bb->index);
+	}
     }
 
-  return can_be_handled (range, bitmap_count_bits (dest_bbs));
+  return true;
 }
 
 /* Return true when COUNT of cases of UNIQ labels is beneficial for bit test
diff --git a/gcc/tree-switch-conversion.h b/gcc/tree-switch-conversion.h
index b3bc4b9ddf7..dffd1ee6c4b 100644
--- a/gcc/tree-switch-conversion.h
+++ b/gcc/tree-switch-conversion.h
@@ -83,11 +83,10 @@ struct cluster
      then return 0.  */
   static unsigned HOST_WIDE_INT get_range (tree low, tree high)
   {
-    tree r = fold_build2 (MINUS_EXPR, TREE_TYPE (low), high, low);
-    if (!tree_fits_uhwi_p (r))
+    wide_int w = wi::to_wide (high) - wi::to_wide (low);
+    if (wi::neg_p (w, TYPE_SIGN (TREE_TYPE (low))) || !wi::fits_uhwi_p (w))
       return 0;
-
-    return tree_to_uhwi (r) + 1;
+    return w.to_uhwi () + 1;
   }
 
   /* Case label.  */


^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2020-10-02  8:17 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-10-02  8:17 [gcc(refs/users/marxin/heads/backport-9-v6)] switch conversion: make a rapid speed up Martin Liska
  -- strict thread matches above, loose matches on Subject: below --
2020-10-01 19:40 Martin Liska
2020-10-01 19:29 Martin Liska

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).