public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc(refs/users/marxin/heads/PR96979-switch-clusters-speed)] switch conversion: make a rapid speed up
@ 2020-09-25 8:09 Martin Liska
0 siblings, 0 replies; 5+ messages in thread
From: Martin Liska @ 2020-09-25 8:09 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:fdec757f374ffa01621f1e200a987726e9e027d9
commit fdec757f374ffa01621f1e200a987726e9e027d9
Author: Martin Liska <mliska@suse.cz>
Date: Thu Sep 24 13:34:13 2020 +0200
switch conversion: make a rapid speed up
gcc/ChangeLog:
PR tree-optimization/96979
* tree-switch-conversion.c (jump_table_cluster::can_be_handled):
Make a fast bail out.
(bit_test_cluster::can_be_handled): Likewise here.
* tree-switch-conversion.h (get_range): Use wi::to_wide instead
of a folding.
gcc/testsuite/ChangeLog:
PR tree-optimization/96979
* g++.dg/tree-ssa/pr96979.C: New test.
Diff:
---
gcc/testsuite/g++.dg/tree-ssa/pr96979.C | 50 +++++++++++++++++++++++++++++++++
gcc/tree-switch-conversion.c | 32 +++++++++++++++------
gcc/tree-switch-conversion.h | 7 ++---
3 files changed, 76 insertions(+), 13 deletions(-)
diff --git a/gcc/testsuite/g++.dg/tree-ssa/pr96979.C b/gcc/testsuite/g++.dg/tree-ssa/pr96979.C
new file mode 100644
index 00000000000..85c703a140d
--- /dev/null
+++ b/gcc/testsuite/g++.dg/tree-ssa/pr96979.C
@@ -0,0 +1,50 @@
+/* PR tree-optimization/96979 */
+/* { dg-do compile } */
+/* { dg-options "-std=c++17 -O2 -fdump-tree-switchlower1" } */
+
+using u64 = unsigned long long;
+
+constexpr inline u64
+foo (const char *str) noexcept
+{
+ u64 value = 0xcbf29ce484222325ULL;
+ for (u64 i = 0; str[i]; i++)
+ value = (value ^ u64(str[i])) * 0x100000001b3ULL;
+ return value;
+}
+
+struct V
+{
+ enum W
+ {
+#define A(n) n,
+#define B(n) A(n##0) A(n##1) A(n##2) A(n##3) A(n##4) A(n##5) A(n##6) A(n##7) A(n##8) A(n##9)
+#define C(n) B(n##0) B(n##1) B(n##2) B(n##3) B(n##4) B(n##5) B(n##6) B(n##7) B(n##8) B(n##9)
+#define D(n) C(n##0) C(n##1) C(n##2) C(n##3) C(n##4) C(n##5) C(n##6) C(n##7) C(n##8) C(n##9)
+#define E D(foo1) D(foo2) D(foo3)
+ E
+ last
+ };
+
+ constexpr static W
+ bar (const u64 h) noexcept
+ {
+ switch (h)
+ {
+#undef A
+#define F(n) #n
+#define A(n) case foo (F(n)): return n;
+ E
+ }
+ return last;
+ }
+};
+
+int
+baz (const char *s)
+{
+ const u64 h = foo (s);
+ return V::bar (h);
+}
+
+/* { dg-final { scan-tree-dump-times ";; Bail out: --param=max-switch-clustering-attempts reached" 2 "switchlower1" } } */
diff --git a/gcc/tree-switch-conversion.c b/gcc/tree-switch-conversion.c
index 186411ff3c4..3212e964b84 100644
--- a/gcc/tree-switch-conversion.c
+++ b/gcc/tree-switch-conversion.c
@@ -1268,6 +1268,15 @@ jump_table_cluster::can_be_handled (const vec<cluster *> &clusters,
if (range == 0)
return false;
+ unsigned HOST_WIDE_INT lhs = 100 * range;
+ if (lhs < range)
+ return false;
+
+ /* First make quick guess as each cluster
+ can add at maximum 2 to the comparison_count. */
+ if (lhs > 2 * max_ratio * (end - start + 1))
+ return false;
+
unsigned HOST_WIDE_INT comparison_count = 0;
for (unsigned i = start; i <= end; i++)
{
@@ -1275,10 +1284,6 @@ jump_table_cluster::can_be_handled (const vec<cluster *> &clusters,
comparison_count += sc->m_range_p ? 2 : 1;
}
- unsigned HOST_WIDE_INT lhs = 100 * range;
- if (lhs < range)
- return false;
-
return lhs <= max_ratio * comparison_count;
}
@@ -1364,12 +1369,12 @@ bit_test_cluster::can_be_handled (unsigned HOST_WIDE_INT range,
{
/* Check overflow. */
if (range == 0)
- return 0;
+ return false;
if (range >= GET_MODE_BITSIZE (word_mode))
return false;
- return uniq <= 3;
+ return uniq <= m_max_case_bit_tests;
}
/* Return true when cluster starting at START and ending at END (inclusive)
@@ -1379,6 +1384,7 @@ bool
bit_test_cluster::can_be_handled (const vec<cluster *> &clusters,
unsigned start, unsigned end)
{
+ auto_vec<int> dest_bbs;
/* For algorithm correctness, bit test for a single case must return
true. We bail out in is_beneficial if it's called just for
a single case. */
@@ -1387,15 +1393,23 @@ bit_test_cluster::can_be_handled (const vec<cluster *> &clusters,
unsigned HOST_WIDE_INT range = get_range (clusters[start]->get_low (),
clusters[end]->get_high ());
- auto_bitmap dest_bbs;
+
+ /* Make a guess first. */
+ if (!can_be_handled (range, m_max_case_bit_tests))
+ return false;
for (unsigned i = start; i <= end; i++)
{
simple_cluster *sc = static_cast<simple_cluster *> (clusters[i]);
- bitmap_set_bit (dest_bbs, sc->m_case_bb->index);
+ if (!dest_bbs.contains (sc->m_case_bb->index))
+ {
+ dest_bbs.safe_push (sc->m_case_bb->index);
+ if (dest_bbs.length () > m_max_case_bit_tests)
+ return false;
+ }
}
- return can_be_handled (range, bitmap_count_bits (dest_bbs));
+ return true;
}
/* Return true when COUNT of cases of UNIQ labels is beneficial for bit test
diff --git a/gcc/tree-switch-conversion.h b/gcc/tree-switch-conversion.h
index 9ebcf109319..dbfd9eecba2 100644
--- a/gcc/tree-switch-conversion.h
+++ b/gcc/tree-switch-conversion.h
@@ -84,11 +84,10 @@ public:
then return 0. */
static unsigned HOST_WIDE_INT get_range (tree low, tree high)
{
- tree r = fold_build2 (MINUS_EXPR, TREE_TYPE (low), high, low);
- if (!tree_fits_uhwi_p (r))
+ wide_int w = wi::to_wide (high) - wi::to_wide (low);
+ if (wi::neg_p (w, TYPE_SIGN (TREE_TYPE (low))) || !wi::fits_uhwi_p (w))
return 0;
-
- return tree_to_uhwi (r) + 1;
+ return w.to_uhwi () + 1;
}
/* Case label. */
^ permalink raw reply [flat|nested] 5+ messages in thread
* [gcc(refs/users/marxin/heads/PR96979-switch-clusters-speed)] switch conversion: make a rapid speed up
@ 2020-09-25 14:16 Martin Liska
0 siblings, 0 replies; 5+ messages in thread
From: Martin Liska @ 2020-09-25 14:16 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:076fe816e067ee7aa794716d1f1104b289993876
commit 076fe816e067ee7aa794716d1f1104b289993876
Author: Martin Liska <mliska@suse.cz>
Date: Thu Sep 24 13:34:13 2020 +0200
switch conversion: make a rapid speed up
gcc/ChangeLog:
PR tree-optimization/96979
* tree-switch-conversion.c (jump_table_cluster::can_be_handled):
Make a fast bail out.
(bit_test_cluster::can_be_handled): Likewise here.
* tree-switch-conversion.h (get_range): Use wi::to_wide instead
of a folding.
gcc/testsuite/ChangeLog:
PR tree-optimization/96979
* g++.dg/tree-ssa/pr96979.C: New test.
Diff:
---
gcc/testsuite/g++.dg/tree-ssa/pr96979.C | 48 +++++++++++++++++++++++++++++++++
gcc/tree-switch-conversion.c | 37 ++++++++++++++++++-------
gcc/tree-switch-conversion.h | 7 +++--
3 files changed, 79 insertions(+), 13 deletions(-)
diff --git a/gcc/testsuite/g++.dg/tree-ssa/pr96979.C b/gcc/testsuite/g++.dg/tree-ssa/pr96979.C
new file mode 100644
index 00000000000..ec0f57a8548
--- /dev/null
+++ b/gcc/testsuite/g++.dg/tree-ssa/pr96979.C
@@ -0,0 +1,48 @@
+/* PR tree-optimization/96979 */
+/* { dg-do compile } */
+/* { dg-options "-std=c++17 -O2" } */
+
+using u64 = unsigned long long;
+
+constexpr inline u64
+foo (const char *str) noexcept
+{
+ u64 value = 0xcbf29ce484222325ULL;
+ for (u64 i = 0; str[i]; i++)
+ value = (value ^ u64(str[i])) * 0x100000001b3ULL;
+ return value;
+}
+
+struct V
+{
+ enum W
+ {
+#define A(n) n,
+#define B(n) A(n##0) A(n##1) A(n##2) A(n##3) A(n##4) A(n##5) A(n##6) A(n##7) A(n##8) A(n##9)
+#define C(n) B(n##0) B(n##1) B(n##2) B(n##3) B(n##4) B(n##5) B(n##6) B(n##7) B(n##8) B(n##9)
+#define D(n) C(n##0) C(n##1) C(n##2) C(n##3) C(n##4) C(n##5) C(n##6) C(n##7) C(n##8) C(n##9)
+#define E D(foo1) D(foo2) D(foo3)
+ E
+ last
+ };
+
+ constexpr static W
+ bar (const u64 h) noexcept
+ {
+ switch (h)
+ {
+#undef A
+#define F(n) #n
+#define A(n) case foo (F(n)): return n;
+ E
+ }
+ return last;
+ }
+};
+
+int
+baz (const char *s)
+{
+ const u64 h = foo (s);
+ return V::bar (h);
+}
diff --git a/gcc/tree-switch-conversion.c b/gcc/tree-switch-conversion.c
index 186411ff3c4..03a1fe632d0 100644
--- a/gcc/tree-switch-conversion.c
+++ b/gcc/tree-switch-conversion.c
@@ -1268,6 +1268,18 @@ jump_table_cluster::can_be_handled (const vec<cluster *> &clusters,
if (range == 0)
return false;
+ if (range > HOST_WIDE_INT_M1U / 100)
+ return false;
+
+ unsigned HOST_WIDE_INT lhs = 100 * range;
+ if (lhs < range)
+ return false;
+
+ /* First make quick guess as each cluster
+ can add at maximum 2 to the comparison_count. */
+ if (lhs > 2 * max_ratio * (end - start + 1))
+ return false;
+
unsigned HOST_WIDE_INT comparison_count = 0;
for (unsigned i = start; i <= end; i++)
{
@@ -1275,10 +1287,6 @@ jump_table_cluster::can_be_handled (const vec<cluster *> &clusters,
comparison_count += sc->m_range_p ? 2 : 1;
}
- unsigned HOST_WIDE_INT lhs = 100 * range;
- if (lhs < range)
- return false;
-
return lhs <= max_ratio * comparison_count;
}
@@ -1364,12 +1372,12 @@ bit_test_cluster::can_be_handled (unsigned HOST_WIDE_INT range,
{
/* Check overflow. */
if (range == 0)
- return 0;
+ return false;
if (range >= GET_MODE_BITSIZE (word_mode))
return false;
- return uniq <= 3;
+ return uniq <= m_max_case_bit_tests;
}
/* Return true when cluster starting at START and ending at END (inclusive)
@@ -1379,6 +1387,7 @@ bool
bit_test_cluster::can_be_handled (const vec<cluster *> &clusters,
unsigned start, unsigned end)
{
+ auto_vec<int, m_max_case_bit_tests> dest_bbs;
/* For algorithm correctness, bit test for a single case must return
true. We bail out in is_beneficial if it's called just for
a single case. */
@@ -1387,15 +1396,25 @@ bit_test_cluster::can_be_handled (const vec<cluster *> &clusters,
unsigned HOST_WIDE_INT range = get_range (clusters[start]->get_low (),
clusters[end]->get_high ());
- auto_bitmap dest_bbs;
+
+ /* Make a guess first. */
+ if (!can_be_handled (range, m_max_case_bit_tests))
+ return false;
for (unsigned i = start; i <= end; i++)
{
simple_cluster *sc = static_cast<simple_cluster *> (clusters[i]);
- bitmap_set_bit (dest_bbs, sc->m_case_bb->index);
+ /* m_max_case_bit_tests is very small integer, thus the operation
+ is constant. */
+ if (!dest_bbs.contains (sc->m_case_bb->index))
+ {
+ if (dest_bbs.length () >= m_max_case_bit_tests)
+ return false;
+ dest_bbs.quick_push (sc->m_case_bb->index);
+ }
}
- return can_be_handled (range, bitmap_count_bits (dest_bbs));
+ return true;
}
/* Return true when COUNT of cases of UNIQ labels is beneficial for bit test
diff --git a/gcc/tree-switch-conversion.h b/gcc/tree-switch-conversion.h
index 9ebcf109319..dbfd9eecba2 100644
--- a/gcc/tree-switch-conversion.h
+++ b/gcc/tree-switch-conversion.h
@@ -84,11 +84,10 @@ public:
then return 0. */
static unsigned HOST_WIDE_INT get_range (tree low, tree high)
{
- tree r = fold_build2 (MINUS_EXPR, TREE_TYPE (low), high, low);
- if (!tree_fits_uhwi_p (r))
+ wide_int w = wi::to_wide (high) - wi::to_wide (low);
+ if (wi::neg_p (w, TYPE_SIGN (TREE_TYPE (low))) || !wi::fits_uhwi_p (w))
return 0;
-
- return tree_to_uhwi (r) + 1;
+ return w.to_uhwi () + 1;
}
/* Case label. */
^ permalink raw reply [flat|nested] 5+ messages in thread
* [gcc(refs/users/marxin/heads/PR96979-switch-clusters-speed)] switch conversion: make a rapid speed up
@ 2020-09-25 9:08 Martin Liska
0 siblings, 0 replies; 5+ messages in thread
From: Martin Liska @ 2020-09-25 9:08 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:dc4c1d129a50c7f51d28235506479f29d51dae07
commit dc4c1d129a50c7f51d28235506479f29d51dae07
Author: Martin Liska <mliska@suse.cz>
Date: Thu Sep 24 13:34:13 2020 +0200
switch conversion: make a rapid speed up
gcc/ChangeLog:
PR tree-optimization/96979
* tree-switch-conversion.c (jump_table_cluster::can_be_handled):
Make a fast bail out.
(bit_test_cluster::can_be_handled): Likewise here.
* tree-switch-conversion.h (get_range): Use wi::to_wide instead
of a folding.
gcc/testsuite/ChangeLog:
PR tree-optimization/96979
* g++.dg/tree-ssa/pr96979.C: New test.
Diff:
---
gcc/testsuite/g++.dg/tree-ssa/pr96979.C | 48 +++++++++++++++++++++++++++++++++
gcc/tree-switch-conversion.c | 32 +++++++++++++++-------
gcc/tree-switch-conversion.h | 7 +++--
3 files changed, 74 insertions(+), 13 deletions(-)
diff --git a/gcc/testsuite/g++.dg/tree-ssa/pr96979.C b/gcc/testsuite/g++.dg/tree-ssa/pr96979.C
new file mode 100644
index 00000000000..ec0f57a8548
--- /dev/null
+++ b/gcc/testsuite/g++.dg/tree-ssa/pr96979.C
@@ -0,0 +1,48 @@
+/* PR tree-optimization/96979 */
+/* { dg-do compile } */
+/* { dg-options "-std=c++17 -O2" } */
+
+using u64 = unsigned long long;
+
+constexpr inline u64
+foo (const char *str) noexcept
+{
+ u64 value = 0xcbf29ce484222325ULL;
+ for (u64 i = 0; str[i]; i++)
+ value = (value ^ u64(str[i])) * 0x100000001b3ULL;
+ return value;
+}
+
+struct V
+{
+ enum W
+ {
+#define A(n) n,
+#define B(n) A(n##0) A(n##1) A(n##2) A(n##3) A(n##4) A(n##5) A(n##6) A(n##7) A(n##8) A(n##9)
+#define C(n) B(n##0) B(n##1) B(n##2) B(n##3) B(n##4) B(n##5) B(n##6) B(n##7) B(n##8) B(n##9)
+#define D(n) C(n##0) C(n##1) C(n##2) C(n##3) C(n##4) C(n##5) C(n##6) C(n##7) C(n##8) C(n##9)
+#define E D(foo1) D(foo2) D(foo3)
+ E
+ last
+ };
+
+ constexpr static W
+ bar (const u64 h) noexcept
+ {
+ switch (h)
+ {
+#undef A
+#define F(n) #n
+#define A(n) case foo (F(n)): return n;
+ E
+ }
+ return last;
+ }
+};
+
+int
+baz (const char *s)
+{
+ const u64 h = foo (s);
+ return V::bar (h);
+}
diff --git a/gcc/tree-switch-conversion.c b/gcc/tree-switch-conversion.c
index 186411ff3c4..3212e964b84 100644
--- a/gcc/tree-switch-conversion.c
+++ b/gcc/tree-switch-conversion.c
@@ -1268,6 +1268,15 @@ jump_table_cluster::can_be_handled (const vec<cluster *> &clusters,
if (range == 0)
return false;
+ unsigned HOST_WIDE_INT lhs = 100 * range;
+ if (lhs < range)
+ return false;
+
+ /* First make quick guess as each cluster
+ can add at maximum 2 to the comparison_count. */
+ if (lhs > 2 * max_ratio * (end - start + 1))
+ return false;
+
unsigned HOST_WIDE_INT comparison_count = 0;
for (unsigned i = start; i <= end; i++)
{
@@ -1275,10 +1284,6 @@ jump_table_cluster::can_be_handled (const vec<cluster *> &clusters,
comparison_count += sc->m_range_p ? 2 : 1;
}
- unsigned HOST_WIDE_INT lhs = 100 * range;
- if (lhs < range)
- return false;
-
return lhs <= max_ratio * comparison_count;
}
@@ -1364,12 +1369,12 @@ bit_test_cluster::can_be_handled (unsigned HOST_WIDE_INT range,
{
/* Check overflow. */
if (range == 0)
- return 0;
+ return false;
if (range >= GET_MODE_BITSIZE (word_mode))
return false;
- return uniq <= 3;
+ return uniq <= m_max_case_bit_tests;
}
/* Return true when cluster starting at START and ending at END (inclusive)
@@ -1379,6 +1384,7 @@ bool
bit_test_cluster::can_be_handled (const vec<cluster *> &clusters,
unsigned start, unsigned end)
{
+ auto_vec<int> dest_bbs;
/* For algorithm correctness, bit test for a single case must return
true. We bail out in is_beneficial if it's called just for
a single case. */
@@ -1387,15 +1393,23 @@ bit_test_cluster::can_be_handled (const vec<cluster *> &clusters,
unsigned HOST_WIDE_INT range = get_range (clusters[start]->get_low (),
clusters[end]->get_high ());
- auto_bitmap dest_bbs;
+
+ /* Make a guess first. */
+ if (!can_be_handled (range, m_max_case_bit_tests))
+ return false;
for (unsigned i = start; i <= end; i++)
{
simple_cluster *sc = static_cast<simple_cluster *> (clusters[i]);
- bitmap_set_bit (dest_bbs, sc->m_case_bb->index);
+ if (!dest_bbs.contains (sc->m_case_bb->index))
+ {
+ dest_bbs.safe_push (sc->m_case_bb->index);
+ if (dest_bbs.length () > m_max_case_bit_tests)
+ return false;
+ }
}
- return can_be_handled (range, bitmap_count_bits (dest_bbs));
+ return true;
}
/* Return true when COUNT of cases of UNIQ labels is beneficial for bit test
diff --git a/gcc/tree-switch-conversion.h b/gcc/tree-switch-conversion.h
index 9ebcf109319..dbfd9eecba2 100644
--- a/gcc/tree-switch-conversion.h
+++ b/gcc/tree-switch-conversion.h
@@ -84,11 +84,10 @@ public:
then return 0. */
static unsigned HOST_WIDE_INT get_range (tree low, tree high)
{
- tree r = fold_build2 (MINUS_EXPR, TREE_TYPE (low), high, low);
- if (!tree_fits_uhwi_p (r))
+ wide_int w = wi::to_wide (high) - wi::to_wide (low);
+ if (wi::neg_p (w, TYPE_SIGN (TREE_TYPE (low))) || !wi::fits_uhwi_p (w))
return 0;
-
- return tree_to_uhwi (r) + 1;
+ return w.to_uhwi () + 1;
}
/* Case label. */
^ permalink raw reply [flat|nested] 5+ messages in thread
* [gcc(refs/users/marxin/heads/PR96979-switch-clusters-speed)] switch conversion: make a rapid speed up
@ 2020-09-24 14:43 Martin Liska
0 siblings, 0 replies; 5+ messages in thread
From: Martin Liska @ 2020-09-24 14:43 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:d2d7e295cc0195350ecf1e362da56473bfe2850e
commit d2d7e295cc0195350ecf1e362da56473bfe2850e
Author: Martin Liska <mliska@suse.cz>
Date: Thu Sep 24 13:34:13 2020 +0200
switch conversion: make a rapid speed up
gcc/ChangeLog:
PR tree-optimization/96979
* tree-switch-conversion.c (jump_table_cluster::can_be_handled):
Make a fast bail out.
(bit_test_cluster::can_be_handled): Likewise here.
* tree-switch-conversion.h (get_range): Use wi::to_wide instead
of a folding.
gcc/testsuite/ChangeLog:
PR tree-optimization/96979
* g++.dg/tree-ssa/pr96979.C: New test.
Diff:
---
gcc/testsuite/g++.dg/tree-ssa/pr96979.C | 50 +++++++++++++++++++++++++++++++++
gcc/tree-switch-conversion.c | 33 ++++++++++++++++------
gcc/tree-switch-conversion.h | 7 ++---
3 files changed, 77 insertions(+), 13 deletions(-)
diff --git a/gcc/testsuite/g++.dg/tree-ssa/pr96979.C b/gcc/testsuite/g++.dg/tree-ssa/pr96979.C
new file mode 100644
index 00000000000..85c703a140d
--- /dev/null
+++ b/gcc/testsuite/g++.dg/tree-ssa/pr96979.C
@@ -0,0 +1,50 @@
+/* PR tree-optimization/96979 */
+/* { dg-do compile } */
+/* { dg-options "-std=c++17 -O2 -fdump-tree-switchlower1" } */
+
+using u64 = unsigned long long;
+
+constexpr inline u64
+foo (const char *str) noexcept
+{
+ u64 value = 0xcbf29ce484222325ULL;
+ for (u64 i = 0; str[i]; i++)
+ value = (value ^ u64(str[i])) * 0x100000001b3ULL;
+ return value;
+}
+
+struct V
+{
+ enum W
+ {
+#define A(n) n,
+#define B(n) A(n##0) A(n##1) A(n##2) A(n##3) A(n##4) A(n##5) A(n##6) A(n##7) A(n##8) A(n##9)
+#define C(n) B(n##0) B(n##1) B(n##2) B(n##3) B(n##4) B(n##5) B(n##6) B(n##7) B(n##8) B(n##9)
+#define D(n) C(n##0) C(n##1) C(n##2) C(n##3) C(n##4) C(n##5) C(n##6) C(n##7) C(n##8) C(n##9)
+#define E D(foo1) D(foo2) D(foo3)
+ E
+ last
+ };
+
+ constexpr static W
+ bar (const u64 h) noexcept
+ {
+ switch (h)
+ {
+#undef A
+#define F(n) #n
+#define A(n) case foo (F(n)): return n;
+ E
+ }
+ return last;
+ }
+};
+
+int
+baz (const char *s)
+{
+ const u64 h = foo (s);
+ return V::bar (h);
+}
+
+/* { dg-final { scan-tree-dump-times ";; Bail out: --param=max-switch-clustering-attempts reached" 2 "switchlower1" } } */
diff --git a/gcc/tree-switch-conversion.c b/gcc/tree-switch-conversion.c
index 186411ff3c4..46fba13419b 100644
--- a/gcc/tree-switch-conversion.c
+++ b/gcc/tree-switch-conversion.c
@@ -1268,6 +1268,15 @@ jump_table_cluster::can_be_handled (const vec<cluster *> &clusters,
if (range == 0)
return false;
+ unsigned HOST_WIDE_INT lhs = 100 * range;
+ if (lhs < range)
+ return false;
+
+ /* First make quick guess as each cluster
+ can add at maximum 2 to the comparison_count. */
+ if (lhs > 2 * max_ratio * (end - start + 1))
+ return false;
+
unsigned HOST_WIDE_INT comparison_count = 0;
for (unsigned i = start; i <= end; i++)
{
@@ -1275,10 +1284,6 @@ jump_table_cluster::can_be_handled (const vec<cluster *> &clusters,
comparison_count += sc->m_range_p ? 2 : 1;
}
- unsigned HOST_WIDE_INT lhs = 100 * range;
- if (lhs < range)
- return false;
-
return lhs <= max_ratio * comparison_count;
}
@@ -1364,12 +1369,12 @@ bit_test_cluster::can_be_handled (unsigned HOST_WIDE_INT range,
{
/* Check overflow. */
if (range == 0)
- return 0;
+ return false;
if (range >= GET_MODE_BITSIZE (word_mode))
return false;
- return uniq <= 3;
+ return uniq <= m_max_case_bit_tests;
}
/* Return true when cluster starting at START and ending at END (inclusive)
@@ -1379,6 +1384,7 @@ bool
bit_test_cluster::can_be_handled (const vec<cluster *> &clusters,
unsigned start, unsigned end)
{
+ auto_vec<int> dest_bbs;
/* For algorithm correctness, bit test for a single case must return
true. We bail out in is_beneficial if it's called just for
a single case. */
@@ -1387,15 +1393,24 @@ bit_test_cluster::can_be_handled (const vec<cluster *> &clusters,
unsigned HOST_WIDE_INT range = get_range (clusters[start]->get_low (),
clusters[end]->get_high ());
- auto_bitmap dest_bbs;
+
+ /* Make a guess first. */
+ if (!can_be_handled (range, m_max_case_bit_tests))
+ return false;
for (unsigned i = start; i <= end; i++)
{
simple_cluster *sc = static_cast<simple_cluster *> (clusters[i]);
- bitmap_set_bit (dest_bbs, sc->m_case_bb->index);
+ if (!dest_bbs.contains (sc->m_case_bb->index))
+ {
+ dest_bbs.safe_push (sc->m_case_bb->index);
+ if (dest_bbs.length () > m_max_case_bit_tests)
+ return false;
+ }
}
- return can_be_handled (range, bitmap_count_bits (dest_bbs));
+ /* Check for uniq is already done here. */
+ return can_be_handled (range, UINT_MAX);
}
/* Return true when COUNT of cases of UNIQ labels is beneficial for bit test
diff --git a/gcc/tree-switch-conversion.h b/gcc/tree-switch-conversion.h
index 9ebcf109319..dbfd9eecba2 100644
--- a/gcc/tree-switch-conversion.h
+++ b/gcc/tree-switch-conversion.h
@@ -84,11 +84,10 @@ public:
then return 0. */
static unsigned HOST_WIDE_INT get_range (tree low, tree high)
{
- tree r = fold_build2 (MINUS_EXPR, TREE_TYPE (low), high, low);
- if (!tree_fits_uhwi_p (r))
+ wide_int w = wi::to_wide (high) - wi::to_wide (low);
+ if (wi::neg_p (w, TYPE_SIGN (TREE_TYPE (low))) || !wi::fits_uhwi_p (w))
return 0;
-
- return tree_to_uhwi (r) + 1;
+ return w.to_uhwi () + 1;
}
/* Case label. */
^ permalink raw reply [flat|nested] 5+ messages in thread
* [gcc(refs/users/marxin/heads/PR96979-switch-clusters-speed)] switch conversion: make a rapid speed up
@ 2020-09-24 11:48 Martin Liska
0 siblings, 0 replies; 5+ messages in thread
From: Martin Liska @ 2020-09-24 11:48 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:5bea5bdabcbe0b1b385127fe071b6a6cb939b71c
commit 5bea5bdabcbe0b1b385127fe071b6a6cb939b71c
Author: Martin Liska <mliska@suse.cz>
Date: Thu Sep 24 13:34:13 2020 +0200
switch conversion: make a rapid speed up
gcc/ChangeLog:
PR tree-optimization/96979
* tree-switch-conversion.c (jump_table_cluster::can_be_handled):
Make a fast bail out.
(bit_test_cluster::can_be_handled): Likewise here.
* tree-switch-conversion.h (get_range): Use wi::to_wide instead
of a folding.
gcc/testsuite/ChangeLog:
PR tree-optimization/96979
* g++.dg/tree-ssa/pr96979.C: New test.
Diff:
---
gcc/testsuite/g++.dg/tree-ssa/pr96979.C | 50 +++++++++++++++++++++++++++++++++
gcc/tree-switch-conversion.c | 33 ++++++++++++++++------
gcc/tree-switch-conversion.h | 7 ++---
3 files changed, 77 insertions(+), 13 deletions(-)
diff --git a/gcc/testsuite/g++.dg/tree-ssa/pr96979.C b/gcc/testsuite/g++.dg/tree-ssa/pr96979.C
new file mode 100644
index 00000000000..85c703a140d
--- /dev/null
+++ b/gcc/testsuite/g++.dg/tree-ssa/pr96979.C
@@ -0,0 +1,50 @@
+/* PR tree-optimization/96979 */
+/* { dg-do compile } */
+/* { dg-options "-std=c++17 -O2 -fdump-tree-switchlower1" } */
+
+using u64 = unsigned long long;
+
+constexpr inline u64
+foo (const char *str) noexcept
+{
+ u64 value = 0xcbf29ce484222325ULL;
+ for (u64 i = 0; str[i]; i++)
+ value = (value ^ u64(str[i])) * 0x100000001b3ULL;
+ return value;
+}
+
+struct V
+{
+ enum W
+ {
+#define A(n) n,
+#define B(n) A(n##0) A(n##1) A(n##2) A(n##3) A(n##4) A(n##5) A(n##6) A(n##7) A(n##8) A(n##9)
+#define C(n) B(n##0) B(n##1) B(n##2) B(n##3) B(n##4) B(n##5) B(n##6) B(n##7) B(n##8) B(n##9)
+#define D(n) C(n##0) C(n##1) C(n##2) C(n##3) C(n##4) C(n##5) C(n##6) C(n##7) C(n##8) C(n##9)
+#define E D(foo1) D(foo2) D(foo3)
+ E
+ last
+ };
+
+ constexpr static W
+ bar (const u64 h) noexcept
+ {
+ switch (h)
+ {
+#undef A
+#define F(n) #n
+#define A(n) case foo (F(n)): return n;
+ E
+ }
+ return last;
+ }
+};
+
+int
+baz (const char *s)
+{
+ const u64 h = foo (s);
+ return V::bar (h);
+}
+
+/* { dg-final { scan-tree-dump-times ";; Bail out: --param=max-switch-clustering-attempts reached" 2 "switchlower1" } } */
diff --git a/gcc/tree-switch-conversion.c b/gcc/tree-switch-conversion.c
index 186411ff3c4..6c75325f569 100644
--- a/gcc/tree-switch-conversion.c
+++ b/gcc/tree-switch-conversion.c
@@ -1268,6 +1268,15 @@ jump_table_cluster::can_be_handled (const vec<cluster *> &clusters,
if (range == 0)
return false;
+ unsigned HOST_WIDE_INT lhs = 100 * range;
+ if (lhs < range)
+ return false;
+
+ /* First make quick guess as each cluster
+ can add at maximum 2 to the comparison_count. */
+ if (lhs > 2 * max_ratio * (end - start + 1))
+ return false;
+
unsigned HOST_WIDE_INT comparison_count = 0;
for (unsigned i = start; i <= end; i++)
{
@@ -1275,10 +1284,6 @@ jump_table_cluster::can_be_handled (const vec<cluster *> &clusters,
comparison_count += sc->m_range_p ? 2 : 1;
}
- unsigned HOST_WIDE_INT lhs = 100 * range;
- if (lhs < range)
- return false;
-
return lhs <= max_ratio * comparison_count;
}
@@ -1364,12 +1369,12 @@ bit_test_cluster::can_be_handled (unsigned HOST_WIDE_INT range,
{
/* Check overflow. */
if (range == 0)
- return 0;
+ return false;
if (range >= GET_MODE_BITSIZE (word_mode))
return false;
- return uniq <= 3;
+ return uniq <= m_max_case_bit_tests;
}
/* Return true when cluster starting at START and ending at END (inclusive)
@@ -1379,6 +1384,7 @@ bool
bit_test_cluster::can_be_handled (const vec<cluster *> &clusters,
unsigned start, unsigned end)
{
+ auto_vec<int> dest_bbs;
/* For algorithm correctness, bit test for a single case must return
true. We bail out in is_beneficial if it's called just for
a single case. */
@@ -1387,15 +1393,24 @@ bit_test_cluster::can_be_handled (const vec<cluster *> &clusters,
unsigned HOST_WIDE_INT range = get_range (clusters[start]->get_low (),
clusters[end]->get_high ());
- auto_bitmap dest_bbs;
+
+ /* Make a guess first. */
+ if (!can_be_handled (range, UINT_MAX))
+ return false;
for (unsigned i = start; i <= end; i++)
{
simple_cluster *sc = static_cast<simple_cluster *> (clusters[i]);
- bitmap_set_bit (dest_bbs, sc->m_case_bb->index);
+ if (!dest_bbs.contains (sc->m_case_bb->index))
+ {
+ dest_bbs.safe_push (sc->m_case_bb->index);
+ if (dest_bbs.length () > m_max_case_bit_tests)
+ return false;
+ }
}
- return can_be_handled (range, bitmap_count_bits (dest_bbs));
+ /* Check for uniq is already done here. */
+ return can_be_handled (range, UINT_MAX);
}
/* Return true when COUNT of cases of UNIQ labels is beneficial for bit test
diff --git a/gcc/tree-switch-conversion.h b/gcc/tree-switch-conversion.h
index 9ebcf109319..dbfd9eecba2 100644
--- a/gcc/tree-switch-conversion.h
+++ b/gcc/tree-switch-conversion.h
@@ -84,11 +84,10 @@ public:
then return 0. */
static unsigned HOST_WIDE_INT get_range (tree low, tree high)
{
- tree r = fold_build2 (MINUS_EXPR, TREE_TYPE (low), high, low);
- if (!tree_fits_uhwi_p (r))
+ wide_int w = wi::to_wide (high) - wi::to_wide (low);
+ if (wi::neg_p (w, TYPE_SIGN (TREE_TYPE (low))) || !wi::fits_uhwi_p (w))
return 0;
-
- return tree_to_uhwi (r) + 1;
+ return w.to_uhwi () + 1;
}
/* Case label. */
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2020-09-25 14:16 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-09-25 8:09 [gcc(refs/users/marxin/heads/PR96979-switch-clusters-speed)] switch conversion: make a rapid speed up Martin Liska
-- strict thread matches above, loose matches on Subject: below --
2020-09-25 14:16 Martin Liska
2020-09-25 9:08 Martin Liska
2020-09-24 14:43 Martin Liska
2020-09-24 11:48 Martin Liska
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).