public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r12-5453] tree-optimization/103345: Improved load merging.
@ 2021-11-22 18:17 Roger Sayle
0 siblings, 0 replies; only message in thread
From: Roger Sayle @ 2021-11-22 18:17 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:a944b5dec3adb28ed199234d2116145ca9010d6a
commit r12-5453-ga944b5dec3adb28ed199234d2116145ca9010d6a
Author: Roger Sayle <roger@nextmovesoftware.com>
Date: Mon Nov 22 18:15:36 2021 +0000
tree-optimization/103345: Improved load merging.
This patch implements PR tree-optimization/103345 to merge adjacent
loads when combined with addition or bitwise xor. The current code
in gimple-ssa-store-merging.c's find_bswap_or_nop already handles ior,
so that all that's required is to treat PLUS_EXPR and BIT_XOR_EXPR in
the same way as BIT_IOR_EXPR. Many thanks to Andrew Pinski for
pointing out that this also resolves PR target/98953.
2021-11-22 Roger Sayle <roger@nextmovesoftware.com>
gcc/ChangeLog
PR tree-optimization/98953
PR tree-optimization/103345
* gimple-ssa-store-merging.c (find_bswap_or_nop_1): Handle
BIT_XOR_EXPR and PLUS_EXPR the same as BIT_IOR_EXPR.
(pass_optimize_bswap::execute): Likewise.
gcc/testsuite/ChangeLog
PR tree-optimization/98953
PR tree-optimization/103345
* gcc.dg/tree-ssa/pr98953.c: New test case.
* gcc.dg/tree-ssa/pr103345.c: New test case.
Diff:
---
gcc/gimple-ssa-store-merging.c | 9 +++---
gcc/testsuite/gcc.dg/tree-ssa/pr103345.c | 53 ++++++++++++++++++++++++++++++++
gcc/testsuite/gcc.dg/tree-ssa/pr98953.c | 14 +++++++++
3 files changed, 72 insertions(+), 4 deletions(-)
diff --git a/gcc/gimple-ssa-store-merging.c b/gcc/gimple-ssa-store-merging.c
index 4efa200428a..1740c9ee1c1 100644
--- a/gcc/gimple-ssa-store-merging.c
+++ b/gcc/gimple-ssa-store-merging.c
@@ -742,10 +742,7 @@ find_bswap_or_nop_1 (gimple *stmt, struct symbolic_number *n, int limit)
struct symbolic_number n1, n2;
gimple *source_stmt, *source_stmt2;
- if (code != BIT_IOR_EXPR)
- return NULL;
-
- if (TREE_CODE (rhs2) != SSA_NAME)
+ if (!rhs2 || TREE_CODE (rhs2) != SSA_NAME)
return NULL;
rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
@@ -753,6 +750,8 @@ find_bswap_or_nop_1 (gimple *stmt, struct symbolic_number *n, int limit)
switch (code)
{
case BIT_IOR_EXPR:
+ case BIT_XOR_EXPR:
+ case PLUS_EXPR:
source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, &n1, limit - 1);
if (!source_stmt1)
@@ -1495,6 +1494,8 @@ pass_optimize_bswap::execute (function *fun)
continue;
/* Fall through. */
case BIT_IOR_EXPR:
+ case BIT_XOR_EXPR:
+ case PLUS_EXPR:
break;
case CONSTRUCTOR:
{
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr103345.c b/gcc/testsuite/gcc.dg/tree-ssa/pr103345.c
new file mode 100644
index 00000000000..94388b541c1
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr103345.c
@@ -0,0 +1,53 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-bswap-details" } */
+
+typedef unsigned int uint32_t;
+typedef unsigned char uint8_t;
+
+uint32_t load_le_32_or(const uint8_t *ptr)
+{
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ return ((uint32_t)ptr[0]) |
+ ((uint32_t)ptr[1] << 8) |
+ ((uint32_t)ptr[2] << 16) |
+ ((uint32_t)ptr[3] << 24);
+#else
+ return ((uint32_t)ptr[3]) |
+ ((uint32_t)ptr[2] << 8) |
+ ((uint32_t)ptr[1] << 16) |
+ ((uint32_t)ptr[0] << 24);
+#endif
+}
+
+uint32_t load_le_32_add(const uint8_t *ptr)
+{
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ return ((uint32_t)ptr[0]) +
+ ((uint32_t)ptr[1] << 8) +
+ ((uint32_t)ptr[2] << 16) +
+ ((uint32_t)ptr[3] << 24);
+#else
+ return ((uint32_t)ptr[3]) +
+ ((uint32_t)ptr[2] << 8) +
+ ((uint32_t)ptr[1] << 16) +
+ ((uint32_t)ptr[0] << 24);
+#endif
+}
+
+uint32_t load_le_32_xor(const uint8_t *ptr)
+{
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ return ((uint32_t)ptr[0]) ^
+ ((uint32_t)ptr[1] << 8) ^
+ ((uint32_t)ptr[2] << 16) ^
+ ((uint32_t)ptr[3] << 24);
+#else
+ return ((uint32_t)ptr[0]) ^
+ ((uint32_t)ptr[1] << 8) ^
+ ((uint32_t)ptr[2] << 16) ^
+ ((uint32_t)ptr[3] << 24);
+#endif
+}
+
+/* { dg-final { scan-tree-dump-times "32 bit load in target endianness found" 3 "bswap" } } */
+
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr98953.c b/gcc/testsuite/gcc.dg/tree-ssa/pr98953.c
new file mode 100644
index 00000000000..7687dc2871d
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr98953.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-bswap-details" } */
+
+int foo(unsigned char *ptr)
+{
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ return ptr[0] + (ptr[1] << 8);
+#else
+ return ptr[1] + (ptr[0] << 8);
+#endif
+}
+
+/* { dg-final { scan-tree-dump "16 bit load in target endianness found" "bswap" } } */
+
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2021-11-22 18:17 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-11-22 18:17 [gcc r12-5453] tree-optimization/103345: Improved load merging Roger Sayle
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).