public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH take #3] match.pd: Simplify popcount/parity of bswap/rotate.
@ 2023-05-10 14:46 Roger Sayle
  2023-05-10 15:47 ` Bernhard Reutner-Fischer
  0 siblings, 1 reply; 3+ messages in thread
From: Roger Sayle @ 2023-05-10 14:46 UTC (permalink / raw)
  To: 'GCC Patches'; +Cc: 'Marc Glisse'

[-- Attachment #1: Type: text/plain, Size: 1004 bytes --]


This is the latest iteration of my patch from August 2020
https://gcc.gnu.org/pipermail/gcc-patches/2020-August/552391.html
incorporating feedback and suggestions from reviewers.

This patch to match.pd optimizes away bit permutation operations,
specifically bswap and rotate, in calls to popcount and parity.

This patch has been tested on x86_64-pc-linux-gnu with make bootstrap
and make -k check, both with and without --target_board=unix{-m32}
with no new failures.  Ok for mainline?


2023-05-10  Roger Sayle  <roger@nextmovesoftware.com>

gcc/ChangeLog
	* match.pd <popcount optimizations>: Simplify popcount(bswap(x))
	as popcount(x).  Simplify popcount(rotate(x,y)) as popcount(x).
	<parity optimizations>:  Simplify parity(bswap(x)) as parity(x).
	Simplify parity(rotate(x,y)) as parity(x).

gcc/testsuite/ChangeLog
	* gcc.dg/fold-parity-6.c: New test.
	* gcc.dg/fold-parity-7.c: New test.
	* gcc.dg/fold-popcount-6.c: New test.
	* gcc.dg/fold-popcount-7.c: New test.


Thanks again,
Roger
--


[-- Attachment #2: patcha.txt --]
[-- Type: text/plain, Size: 7030 bytes --]

diff --git a/gcc/match.pd b/gcc/match.pd
index ceae1c3..bc083be 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -7766,6 +7766,32 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
       (cmp (popcount @0) integer_zerop)
       (rep @0 { build_zero_cst (TREE_TYPE (@0)); }))))
 
+/* popcount(bswap(x)) is popcount(x).  */
+(for popcount (POPCOUNT)
+  (for bswap (BUILT_IN_BSWAP16 BUILT_IN_BSWAP32
+	      BUILT_IN_BSWAP64 BUILT_IN_BSWAP128)
+    (simplify
+      (popcount (convert?@0 (bswap:s@1 @2)))
+      (if (INTEGRAL_TYPE_P (TREE_TYPE (@0))
+	   && INTEGRAL_TYPE_P (TREE_TYPE (@1)))
+	(with { unsigned int prec0 = TYPE_PRECISION (TREE_TYPE (@0));
+		unsigned int prec1 = TYPE_PRECISION (TREE_TYPE (@1)); }
+	  (if (prec0 == prec1 || (prec0 > prec1 && TYPE_UNSIGNED (@1)))
+	    (popcount @2)))))))
+
+/* popcount(rotate(X Y)) is popcount(X).  */
+(for popcount (POPCOUNT)
+  (for rot (lrotate rrotate)
+    (simplify
+      (popcount (convert?@0 (rot:s@1 @2 @3)))
+      (if (INTEGRAL_TYPE_P (TREE_TYPE (@0))
+	   && INTEGRAL_TYPE_P (TREE_TYPE (@1))	
+	   && (GIMPLE || !TREE_SIDE_EFFECTS (@3)))
+	(with { unsigned int prec0 = TYPE_PRECISION (TREE_TYPE (@0));
+		unsigned int prec1 = TYPE_PRECISION (TREE_TYPE (@1)); }
+	  (if (prec0 == prec1 || (prec0 > prec1 && TYPE_UNSIGNED (@1)))
+	    (popcount @2)))))))
+
 /* Canonicalize POPCOUNT(x)&1 as PARITY(X).  */
 (simplify
   (bit_and (POPCOUNT @0) integer_onep)
@@ -7777,6 +7803,30 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
   (PARITY (bit_not @0))
   (PARITY @0))
 
+/* parity(bswap(x)) is parity(x).  */
+(for parity (PARITY)
+  (for bswap (BUILT_IN_BSWAP16 BUILT_IN_BSWAP32
+	      BUILT_IN_BSWAP64 BUILT_IN_BSWAP128)
+    (simplify
+      (parity (convert?@0 (bswap:s@1 @2)))
+      (if (INTEGRAL_TYPE_P (TREE_TYPE (@0))
+	   && INTEGRAL_TYPE_P (TREE_TYPE (@1))
+	   && TYPE_PRECISION (TREE_TYPE (@0))
+	      >= TYPE_PRECISION (TREE_TYPE (@1)))
+	(parity @2)))))
+
+/* parity(rotate(X Y)) is parity(X).  */
+(for parity (PARITY)
+  (for rot (lrotate rrotate)
+    (simplify
+      (parity (convert?@0 (rot:s@1 @2 @3)))
+      (if (INTEGRAL_TYPE_P (TREE_TYPE (@0))
+	   && INTEGRAL_TYPE_P (TREE_TYPE (@1))
+	   && (GIMPLE || !TREE_SIDE_EFFECTS (@3))
+	   && TYPE_PRECISION (TREE_TYPE (@0))
+	      >= TYPE_PRECISION (TREE_TYPE (@1)))
+	(parity @2)))))
+
 /* parity(X)^parity(Y) is parity(X^Y).  */
 (simplify
   (bit_xor (PARITY:s @0) (PARITY:s @1))
diff --git a/gcc/testsuite/gcc.dg/fold-parity-6.c b/gcc/testsuite/gcc.dg/fold-parity-6.c
new file mode 100644
index 0000000..623afb9
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/fold-parity-6.c
@@ -0,0 +1,37 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+int foo(unsigned int x)
+{
+#if __SIZEOF_INT__ == 4
+  return __builtin_parity (__builtin_bswap32(x));
+#elif __SIZEOF_INT__ == 2
+  return __builtin_parity (__builtin_bswap16(x));
+#else
+  return x;
+#endif
+}
+
+int bar(unsigned long x)
+{
+#if __SIZEOF_LONG__ == 8
+  return __builtin_parityl (__builtin_bswap64(x));
+#elif __SIZEOF_LONG__ == 4
+  return __builtin_parityl (__builtin_bswap32(x));
+#else
+  return x;
+#endif
+}
+
+int baz(unsigned long long x)
+{
+#if __SIZEOF_LONG_LONG__ == 8
+  return __builtin_parityll (__builtin_bswap64(x));
+#elif __SIZEOF_LONG_LONG__ == 4
+  return __builtin_parityll (__builtin_bswap32(x));
+#else
+  return x;
+#endif
+}
+
+/* { dg-final { scan-tree-dump-times "bswap" 0 "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/fold-parity-7.c b/gcc/testsuite/gcc.dg/fold-parity-7.c
new file mode 100644
index 0000000..c08cdee
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/fold-parity-7.c
@@ -0,0 +1,43 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+int foo(unsigned int x)
+{
+#if __SIZEOF_INT__ == 4
+  unsigned int y = (x>>4) | (x<<28);
+  return __builtin_parity(y);
+#elif __SIZEOF_INT__ == 2
+  unsigned int y = (x>>4) | (x<<12);
+  return __builtin_parity(y);
+#else
+  return x;
+#endif
+}
+
+int bar(unsigned long x)
+{
+#if __SIZEOF_LONG__ == 8
+  unsigned long y = (x>>4) | (x<<60);
+  return __builtin_parityl (y);
+#elif __SIZEOF_LONG__ == 4
+  unsigned long y = (x>>4) | (x<<28);
+  return __builtin_parityl (y);
+#else
+  return x;
+#endif
+}
+
+int baz(unsigned long long x)
+{
+#if __SIZEOF_LONG_LONG__ == 8
+  unsigned long long y = (x>>4) | (x<<60);
+  return __builtin_parityll (y);
+#elif __SIZEOF_LONG_LONG__ == 4
+  unsigned long long y = (x>>4) | (x<<28);
+  return __builtin_parityll (y);
+#else
+  return x;
+#endif
+}
+
+/* { dg-final { scan-tree-dump-times " r>> " 0 "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/fold-popcount-6.c b/gcc/testsuite/gcc.dg/fold-popcount-6.c
new file mode 100644
index 0000000..e7f38d0
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/fold-popcount-6.c
@@ -0,0 +1,37 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+int foo(unsigned int x)
+{
+#if __SIZEOF_INT__ == 4
+  return __builtin_popcount (__builtin_bswap32(x));
+#elif __SIZEOF_INT__ == 2
+  return __builtin_popcount (__builtin_bswap16(x));
+#else
+  return x;
+#endif
+}
+
+int bar(unsigned long x)
+{
+#if __SIZEOF_LONG__ == 8
+  return __builtin_popcountl (__builtin_bswap64(x));
+#elif __SIZEOF_LONG__ == 4
+  return __builtin_popcountl (__builtin_bswap32(x));
+#else
+  return x;
+#endif
+}
+
+int baz(unsigned long long x)
+{
+#if __SIZEOF_LONG_LONG__ == 8
+  return __builtin_popcountll (__builtin_bswap64(x));
+#elif __SIZEOF_LONG_LONG__ == 4
+  return __builtin_popcountll (__builtin_bswap32(x));
+#else
+  return x;
+#endif
+}
+
+/* { dg-final { scan-tree-dump-times "bswap" 0 "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/fold-popcount-7.c b/gcc/testsuite/gcc.dg/fold-popcount-7.c
new file mode 100644
index 0000000..dcfdf5f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/fold-popcount-7.c
@@ -0,0 +1,43 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+int foo(unsigned int x)
+{
+#if __SIZEOF_INT__ == 4
+  unsigned int y = (x>>4) | (x<<28);
+  return __builtin_popcount(y);
+#elif __SIZEOF_INT__ == 2
+  unsigned int y = (x>>4) | (x<<12);
+  return __builtin_popcount(y);
+#else
+  return x;
+#endif
+}
+
+int bar(unsigned long x)
+{
+#if __SIZEOF_LONG__ == 8
+  unsigned long y = (x>>4) | (x<<60);
+  return __builtin_popcountl (y);
+#elif __SIZEOF_LONG__ == 4
+  unsigned long y = (x>>4) | (x<<28);
+  return __builtin_popcountl (y);
+#else
+  return x;
+#endif
+}
+
+int baz(unsigned long long x)
+{
+#if __SIZEOF_LONG_LONG__ == 8
+  unsigned long long y = (x>>4) | (x<<60);
+  return __builtin_popcountll (y);
+#elif __SIZEOF_LONG_LONG__ == 4
+  unsigned long long y = (x>>4) | (x<<28);
+  return __builtin_popcountll (y);
+#else
+  return x;
+#endif
+}
+
+/* { dg-final { scan-tree-dump-times " r>> " 0 "optimized" } } */

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH take #3] match.pd: Simplify popcount/parity of bswap/rotate.
  2023-05-10 14:46 [PATCH take #3] match.pd: Simplify popcount/parity of bswap/rotate Roger Sayle
@ 2023-05-10 15:47 ` Bernhard Reutner-Fischer
  2023-05-10 23:15   ` Jeff Law
  0 siblings, 1 reply; 3+ messages in thread
From: Bernhard Reutner-Fischer @ 2023-05-10 15:47 UTC (permalink / raw)
  To: gcc-patches, Roger Sayle, 'GCC Patches'; +Cc: 'Marc Glisse'

Hi Roger!
On 10 May 2023 16:46:10 CEST, Roger Sayle <roger@nextmovesoftware.com> wrote:

Just a nit:

+/* { dg-final { scan-tree-dump-times "bswap" 0 "optimized" } } */

Can you please use scan-tree-dump-not instead?
thanks,

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH take #3] match.pd: Simplify popcount/parity of bswap/rotate.
  2023-05-10 15:47 ` Bernhard Reutner-Fischer
@ 2023-05-10 23:15   ` Jeff Law
  0 siblings, 0 replies; 3+ messages in thread
From: Jeff Law @ 2023-05-10 23:15 UTC (permalink / raw)
  To: Bernhard Reutner-Fischer, gcc-patches, Roger Sayle; +Cc: 'Marc Glisse'



On 5/10/23 09:47, Bernhard Reutner-Fischer via Gcc-patches wrote:
> Hi Roger!
> On 10 May 2023 16:46:10 CEST, Roger Sayle <roger@nextmovesoftware.com> wrote:
> 
> Just a nit:
> 
> +/* { dg-final { scan-tree-dump-times "bswap" 0 "optimized" } } */
> 
> Can you please use scan-tree-dump-not instead?
OK with that change.  I considered asking for the patterns to be merged, 
but I think it's easier to read with them separate.

jeff

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2023-05-10 23:15 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-05-10 14:46 [PATCH take #3] match.pd: Simplify popcount/parity of bswap/rotate Roger Sayle
2023-05-10 15:47 ` Bernhard Reutner-Fischer
2023-05-10 23:15   ` Jeff Law

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).