public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] PR tree-optimization/90836 Missing popcount pattern matching
@ 2019-09-05 15:35 Dmitrij Pochepko
  2019-09-06 10:23 ` Richard Biener
  2019-09-09 19:03 ` Dmitrij Pochepko
  0 siblings, 2 replies; 13+ messages in thread
From: Dmitrij Pochepko @ 2019-09-05 15:35 UTC (permalink / raw)
  To: gcc-patches

[-- Attachment #1: Type: text/plain, Size: 1252 bytes --]

This patch adds matching for Hamming weight (popcount) implementation. The following sources:

int
foo64 (unsigned long long a)
{
    unsigned long long b = a;
    b -= ((b>>1) & 0x5555555555555555ULL);
    b = ((b>>2) & 0x3333333333333333ULL) + (b & 0x3333333333333333ULL);
    b = ((b>>4) + b) & 0x0F0F0F0F0F0F0F0FULL;
    b *= 0x0101010101010101ULL;
    return (int)(b >> 56);
}

and

int
foo32 (unsigned int a)
{
    unsigned long b = a;
    b -= ((b>>1) & 0x55555555UL);
    b = ((b>>2) & 0x33333333UL) + (b & 0x33333333UL);
    b = ((b>>4) + b) & 0x0F0F0F0FUL;
    b *= 0x01010101UL;
    return (int)(b >> 24);
}

and equivalents are now recognized as popcount for platforms with hw popcount support. Bootstrapped and tested on x86_64-pc-linux-gnu and aarch64-linux-gnu systems with no regressions. 

(I have no write access to repo)

Thanks,
Dmitrij


gcc/ChangeLog:

	PR tree-optimization/90836

	* gcc/match.pd (popcount): New pattern.

gcc/testsuite/ChangeLog:

	PR tree-optimization/90836

	* lib/target-supports.exp (check_effective_target_popcount)
	(check_effective_target_popcountll): New effective targets.
	* gcc.dg/tree-ssa/popcount4.c: New test.
	* gcc.dg/tree-ssa/popcount4l.c: New test.
	* gcc.dg/tree-ssa/popcount4ll.c: New test.

[-- Attachment #2: popcount.diff --]
[-- Type: text/x-diff, Size: 6051 bytes --]

diff --git a/gcc/match.pd b/gcc/match.pd
index 0317bc7..b1867bf 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -5358,6 +5358,70 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
       (cmp (popcount @0) integer_zerop)
       (rep @0 { build_zero_cst (TREE_TYPE (@0)); }))))
 
+/* 64- and 32-bits branchless implementations of popcount are detected:
+
+   int popcount64c (uint64_t x)
+   {
+     x -= (x >> 1) & 0x5555555555555555ULL;
+     x = (x & 0x3333333333333333ULL) + ((x >> 2) & 0x3333333333333333ULL);
+     x = (x + (x >> 4)) & 0x0f0f0f0f0f0f0f0fULL;
+     return (x * 0x0101010101010101ULL) >> 56;
+   }
+
+   int popcount32c (uint32_t x)
+   {
+     x -= (x >> 1) & 0x55555555;
+     x = (x & 0x33333333) + ((x >> 2) & 0x33333333);
+     x = (x + (x >> 4)) & 0x0f0f0f0f;
+     return (x * 0x01010101) >> 24;
+   }  */
+(simplify
+  (convert
+    (rshift
+      (mult
+	(bit_and:c
+	  (plus:c
+	    (rshift @8 INTEGER_CST@5)
+	    (plus:c@8
+	      (bit_and @6 INTEGER_CST@7)
+	      (bit_and
+		(rshift
+		  (minus@6
+		    @0
+		    (bit_and
+		      (rshift @0 INTEGER_CST@4)
+		      INTEGER_CST@11))
+		  INTEGER_CST@10)
+		INTEGER_CST@9)))
+	  INTEGER_CST@3)
+	INTEGER_CST@2)
+      INTEGER_CST@1))
+  /* Check constants and optab.  */
+  (with
+     {
+       tree argtype = TREE_TYPE (@0);
+       unsigned prec = TYPE_PRECISION (argtype);
+       int shift = TYPE_PRECISION (long_long_unsigned_type_node) - prec;
+       const unsigned long long c1 = 0x0101010101010101ULL >> shift,
+				c2 = 0x0F0F0F0F0F0F0F0FULL >> shift,
+				c3 = 0x3333333333333333ULL >> shift,
+				c4 = 0x5555555555555555ULL >> shift;
+     }
+    (if (types_match (type, integer_type_node) && tree_to_uhwi (@4) == 1
+	  && tree_to_uhwi (@10) == 2 && tree_to_uhwi (@5) == 4
+	  && tree_to_uhwi (@1) == prec - 8 && tree_to_uhwi (@2) == c1
+	  && tree_to_uhwi (@3) == c2 && tree_to_uhwi (@9) == c3
+	  && tree_to_uhwi (@7) == c3 && tree_to_uhwi (@11) == c4
+	  && optab_handler (popcount_optab, TYPE_MODE (argtype))
+	    != CODE_FOR_nothing)
+	(switch
+	    (if (types_match (argtype, long_long_unsigned_type_node))
+	      (BUILT_IN_POPCOUNTLL @0))
+	    (if (types_match (argtype, long_unsigned_type_node))
+	      (BUILT_IN_POPCOUNTL @0))
+	    (if (types_match (argtype, unsigned_type_node))
+	      (BUILT_IN_POPCOUNT @0))))))
+
 /* Simplify:
 
      a = a1 op a2
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/popcount4.c b/gcc/testsuite/gcc.dg/tree-ssa/popcount4.c
new file mode 100644
index 0000000..9f759f8
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/popcount4.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target popcount } */
+/* { dg-require-effective-target int32plus } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+const unsigned m1  = 0x55555555UL;
+const unsigned m2  = 0x33333333UL;
+const unsigned m4  = 0x0F0F0F0FUL;
+const unsigned h01 = 0x01010101UL;
+const int shift = 24;
+
+int popcount64c(unsigned x)
+{
+    x -= (x >> 1) & m1;
+    x = (x & m2) + ((x >> 2) & m2);
+    x = (x + (x >> 4)) & m4;
+    return (x * h01) >> shift;
+}
+
+/* { dg-final { scan-tree-dump-times "__builtin_popcount" 1 "optimized" } } */
+
+
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/popcount4l.c b/gcc/testsuite/gcc.dg/tree-ssa/popcount4l.c
new file mode 100644
index 0000000..ab33f79
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/popcount4l.c
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target popcountl } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+#if __SIZEOF_LONG__ == 4
+const unsigned long m1  = 0x55555555UL;
+const unsigned long m2  = 0x33333333UL;
+const unsigned long m4  = 0x0F0F0F0FUL;
+const unsigned long h01 = 0x01010101UL;
+const int shift = 24;
+#else
+const unsigned long m1  = 0x5555555555555555UL;
+const unsigned long m2  = 0x3333333333333333UL;
+const unsigned long m4  = 0x0f0f0f0f0f0f0f0fUL;
+const unsigned long h01 = 0x0101010101010101UL;
+const int shift = 56;
+#endif
+
+
+int popcount64c(unsigned long x)
+{
+    x -= (x >> 1) & m1;
+    x = (x & m2) + ((x >> 2) & m2);
+    x = (x + (x >> 4)) & m4;
+    return (x * h01) >> shift;
+}
+
+/* { dg-final { scan-tree-dump-times "__builtin_popcountl" 1 "optimized" } } */
+
+
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/popcount4ll.c b/gcc/testsuite/gcc.dg/tree-ssa/popcount4ll.c
new file mode 100644
index 0000000..f3755f1
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/popcount4ll.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target popcountll } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+const unsigned long long m1  = 0x5555555555555555ULL;
+const unsigned long long m2  = 0x3333333333333333ULL;
+const unsigned long long m4  = 0x0F0F0F0F0F0F0F0FULL;
+const unsigned long long h01 = 0x0101010101010101ULL;
+const int shift = 56;
+
+int popcount64c(unsigned long long x)
+{
+    x -= (x >> 1) & m1;
+    x = (x & m2) + ((x >> 2) & m2);
+    x = (x + (x >> 4)) & m4;
+    return (x * h01) >> shift;
+}
+
+/* { dg-final { scan-tree-dump-times "__builtin_popcountll" 1 "optimized" } } */
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index 3c50b89..32bbad7 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -6855,6 +6855,29 @@ proc check_effective_target_popcountl { } {
     } "" ]
 }
 
+# Return 1 if the target supports popcount on long long.
+
+proc check_effective_target_popcountll { } {
+    return [check_no_messages_and_pattern popcountll "!\\(call" rtl-expand {
+        int foo (long long b)
+          {
+            return __builtin_popcountll (b);
+          }
+    } "" ]
+}
+
+
+# Return 1 if the target supports popcount on int.
+
+proc check_effective_target_popcount { } {
+    return [check_no_messages_and_pattern popcount "!\\(call" rtl-expand {
+        int foo (int b)
+          {
+            return __builtin_popcount (b);
+          }
+    } "" ]
+}
+
 # Return 1 if the target supports atomic operations on "long long"
 # and can execute them.
 #

^ permalink raw reply	[flat|nested] 13+ messages in thread
* Re: [PATCH] PR tree-optimization/90836 Missing popcount pattern matching
@ 2019-09-06 12:13 Wilco Dijkstra
  2019-09-09  8:24 ` Richard Biener
  2019-09-09 18:59 ` Dmitrij Pochepko
  0 siblings, 2 replies; 13+ messages in thread
From: Wilco Dijkstra @ 2019-09-06 12:13 UTC (permalink / raw)
  To: Richard Biener, dmitrij.pochepko, GCC Patches; +Cc: nd

Hi,

+(simplify
+  (convert
+    (rshift
+      (mult

> is the outer convert really necessary?  That is, if we change
> the simplification result to

Indeed that should be "convert?" to make it optional.

> Is the Hamming weight popcount
> faster than the libgcc table-based approach?  I wonder if we really
> need to restrict this conversion to the case where the target
> has an expander.

Well libgcc uses the exact same sequence (not a table):

objdump -d ./aarch64-unknown-linux-gnu/libgcc/_popcountsi2.o

0000000000000000 <__popcountdi2>:
   0:	d341fc01 	lsr	x1, x0, #1
   4:	b200c3e3 	mov	x3, #0x101010101010101     	// #72340172838076673
   8:	9200f021 	and	x1, x1, #0x5555555555555555
   c:	cb010001 	sub	x1, x0, x1
  10:	9200e422 	and	x2, x1, #0x3333333333333333
  14:	d342fc21 	lsr	x1, x1, #2
  18:	9200e421 	and	x1, x1, #0x3333333333333333
  1c:	8b010041 	add	x1, x2, x1
  20:	8b411021 	add	x1, x1, x1, lsr #4
  24:	9200cc20 	and	x0, x1, #0xf0f0f0f0f0f0f0f
  28:	9b037c00 	mul	x0, x0, x3
  2c:	d378fc00 	lsr	x0, x0, #56
  30:	d65f03c0 	ret

So if you don't check for an expander you get an endless loop in libgcc since
the makefile doesn't appear to use -fno-builtin anywhere...

Wilco

^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2019-12-29 19:49 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-09-05 15:35 [PATCH] PR tree-optimization/90836 Missing popcount pattern matching Dmitrij Pochepko
2019-09-06 10:23 ` Richard Biener
2019-09-09 18:54   ` Dmitrij Pochepko
2019-09-09 19:03 ` Dmitrij Pochepko
2019-09-24 15:29   ` Dmitrij Pochepko
2019-09-26  7:47     ` Richard Biener
2019-10-01 11:49       ` Dmitrij Pochepko
2019-10-07 10:04         ` Richard Biener
2019-10-08 22:14           ` Steve Ellcey
2019-12-29 22:27           ` Andrew Pinski
2019-09-06 12:13 Wilco Dijkstra
2019-09-09  8:24 ` Richard Biener
2019-09-09 18:59 ` Dmitrij Pochepko

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).