public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] loop-niter: Recognize popcount idioms even with char, short and __int128 [PR95771]
@ 2021-01-03  9:30 Jakub Jelinek
  2021-01-04 11:45 ` Richard Biener
  0 siblings, 1 reply; 2+ messages in thread
From: Jakub Jelinek @ 2021-01-03  9:30 UTC (permalink / raw)
  To: Richard Biener; +Cc: gcc-patches

Hi!

As the testcase shows, we punt unnecessarily on popcount loop idioms if
the type is smaller than int or larger than long long.
Smaller type than int can be handled by zero-extending the argument to
unsigned int, and types twice as long as long long by doing
__builtin_popcountll on both halves of the __int128.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2020-01-03  Jakub Jelinek  <jakub@redhat.com>

	PR tree-optimization/95771
	* tree-ssa-loop-niter.c (number_of_iterations_popcount): Handle types
	with precision smaller than int's precision and types with precision
	twice as large as long long.  Formatting fixes.

	* gcc.target/i386/pr95771.c: New test.

--- gcc/tree-ssa-loop-niter.c.jj	2020-10-12 12:30:34.348813027 +0200
+++ gcc/tree-ssa-loop-niter.c	2021-01-02 19:38:17.858170445 +0100
@@ -2666,27 +2666,45 @@ number_of_iterations_popcount (loop_p lo
 
   /* We found a match. Get the corresponding popcount builtin.  */
   tree src = gimple_phi_arg_def (phi, loop_preheader_edge (loop)->dest_idx);
-  if (TYPE_PRECISION (TREE_TYPE (src)) == TYPE_PRECISION (integer_type_node))
+  if (TYPE_PRECISION (TREE_TYPE (src)) <= TYPE_PRECISION (integer_type_node))
     fn = builtin_decl_implicit (BUILT_IN_POPCOUNT);
-  else if (TYPE_PRECISION (TREE_TYPE (src)) == TYPE_PRECISION
-	   (long_integer_type_node))
+  else if (TYPE_PRECISION (TREE_TYPE (src))
+	   == TYPE_PRECISION (long_integer_type_node))
     fn = builtin_decl_implicit (BUILT_IN_POPCOUNTL);
-  else if (TYPE_PRECISION (TREE_TYPE (src)) == TYPE_PRECISION
-	   (long_long_integer_type_node))
+  else if (TYPE_PRECISION (TREE_TYPE (src))
+	   == TYPE_PRECISION (long_long_integer_type_node)
+	   || (TYPE_PRECISION (TREE_TYPE (src))
+	       == 2 * TYPE_PRECISION (long_long_integer_type_node)))
     fn = builtin_decl_implicit (BUILT_IN_POPCOUNTLL);
 
-  /* ??? Support promoting char/short to int.  */
   if (!fn)
     return false;
 
   /* Update NITER params accordingly  */
   tree utype = unsigned_type_for (TREE_TYPE (src));
   src = fold_convert (utype, src);
-  tree call = fold_convert (utype, build_call_expr (fn, 1, src));
+  if (TYPE_PRECISION (TREE_TYPE (src)) < TYPE_PRECISION (integer_type_node))
+    src = fold_convert (unsigned_type_node, src);
+  tree call;
+  if (TYPE_PRECISION (TREE_TYPE (src))
+      == 2 * TYPE_PRECISION (long_long_integer_type_node))
+    {
+      int prec = TYPE_PRECISION (long_long_integer_type_node);
+      tree src1 = fold_convert (long_long_unsigned_type_node,
+				fold_build2 (RSHIFT_EXPR, TREE_TYPE (src),
+					     unshare_expr (src),
+					     build_int_cst (integer_type_node,
+							    prec)));
+      tree src2 = fold_convert (long_long_unsigned_type_node, src);
+      call = build_call_expr (fn, 1, src1);
+      call = fold_build2 (PLUS_EXPR, TREE_TYPE (call), call,
+			  build_call_expr (fn, 1, src2));
+      call = fold_convert (utype, call);
+    }
+  else
+    call = fold_convert (utype, build_call_expr (fn, 1, src));
   if (adjust)
-    iter = fold_build2 (MINUS_EXPR, utype,
-			call,
-			build_int_cst (utype, 1));
+    iter = fold_build2 (MINUS_EXPR, utype, call, build_int_cst (utype, 1));
   else
     iter = call;
 
@@ -2703,10 +2721,9 @@ number_of_iterations_popcount (loop_p lo
   if (adjust)
     {
       tree may_be_zero = fold_build2 (EQ_EXPR, boolean_type_node, src,
-				      build_zero_cst
-				      (TREE_TYPE (src)));
-      niter->may_be_zero =
-	simplify_using_initial_conditions (loop, may_be_zero);
+				      build_zero_cst (TREE_TYPE (src)));
+      niter->may_be_zero
+	= simplify_using_initial_conditions (loop, may_be_zero);
     }
   else
     niter->may_be_zero = boolean_false_node;
--- gcc/testsuite/gcc.target/i386/pr95771.c.jj	2021-01-02 19:53:27.499768266 +0100
+++ gcc/testsuite/gcc.target/i386/pr95771.c	2021-01-02 19:53:12.782936545 +0100
@@ -0,0 +1,67 @@
+/* PR tree-optimization/95771 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mpopcnt -fdump-tree-optimized" } */
+/* { dg-final { scan-tree-dump-times " = __builtin_popcount" 6 "optimized" { target int128 } } } */
+/* { dg-final { scan-tree-dump-times " = __builtin_popcount" 4 "optimized" { target { ! int128 } } } } */
+
+int
+foo (unsigned char x)
+{
+  int i = 0;
+  while (x)
+    {
+      x &= x - 1;
+      ++i;
+    }
+  return i;
+}
+
+int
+bar (unsigned short x)
+{
+  int i = 0;
+  while (x)
+    {
+      x &= x - 1;
+      ++i;
+    }
+  return i;
+}
+
+int
+baz (unsigned int x)
+{
+  int i = 0;
+  while (x)
+    {
+      x &= x - 1;
+      ++i;
+    }
+  return i;
+}
+
+int
+qux (unsigned long long x)
+{
+  int i = 0;
+  while (x)
+    {
+      x &= x - 1;
+      ++i;
+    }
+  return i;
+}
+
+#ifdef __SIZEOF_INT128__
+int
+corge (unsigned __int128 x)
+{
+  int i = 0;
+  while (x)
+    {
+      x &= x - 1;
+      ++i;
+    }
+  return i;
+}
+#endif

	Jakub


^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: [PATCH] loop-niter: Recognize popcount idioms even with char, short and __int128 [PR95771]
  2021-01-03  9:30 [PATCH] loop-niter: Recognize popcount idioms even with char, short and __int128 [PR95771] Jakub Jelinek
@ 2021-01-04 11:45 ` Richard Biener
  0 siblings, 0 replies; 2+ messages in thread
From: Richard Biener @ 2021-01-04 11:45 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: gcc-patches

On Sun, 3 Jan 2021, Jakub Jelinek wrote:

> Hi!
> 
> As the testcase shows, we punt unnecessarily on popcount loop idioms if
> the type is smaller than int or larger than long long.
> Smaller type than int can be handled by zero-extending the argument to
> unsigned int, and types twice as long as long long by doing
> __builtin_popcountll on both halves of the __int128.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

OK.

Thanks,
Richard.

> 2020-01-03  Jakub Jelinek  <jakub@redhat.com>
> 
> 	PR tree-optimization/95771
> 	* tree-ssa-loop-niter.c (number_of_iterations_popcount): Handle types
> 	with precision smaller than int's precision and types with precision
> 	twice as large as long long.  Formatting fixes.
> 
> 	* gcc.target/i386/pr95771.c: New test.
> 
> --- gcc/tree-ssa-loop-niter.c.jj	2020-10-12 12:30:34.348813027 +0200
> +++ gcc/tree-ssa-loop-niter.c	2021-01-02 19:38:17.858170445 +0100
> @@ -2666,27 +2666,45 @@ number_of_iterations_popcount (loop_p lo
>  
>    /* We found a match. Get the corresponding popcount builtin.  */
>    tree src = gimple_phi_arg_def (phi, loop_preheader_edge (loop)->dest_idx);
> -  if (TYPE_PRECISION (TREE_TYPE (src)) == TYPE_PRECISION (integer_type_node))
> +  if (TYPE_PRECISION (TREE_TYPE (src)) <= TYPE_PRECISION (integer_type_node))
>      fn = builtin_decl_implicit (BUILT_IN_POPCOUNT);
> -  else if (TYPE_PRECISION (TREE_TYPE (src)) == TYPE_PRECISION
> -	   (long_integer_type_node))
> +  else if (TYPE_PRECISION (TREE_TYPE (src))
> +	   == TYPE_PRECISION (long_integer_type_node))
>      fn = builtin_decl_implicit (BUILT_IN_POPCOUNTL);
> -  else if (TYPE_PRECISION (TREE_TYPE (src)) == TYPE_PRECISION
> -	   (long_long_integer_type_node))
> +  else if (TYPE_PRECISION (TREE_TYPE (src))
> +	   == TYPE_PRECISION (long_long_integer_type_node)
> +	   || (TYPE_PRECISION (TREE_TYPE (src))
> +	       == 2 * TYPE_PRECISION (long_long_integer_type_node)))
>      fn = builtin_decl_implicit (BUILT_IN_POPCOUNTLL);
>  
> -  /* ??? Support promoting char/short to int.  */
>    if (!fn)
>      return false;
>  
>    /* Update NITER params accordingly  */
>    tree utype = unsigned_type_for (TREE_TYPE (src));
>    src = fold_convert (utype, src);
> -  tree call = fold_convert (utype, build_call_expr (fn, 1, src));
> +  if (TYPE_PRECISION (TREE_TYPE (src)) < TYPE_PRECISION (integer_type_node))
> +    src = fold_convert (unsigned_type_node, src);
> +  tree call;
> +  if (TYPE_PRECISION (TREE_TYPE (src))
> +      == 2 * TYPE_PRECISION (long_long_integer_type_node))
> +    {
> +      int prec = TYPE_PRECISION (long_long_integer_type_node);
> +      tree src1 = fold_convert (long_long_unsigned_type_node,
> +				fold_build2 (RSHIFT_EXPR, TREE_TYPE (src),
> +					     unshare_expr (src),
> +					     build_int_cst (integer_type_node,
> +							    prec)));
> +      tree src2 = fold_convert (long_long_unsigned_type_node, src);
> +      call = build_call_expr (fn, 1, src1);
> +      call = fold_build2 (PLUS_EXPR, TREE_TYPE (call), call,
> +			  build_call_expr (fn, 1, src2));
> +      call = fold_convert (utype, call);
> +    }
> +  else
> +    call = fold_convert (utype, build_call_expr (fn, 1, src));
>    if (adjust)
> -    iter = fold_build2 (MINUS_EXPR, utype,
> -			call,
> -			build_int_cst (utype, 1));
> +    iter = fold_build2 (MINUS_EXPR, utype, call, build_int_cst (utype, 1));
>    else
>      iter = call;
>  
> @@ -2703,10 +2721,9 @@ number_of_iterations_popcount (loop_p lo
>    if (adjust)
>      {
>        tree may_be_zero = fold_build2 (EQ_EXPR, boolean_type_node, src,
> -				      build_zero_cst
> -				      (TREE_TYPE (src)));
> -      niter->may_be_zero =
> -	simplify_using_initial_conditions (loop, may_be_zero);
> +				      build_zero_cst (TREE_TYPE (src)));
> +      niter->may_be_zero
> +	= simplify_using_initial_conditions (loop, may_be_zero);
>      }
>    else
>      niter->may_be_zero = boolean_false_node;
> --- gcc/testsuite/gcc.target/i386/pr95771.c.jj	2021-01-02 19:53:27.499768266 +0100
> +++ gcc/testsuite/gcc.target/i386/pr95771.c	2021-01-02 19:53:12.782936545 +0100
> @@ -0,0 +1,67 @@
> +/* PR tree-optimization/95771 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mpopcnt -fdump-tree-optimized" } */
> +/* { dg-final { scan-tree-dump-times " = __builtin_popcount" 6 "optimized" { target int128 } } } */
> +/* { dg-final { scan-tree-dump-times " = __builtin_popcount" 4 "optimized" { target { ! int128 } } } } */
> +
> +int
> +foo (unsigned char x)
> +{
> +  int i = 0;
> +  while (x)
> +    {
> +      x &= x - 1;
> +      ++i;
> +    }
> +  return i;
> +}
> +
> +int
> +bar (unsigned short x)
> +{
> +  int i = 0;
> +  while (x)
> +    {
> +      x &= x - 1;
> +      ++i;
> +    }
> +  return i;
> +}
> +
> +int
> +baz (unsigned int x)
> +{
> +  int i = 0;
> +  while (x)
> +    {
> +      x &= x - 1;
> +      ++i;
> +    }
> +  return i;
> +}
> +
> +int
> +qux (unsigned long long x)
> +{
> +  int i = 0;
> +  while (x)
> +    {
> +      x &= x - 1;
> +      ++i;
> +    }
> +  return i;
> +}
> +
> +#ifdef __SIZEOF_INT128__
> +int
> +corge (unsigned __int128 x)
> +{
> +  int i = 0;
> +  while (x)
> +    {
> +      x &= x - 1;
> +      ++i;
> +    }
> +  return i;
> +}
> +#endif
> 
> 	Jakub
> 
> 

-- 
Richard Biener <rguenther@suse.de>
SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg,
Germany; GF: Felix Imendörffer; HRB 36809 (AG Nuernberg)

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2021-01-04 11:45 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-01-03  9:30 [PATCH] loop-niter: Recognize popcount idioms even with char, short and __int128 [PR95771] Jakub Jelinek
2021-01-04 11:45 ` Richard Biener

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).