From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 1006) id 788763835419; Fri, 28 May 2021 08:44:14 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 788763835419 MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Content-Type: text/plain; charset="utf-8" From: Joern Rennecke To: gcc-cvs@gcc.gnu.org Subject: [gcc r12-1107] Recognize popcount also when a double width operation is needed. X-Act-Checkin: gcc X-Git-Author: Joern Rennecke X-Git-Refname: refs/heads/master X-Git-Oldrev: c9114f2804b91690e030383de15a24e0b738e856 X-Git-Newrev: 5b43f6ace51c08dc2bae3c91a2a11300356c573d Message-Id: <20210528084414.788763835419@sourceware.org> Date: Fri, 28 May 2021 08:44:14 +0000 (GMT) X-BeenThere: gcc-cvs@gcc.gnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Gcc-cvs mailing list List-Unsubscribe: , List-Archive: List-Help: List-Subscribe: , X-List-Received-Date: Fri, 28 May 2021 08:44:14 -0000 https://gcc.gnu.org/g:5b43f6ace51c08dc2bae3c91a2a11300356c573d commit r12-1107-g5b43f6ace51c08dc2bae3c91a2a11300356c573d Author: Joern Rennecke Date: Fri May 28 09:34:07 2021 +0100 Recognize popcount also when a double width operation is needed. 2021-05-28 Joern Rennecke gcc/ * match.pd <popcount>: When generating popcount directly fails, try doing it in two halves. gcc/testsuite/ * gcc.dg/tree-ssa/popcount4ll.c: Remove lp64 condition. Adjust scanning pattern for !lp64. * gcc.dg/tree-ssa/popcount5ll.c: Likewise. * gcc.dg/tree-ssa/popcount4l.c: Adjust scanning pattern for ! int32plus. 
Co-Authored-By: Richard Biener Diff: --- gcc/match.pd | 29 +++++++++++++++++++++++++---- gcc/testsuite/gcc.dg/tree-ssa/popcount4l.c | 3 ++- gcc/testsuite/gcc.dg/tree-ssa/popcount4ll.c | 5 +++-- gcc/testsuite/gcc.dg/tree-ssa/popcount5ll.c | 5 +++-- 4 files changed, 33 insertions(+), 9 deletions(-) diff --git a/gcc/match.pd b/gcc/match.pd index b60e2703f60..d06ff170684 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -6642,10 +6642,31 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) && tree_to_uhwi (@3) == c2 && tree_to_uhwi (@9) == c3 && tree_to_uhwi (@7) == c3 - && tree_to_uhwi (@11) == c4 - && direct_internal_fn_supported_p (IFN_POPCOUNT, type, - OPTIMIZE_FOR_BOTH)) - (convert (IFN_POPCOUNT:type @0))))) + && tree_to_uhwi (@11) == c4) + (if (direct_internal_fn_supported_p (IFN_POPCOUNT, type, + OPTIMIZE_FOR_BOTH)) + (convert (IFN_POPCOUNT:type @0)) + /* Try to do popcount in two halves. PREC must be at least + five bits for this to work without extension before adding. */ + (with { + tree half_type = NULL_TREE; + opt_machine_mode m = mode_for_size ((prec + 1) / 2, MODE_INT, 1); + int half_prec = 8; + if (m.exists () + && m.require () != TYPE_MODE (type)) + { + half_prec = GET_MODE_PRECISION (as_a <scalar_int_mode> (m)); + half_type = build_nonstandard_integer_type (half_prec, 1); + } + gcc_assert (half_prec > 2); + } + (if (half_type != NULL_TREE + && direct_internal_fn_supported_p (IFN_POPCOUNT, half_type, + OPTIMIZE_FOR_BOTH)) + (convert (plus + (IFN_POPCOUNT:half_type (convert @0)) + (IFN_POPCOUNT:half_type (convert (rshift @0 + { build_int_cst (integer_type_node, half_prec); } ))))))))))) /* __builtin_ffs needs to deal on many targets with the possible zero argument. 
If we know the argument is always non-zero, __builtin_ctz + 1 diff --git a/gcc/testsuite/gcc.dg/tree-ssa/popcount4l.c b/gcc/testsuite/gcc.dg/tree-ssa/popcount4l.c index 69fb2d1134d..269e56e90f9 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/popcount4l.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/popcount4l.c @@ -25,6 +25,7 @@ int popcount64c(unsigned long x) return (x * h01) >> shift; } -/* { dg-final { scan-tree-dump-times "\.POPCOUNT" 1 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "\.POPCOUNT" 1 "optimized" { target int32plus } } } */ +/* { dg-final { scan-tree-dump "\.POPCOUNT" "optimized" { target { ! int32plus } } } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/popcount4ll.c b/gcc/testsuite/gcc.dg/tree-ssa/popcount4ll.c index c1588be68e4..7abadf6df04 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/popcount4ll.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/popcount4ll.c @@ -1,4 +1,4 @@ -/* { dg-do compile { target { lp64 } } } */ +/* { dg-do compile } */ /* { dg-require-effective-target popcountll } */ /* { dg-options "-O2 -fdump-tree-optimized" } */ @@ -16,4 +16,5 @@ int popcount64c(unsigned long long x) return (x * h01) >> shift; } -/* { dg-final { scan-tree-dump-times "\.POPCOUNT" 1 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "\.POPCOUNT" 1 "optimized" { target { lp64 } } } } */ +/* { dg-final { scan-tree-dump-times "\.POPCOUNT" 2 "optimized" { target { ! 
lp64 } } } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/popcount5ll.c b/gcc/testsuite/gcc.dg/tree-ssa/popcount5ll.c index edb191bf894..2afe08124fe 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/popcount5ll.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/popcount5ll.c @@ -1,5 +1,5 @@ /* PR tree-optimization/94800 */ -/* { dg-do compile { target { lp64 } } } */ +/* { dg-do compile } */ /* { dg-require-effective-target popcountll } */ /* { dg-options "-O2 -fdump-tree-optimized" } */ @@ -19,4 +19,5 @@ int popcount64c(unsigned long long x) return x >> shift; } -/* { dg-final { scan-tree-dump-times "\.POPCOUNT" 1 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "\.POPCOUNT" 1 "optimized" { target { lp64 } } } } */ +/* { dg-final { scan-tree-dump-times "\.POPCOUNT" 2 "optimized" { target { ! lp64 } } } } */