From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from EUR04-VI1-obe.outbound.protection.outlook.com (mail-eopbgr80057.outbound.protection.outlook.com [40.107.8.57]) by sourceware.org (Postfix) with ESMTPS id 2EF923858D1E for ; Fri, 11 Nov 2022 19:05:32 +0000 (GMT) DMARC-Filter: OpenDMARC Filter v1.4.1 sourceware.org 2EF923858D1E Authentication-Results: sourceware.org; dmarc=pass (p=none dis=none) header.from=arm.com Authentication-Results: sourceware.org; spf=pass smtp.mailfrom=arm.com ARC-Seal: i=2; a=rsa-sha256; s=arcselector9901; d=microsoft.com; cv=pass; b=XqJJar3ficrPHObNluZ2j1mzDoezKDmk3u42AuW9DFMKT8Xdl30wdo5AF8suovkGEheYLEsHPYWgDrcAkKnCuVgxHNMOQMuEKXd3/GW8EgFPabcOFnh3R6xoYGOs20NSMN7skvyOduV81bL7+LXtQPSH2GdBzUZiFCA3xODgNwFoo7gUvP88uTsTokrU+QtIjt9qnAolPue91rPKNLRCGkH94J0vBmJ79TVsgTXZOrMf54ll1q/F+FjQsL82YB02Kj7nBV3Tonu9IcRmIK2ml5Oy80CJ6nFpzwEZgXFhq/xfpeLexaqXrPrE9IJocmAqj7knihvX4nvyiZrXrihxwQ== ARC-Message-Signature: i=2; a=rsa-sha256; c=relaxed/relaxed; d=microsoft.com; s=arcselector9901; h=From:Date:Subject:Message-ID:Content-Type:MIME-Version:X-MS-Exchange-AntiSpam-MessageData-ChunkCount:X-MS-Exchange-AntiSpam-MessageData-0:X-MS-Exchange-AntiSpam-MessageData-1; bh=lS1+rFqSJCAXjTDxVAJDqxa3H/yOyEoTMjh1KIkTGQI=; b=Tuc+JQocA/G6I0KXZyRr8+DjJbsFxEH1Qs+hloI+xxutGwpSloE6gn53L/PdUC/+d0UYcpVsUZ8g1T/FjNNlkxPr1OKtXeC9JG5trcs7AdZa6n5V5ADs98Vdei/VsbDpku5xl0sCiUbu1LOXoVjnL84QxY2xtte45HiP3tOMuWnuGin5juQofj6Sv+0foS000ZjI6v/vIBFd5FMbFjZujVouPXmgm/9YKMIVVwuq5H2kHL6MUFXa9KywTxesD/L6+RE5WLlrb5Q0isg087HVEoLNoDK3HqByQWFmIGk4dF6zBJHeuBMtH+iyyugT7l8jrPsuP1W5/D6RLHHxMUTUdQ== ARC-Authentication-Results: i=2; mx.microsoft.com 1; spf=pass (sender ip is 63.35.35.123) smtp.rcpttodomain=gcc.gnu.org smtp.mailfrom=arm.com; dmarc=pass (p=none sp=none pct=100) action=none header.from=arm.com; dkim=pass (signature was verified) header.d=armh.onmicrosoft.com; arc=pass (0 oda=1 ltdi=1 spf=[1,1,smtp.mailfrom=arm.com] dkim=[1,1,header.d=arm.com] dmarc=[1,1,header.from=arm.com]) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=armh.onmicrosoft.com; s=selector2-armh-onmicrosoft-com; h=From:Date:Subject:Message-ID:Content-Type:MIME-Version:X-MS-Exchange-SenderADCheck; bh=lS1+rFqSJCAXjTDxVAJDqxa3H/yOyEoTMjh1KIkTGQI=; b=EARMVBDyRpP+WSNU/Qc9K1Rtu15zvcCwGPdQ9KeyGfhrAYSYLcNU8SGA/6Re11DqXIsruArjySIploqNucVIy8rNDsT/uN7Bxo3Hx4Go0+7y3yvMNbw1TkiyrwMP9k3qnw21UaVwuYTPE9FeYw64A0VKLiAy6ZVvdV3oR15yNW0= Received: from AM6P191CA0078.EURP191.PROD.OUTLOOK.COM (2603:10a6:209:8a::19) by VI1PR08MB5358.eurprd08.prod.outlook.com (2603:10a6:803:13c::8) with Microsoft SMTP Server (version=TLS1_2, cipher=TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384) id 15.20.5813.13; Fri, 11 Nov 2022 19:05:25 +0000 Received: from VI1EUR03FT049.eop-EUR03.prod.protection.outlook.com (2603:10a6:209:8a:cafe::d7) by AM6P191CA0078.outlook.office365.com (2603:10a6:209:8a::19) with Microsoft SMTP Server (version=TLS1_2, cipher=TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384) id 15.20.5791.27 via Frontend Transport; Fri, 11 Nov 2022 19:05:24 +0000 X-MS-Exchange-Authentication-Results: spf=pass (sender IP is 63.35.35.123) smtp.mailfrom=arm.com; dkim=pass (signature was verified) header.d=armh.onmicrosoft.com;dmarc=pass action=none header.from=arm.com; Received-SPF: Pass (protection.outlook.com: domain of arm.com designates 63.35.35.123 as permitted sender) receiver=protection.outlook.com; client-ip=63.35.35.123; helo=64aa7808-outbound-1.mta.getcheckrecipient.com; pr=C Received: from 64aa7808-outbound-1.mta.getcheckrecipient.com (63.35.35.123) by VI1EUR03FT049.mail.protection.outlook.com (100.127.144.168) with Microsoft SMTP Server (version=TLS1_2, cipher=TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384) id 15.20.5813.12 via Frontend Transport; Fri, 11 Nov 2022 19:05:24 +0000 Received: ("Tessian outbound b4aebcc5bc64:v130"); Fri, 11 Nov 2022 19:05:24 +0000 X-CheckRecipientChecked: true X-CR-MTA-CID: ddae8e65732d6958 X-CR-MTA-TID: 64aa7808 Received: from 9edbbafed9fb.1 by 64aa7808-outbound-1.mta.getcheckrecipient.com id EC0836C1-B6CB-46B9-8AE8-0C36166596A7.1; Fri, 11 Nov 2022 19:05:17 +0000 Received: from EUR04-HE1-obe.outbound.protection.outlook.com by 64aa7808-outbound-1.mta.getcheckrecipient.com with ESMTPS id 9edbbafed9fb.1 (version=TLSv1.2 cipher=ECDHE-RSA-AES256-GCM-SHA384); Fri, 11 Nov 2022 19:05:17 +0000 ARC-Seal: i=1; a=rsa-sha256; s=arcselector9901; d=microsoft.com; cv=none; b=DggiMHnPyApE4P8f2SUahzOKcWcWbLFidYxUm9XP9sgzLZXQuo4dcd+u14raoHZonFsOmWD/2b+dWgMp0dnlzlAC2l4sjoH96ZzLDSVgn7cDBhxBShYdaY9FCaCR+Mi6YYp9ucHGFj5Y+BuUrC8tCsy06Ed0tXiRb8+986+S1b5euF1o29Pv7QJ3PXnt7D0BeRr2sRKqPI2EVwUWrViYrE0J5mBnZ/CtcdxGk9jsIX/Y6bjAvgP4yahQlk7qsKAEMvJg/V507t1dv6aurV8of/sEJD3KVcng3VfaNyJf1f/NcUcagLKsujHDHgXmPQAOpaTRa38lrlbWIetzWp6Qgw== ARC-Message-Signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=microsoft.com; s=arcselector9901; h=From:Date:Subject:Message-ID:Content-Type:MIME-Version:X-MS-Exchange-AntiSpam-MessageData-ChunkCount:X-MS-Exchange-AntiSpam-MessageData-0:X-MS-Exchange-AntiSpam-MessageData-1; bh=lS1+rFqSJCAXjTDxVAJDqxa3H/yOyEoTMjh1KIkTGQI=; b=e+REEyfviBb+B+dRohvvDTPwSfHHruZcD8tUj/hZPCo6USqYjItUB+55rgAeaKayxORrzh5jkz+fp1sHcI1FuVHcf6tFHHkuUK2Tu4Jp0BDRXr7GfUOzZ5znZiqH6ctJAcHHe6SMGCrFFsEv4JnDMuhzwlnAXW34jv8XrxIHs891G4/2jEDlGUbixzffv9k89hUsotxdkkcIvBD7Erqps6RN3lQJ5SbkT6liSecxqKYtamR+FlVXPA0XOcOnkh1ktgUQne6hJVOuNM3SaxqEcHifs0Dlb9MfcqQwfCmsq6ygf2roGJVTm0O5kow4kRJYTduiS8lNOzmZkF+6B6K+xQ== ARC-Authentication-Results: i=1; mx.microsoft.com 1; spf=pass smtp.mailfrom=arm.com; dmarc=pass action=none header.from=arm.com; dkim=pass header.d=arm.com; arc=none DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=armh.onmicrosoft.com; s=selector2-armh-onmicrosoft-com; h=From:Date:Subject:Message-ID:Content-Type:MIME-Version:X-MS-Exchange-SenderADCheck; bh=lS1+rFqSJCAXjTDxVAJDqxa3H/yOyEoTMjh1KIkTGQI=; b=EARMVBDyRpP+WSNU/Qc9K1Rtu15zvcCwGPdQ9KeyGfhrAYSYLcNU8SGA/6Re11DqXIsruArjySIploqNucVIy8rNDsT/uN7Bxo3Hx4Go0+7y3yvMNbw1TkiyrwMP9k3qnw21UaVwuYTPE9FeYw64A0VKLiAy6ZVvdV3oR15yNW0= Authentication-Results-Original: dkim=none (message not signed) header.d=none;dmarc=none action=none header.from=arm.com; Received: from PAXPR08MB6686.eurprd08.prod.outlook.com (2603:10a6:102:13e::8) by VI1PR08MB5311.eurprd08.prod.outlook.com (2603:10a6:803:13a::19) with Microsoft SMTP Server (version=TLS1_2, cipher=TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384) id 15.20.5813.13; Fri, 11 Nov 2022 19:05:14 +0000 Received: from PAXPR08MB6686.eurprd08.prod.outlook.com ([fe80::44db:f442:b299:a701]) by PAXPR08MB6686.eurprd08.prod.outlook.com ([fe80::44db:f442:b299:a701%3]) with mapi id 15.20.5813.013; Fri, 11 Nov 2022 19:05:14 +0000 Date: Fri, 11 Nov 2022 19:01:10 +0000 From: Andrew Carlotti To: gcc-patches@gcc.gnu.org Subject: [PATCH 7/8] middle-end: Add c[lt]z idiom recognition Message-ID: References: Content-Type: text/plain; charset=us-ascii Content-Disposition: inline In-Reply-To: X-ClientProxiedBy: LO2P265CA0495.GBRP265.PROD.OUTLOOK.COM (2603:10a6:600:13a::20) To PAXPR08MB6686.eurprd08.prod.outlook.com (2603:10a6:102:13e::8) MIME-Version: 1.0 X-MS-TrafficTypeDiagnostic: PAXPR08MB6686:EE_|VI1PR08MB5311:EE_|VI1EUR03FT049:EE_|VI1PR08MB5358:EE_ X-MS-Office365-Filtering-Correlation-Id: b4bff7da-9eb9-4662-16bf-08dac417b054 x-checkrecipientrouted: true NoDisclaimer: true X-MS-Exchange-SenderADCheck: 1 X-MS-Exchange-AntiSpam-Relay: 0 X-Microsoft-Antispam-Untrusted: BCL:0; X-Microsoft-Antispam-Message-Info-Original: +AmX+qT2itEojFLFpgP7BW/3fVW8P6wNF8NpoPBY+rzOAo+BKkwjuNGjNrG5EQyVZc+FB2Hhc3FXYiXsDQwPNi7Gp/Zk2DYoucX/id07SDuKxcnRUy1kyvbU8y/+OwETB+IJaffNsutuVxclz2AfV04CVAgJcJKuRMYn9AXdLdUQ4bEyRm6fM3jK0Y6MlXBINpyIsYv4VUVfKCKFcCxTdoc4p5XR96hXzhHGrMHo3qp3PW8rwhCWxkkjo2v51YllAWWJ+YR6VQ1TGIvlUcAAsrT5v9mmZdicD28Hu1bvtBdkb8OsZmrjSVdF61L7GChP8FJqSRq/9AKkmfHpR9r6pZn5urPIqXw9EIO1GacOHWVnrnUOLAP9X/wGcI5PxWRJrZ4LB2R9Kb5MKOeS39RNasY5C/Xhk+zKW3StH3cr2R9+U4j3J/4x4nCc0Arv/2UrJ1A+Ji8AdYmrS5o+FrVjAcFQHbhZz/wGxUR3Bim7Ee95kE73DQi0Gtx47WcMnKHqH6Baye0euJGxTQzrVA8foXD/oCOFQsBDni5VTzljUTZzy77Yal8eElOWhu3HSUxzlFFsI+pEEzGHSy7kei7TO1j8vp1HMldb9lmWHPU7t1YHf7yzz9MqDEYDrLRIWFOXEaTZmizxRrRgLNHyiXn1XdwfjTfAio9yBDL4QxqCQXPIqwzXuANC6k9m0NgddNo0sqC21WabdOMsrn+jcfAXJfTundrvsJVdTupL8B/rXQIOhjhwS28rm8gsG9X7wK+hq3ThG6XPpUR8ukvWpL92nQzlWPRgoHcKTAoaNVeDVTZWWhqc33KgVP71++JTlSVI7Se5t+qVSol7IUA6awpRcA== X-Forefront-Antispam-Report-Untrusted: CIP:255.255.255.255;CTRY:;LANG:en;SCL:1;SRV:;IPV:NLI;SFV:NSPM;H:PAXPR08MB6686.eurprd08.prod.outlook.com;PTR:;CAT:NONE;SFS:(13230022)(4636009)(346002)(39860400002)(396003)(136003)(366004)(376002)(451199015)(84970400001)(5660300002)(316002)(66946007)(66556008)(66476007)(6916009)(86362001)(8936002)(8676002)(38100700002)(41300700001)(6512007)(478600001)(6486002)(6506007)(26005)(6666004)(44832011)(30864003)(2906002)(186003)(67856001);DIR:OUT;SFP:1101; X-MS-Exchange-Transport-CrossTenantHeadersStamped: VI1PR08MB5311 Original-Authentication-Results: dkim=none (message not signed) header.d=none;dmarc=none action=none header.from=arm.com; X-EOPAttributedMessage: 0 X-MS-Exchange-Transport-CrossTenantHeadersStripped: VI1EUR03FT049.eop-EUR03.prod.protection.outlook.com X-MS-PublicTrafficType: Email X-MS-Office365-Filtering-Correlation-Id-Prvs: cb5dd558-2847-40e0-7213-08dac417a8cf X-Microsoft-Antispam: BCL:0; X-Microsoft-Antispam-Message-Info: DEWp0EOCwS1lqFTzFeRERaqg8/61Io4losOkPw+Sr++OmgwXUhU5FTTfWmIAB1jtaaVKj/1C+JALSNJcYPUtWxZpgE/LvfRLJ5eYvZb47vZaxP/tKQUtHWIo9kWBg0AgDyr8R969l3+5cvhBYVtroRYlTJ60Z1JdX/L9oO591VJW3X8FPmu9Tsgihs5Zoc1B0Vy2/4CZ5euHLYujyxM+g0G5rzxLYNm+Lp0MgsKnr+/LomPxvby1wGqtmqGCTAB5RZ+ZVOpCr9e1USW2ekAmiBljG2DL6KByjcshPyy+HKwxT+WW4WpsusVxZ7RwRgDz753IAVazzJld/ONN18B3UQeV1uEdN6xKvkPRQHByxCurU4/8ihMKZfouUcJIOtuABKk/7J1WB2oEC/Q19ddupkQYYBKJz2utMplbh0UzvcNo5dnx/RxC6xT7tsl6LFkWl9tI15Yvo/H7SqmDqidxFqgRohR7EhgvQ5b8e4UkKiDIPRooOTEh7TsGpdFacMjIJW7FItXD/dcgIQ0cwM4bJs7RhmCe8Drre7qA2q79e4ZZheAabifW+HQj7lhfiHfpSqzc6iN0yrgZsvbhvy5xiYo1fBpeHN92tVwrK9JKsXyLnL3802MCue/WblU/0mYTdeKaa0m+eVdOHQ/v4GUG29iIcU/GI89D+AI340NNRPsIAuitDj37hN25atbIB0Wdgg7/yt+lXcZT1P0CFYaZTvwsPnN5cr3EYciTXNyGxWx0UCx21f8scS/9pGK6tASNqxbr9f2pQAYqJPv0/CtmtWjBXkYCYF+tAvBV6MufOeU40hAeOWSW4M91y7zhk0vXVuv7NOY/Ohn3CQkFYYxBAhMIAXAh9GaBQ4wW5/xML4qySgh3Bjlpi9xYfzKGbEe4 X-Forefront-Antispam-Report: CIP:63.35.35.123;CTRY:IE;LANG:en;SCL:1;SRV:;IPV:CAL;SFV:NSPM;H:64aa7808-outbound-1.mta.getcheckrecipient.com;PTR:ec2-63-35-35-123.eu-west-1.compute.amazonaws.com;CAT:NONE;SFS:(13230022)(4636009)(396003)(39860400002)(136003)(346002)(376002)(451199015)(40470700004)(46966006)(36840700001)(84970400001)(44832011)(2906002)(70586007)(70206006)(8936002)(30864003)(5660300002)(8676002)(41300700001)(316002)(40480700001)(356005)(81166007)(6486002)(478600001)(6512007)(26005)(82310400005)(6666004)(6506007)(186003)(36860700001)(82740400003)(86362001)(40460700003)(336012)(6916009)(47076005)(67856001);DIR:OUT;SFP:1101; X-OriginatorOrg: arm.com X-MS-Exchange-CrossTenant-OriginalArrivalTime: 11 Nov 2022 19:05:24.6533 (UTC) X-MS-Exchange-CrossTenant-Network-Message-Id: b4bff7da-9eb9-4662-16bf-08dac417b054 X-MS-Exchange-CrossTenant-Id: f34e5979-57d9-4aaa-ad4d-b122a662184d X-MS-Exchange-CrossTenant-OriginalAttributedTenantConnectingIp: TenantId=f34e5979-57d9-4aaa-ad4d-b122a662184d;Ip=[63.35.35.123];Helo=[64aa7808-outbound-1.mta.getcheckrecipient.com] X-MS-Exchange-CrossTenant-AuthSource: VI1EUR03FT049.eop-EUR03.prod.protection.outlook.com X-MS-Exchange-CrossTenant-AuthAs: Anonymous X-MS-Exchange-CrossTenant-FromEntityHeader: HybridOnPrem X-MS-Exchange-Transport-CrossTenantHeadersStamped: VI1PR08MB5358 X-Spam-Status: No, score=-13.0 required=5.0 tests=BAYES_00,DKIM_SIGNED,DKIM_VALID,FORGED_SPF_HELO,GIT_PATCH_0,KAM_DMARC_NONE,KAM_LOTSOFHASH,RCVD_IN_DNSWL_NONE,RCVD_IN_MSPIKE_H2,SPF_HELO_PASS,SPF_NONE,TXREP,UNPARSEABLE_RELAY autolearn=ham autolearn_force=no version=3.4.6 X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on server2.sourceware.org List-Id: This recognises the patterns of the form: while (n & 1) { n >>= 1 } Unfortunately there are currently two issues relating to this patch. Firstly, simplify_using_initial_conditions does not recognise that (n != 0) and ((n & 1) == 0) implies that ((n >> 1) != 0). This preconditions arise following the loop copy-header pass, and the assumptions returned by number_of_iterations_exit_assumptions then prevent final value replacement from using the niter result. I'm not sure what is the best way to fix this - one approach could be to modify simplify_using_initial_conditions to handle this sort of case, but it seems that it basically wants the information that ranger could give anway, so would something like that be a better option? The second issue arises in the vectoriser, which is able to determine that the niter->assumptions are always true. When building with -march=armv8.4-a+sve -S -O3, we get this codegen: foo (unsigned int b) { int c = 0; if (b == 0) return PREC; while (!(b & (1 << (PREC - 1)))) { b <<= 1; c++; } return c; } foo: .LFB0: .cfi_startproc cmp w0, 0 cbz w0, .L6 blt .L7 lsl w1, w0, 1 clz w2, w1 cmp w2, 14 bls .L8 mov x0, 0 cntw x3 add w1, w2, 1 index z1.s, #0, #1 whilelo p0.s, wzr, w1 .L4: add x0, x0, x3 mov p1.b, p0.b mov z0.d, z1.d whilelo p0.s, w0, w1 incw z1.s b.any .L4 add z0.s, z0.s, #1 lastb w0, p1, z0.s ret .p2align 2,,3 .L8: mov w0, 0 b .L3 .p2align 2,,3 .L13: lsl w1, w1, 1 .L3: add w0, w0, 1 tbz w1, #31, .L13 ret .p2align 2,,3 .L6: mov w0, 32 ret .p2align 2,,3 .L7: mov w0, 0 ret .cfi_endproc In essence, the vectoriser uses the niter information to determine exactly how many iterations of the loop it needs to run. It then uses SVE whilelo instructions to run this number of iterations. The original loop counter is also vectorised, despite only being used in the final iteration, and then the final value of this counter is used as the return value (which is the same as the number of iterations it computed in the first place). This vectorisation is obviously bad, and I think it exposes a latent bug in the vectoriser, rather than being an issue caused by this specific patch. gcc/ChangeLog: * tree-ssa-loop-niter.cc (number_of_iterations_cltz): New. (number_of_iterations_bitcount): Add call to the above. (number_of_iterations_exit_assumptions): Add EQ_EXPR case for c[lt]z idiom recognition. gcc/testsuite/ChangeLog: * gcc.dg/tree-ssa/cltz-max.c: New test. * gcc.dg/tree-ssa/clz-char.c: New test. * gcc.dg/tree-ssa/clz-int.c: New test. * gcc.dg/tree-ssa/clz-long-long.c: New test. * gcc.dg/tree-ssa/clz-long.c: New test. * gcc.dg/tree-ssa/ctz-char.c: New test. * gcc.dg/tree-ssa/ctz-int.c: New test. * gcc.dg/tree-ssa/ctz-long-long.c: New test. * gcc.dg/tree-ssa/ctz-long.c: New test. -- diff --git a/gcc/testsuite/gcc.dg/tree-ssa/cltz-max.c b/gcc/testsuite/gcc.dg/tree-ssa/cltz-max.c new file mode 100644 index 0000000000000000000000000000000000000000..a6bea3d338940efee2e7e1c95a5941525945af9e --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/cltz-max.c @@ -0,0 +1,72 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fno-tree-loop-optimize -fdump-tree-optimized" } */ + +#define PREC (__CHAR_BIT__) + +int clz_count1 (unsigned char b) { + int c = 0; + + if (b == 0) + return 0; + + while (!(b & (1 << (PREC - 1)))) { + b <<= 1; + c++; + } + if (c <= PREC - 1) + return 0; + else + return 34567; +} + +int clz_count2 (unsigned char b) { + int c = 0; + + if (b == 0) + return 0; + + while (!(b & (1 << PREC - 1))) { + b <<= 1; + c++; + } + if (c <= PREC - 2) + return 0; + else + return 76543; +} + +int ctz_count1 (unsigned char b) { + int c = 0; + + if (b == 0) + return 0; + + while (!(b & 1)) { + b >>= 1; + c++; + } + if (c <= PREC - 1) + return 0; + else + return 23456; +} + +int ctz_count2 (unsigned char b) { + int c = 0; + + if (b == 0) + return 0; + + while (!(b & 1)) { + b >>= 1; + c++; + } + if (c <= PREC - 2) + return 0; + else + return 65432; +} +/* { dg-final { scan-tree-dump-times "34567" 0 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "76543" 1 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "23456" 0 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "65432" 1 "optimized" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/clz-char.c b/gcc/testsuite/gcc.dg/tree-ssa/clz-char.c new file mode 100644 index 0000000000000000000000000000000000000000..4a122db95bbb576b4ade706bd3b1ca809d2f1e3b --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/clz-char.c @@ -0,0 +1,34 @@ +/* { dg-do run } */ +/* { dg-require-effective-target clzl } */ +/* { dg-options "-O2 -fno-tree-ch -fdump-tree-optimized" } */ + +#define PREC (__CHAR_BIT__) + +int +__attribute__ ((noinline, noclone)) +foo (unsigned char b) { + int c = 0; + + if (b == 0) + return PREC; + + while (!(b & (1 << (PREC - 1)))) { + b <<= 1; + c++; + } + + return c; +} + +int main() +{ + if (foo(0) != PREC) + __builtin_abort (); + if (foo(1 << (PREC - 1)) != 0) + __builtin_abort (); + if (foo(35) != PREC - 6) + __builtin_abort (); + return 0; +} + +/* { dg-final { scan-tree-dump-times "__builtin_clz|\\.CLZ" 1 "optimized" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/clz-int.c b/gcc/testsuite/gcc.dg/tree-ssa/clz-int.c new file mode 100644 index 0000000000000000000000000000000000000000..96646f8e19cd5b2342acb88949b3ef6e3e2abd5a --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/clz-int.c @@ -0,0 +1,34 @@ +/* { dg-do run } */ +/* { dg-require-effective-target clzl } */ +/* { dg-options "-O2 -fno-tree-ch -fdump-tree-optimized" } */ + +#define PREC (__CHAR_BIT__ * __SIZEOF_INT__) + +int +__attribute__ ((noinline, noclone)) +foo (unsigned int b) { + int c = 0; + + if (b == 0) + return PREC; + + while (!(b & (1 << (PREC - 1)))) { + b <<= 1; + c++; + } + + return c; +} + +int main() +{ + if (foo(0) != PREC) + __builtin_abort (); + if (foo(1 << (PREC - 1)) != 0) + __builtin_abort (); + if (foo(35) != PREC - 6) + __builtin_abort (); + return 0; +} + +/* { dg-final { scan-tree-dump-times "__builtin_clz|\\.CLZ" 1 "optimized" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/clz-long-long.c b/gcc/testsuite/gcc.dg/tree-ssa/clz-long-long.c new file mode 100644 index 0000000000000000000000000000000000000000..80d3edc1dab2e74fc3271ba9d97640839b3a3786 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/clz-long-long.c @@ -0,0 +1,34 @@ +/* { dg-do run } */ +/* { dg-require-effective-target clzll } */ +/* { dg-options "-O2 -fno-tree-ch -fdump-tree-optimized" } */ + +#define PREC (__CHAR_BIT__ * __SIZEOF_LONG_LONG__) + +int +__attribute__ ((noinline, noclone)) +foo (unsigned long long b) { + int c = 0; + + if (b == 0) + return PREC; + + while (!(b & (1LL << (PREC - 1)))) { + b <<= 1; + c++; + } + + return c; +} + +int main() +{ + if (foo(0) != PREC) + __builtin_abort (); + if (foo(1LL << (PREC - 1)) != 0) + __builtin_abort (); + if (foo(35) != PREC - 6) + __builtin_abort (); + return 0; +} + +/* { dg-final { scan-tree-dump-times "__builtin_clz|\\.CLZ" 1 "optimized" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/clz-long.c b/gcc/testsuite/gcc.dg/tree-ssa/clz-long.c new file mode 100644 index 0000000000000000000000000000000000000000..1c8037f93b9c9d42f580a172267c65723a46ef8b --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/clz-long.c @@ -0,0 +1,34 @@ +/* { dg-do run } */ +/* { dg-require-effective-target clzl } */ +/* { dg-options "-O2 -fno-tree-ch -fdump-tree-optimized" } */ + +#define PREC (__CHAR_BIT__ * __SIZEOF_LONG__) + +int +__attribute__ ((noinline, noclone)) +foo (unsigned long b) { + int c = 0; + + if (b == 0) + return PREC; + + while (!(b & (1L << (PREC - 1)))) { + b <<= 1; + c++; +} + + return c; +} + +int main() +{ + if (foo(0) != PREC) + __builtin_abort (); + if (foo(1L << (PREC - 1)) != 0) + __builtin_abort (); + if (foo(35) != PREC - 6) + __builtin_abort (); + return 0; +} + +/* { dg-final { scan-tree-dump-times "__builtin_clz|\\.CLZ" 1 "optimized" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ctz-char.c b/gcc/testsuite/gcc.dg/tree-ssa/ctz-char.c new file mode 100644 index 0000000000000000000000000000000000000000..3cd166acbd4670e175d79a2403de2d5a4fd38665 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/ctz-char.c @@ -0,0 +1,36 @@ +/* { dg-do run } */ +/* { dg-require-effective-target ctz } */ +/* { dg-options "-O2 -fno-tree-ch -fdump-tree-optimized" } */ + +#define PREC (__CHAR_BIT__) + +int +__attribute__ ((noinline, noclone)) +foo (unsigned char b) { + int c = 0; + + if (b == 0) + return PREC; + + while (!(b & 1)) { + b >>= 1; + c++; + } + + return c; +} + +int main() +{ + if (foo(0) != PREC) + __builtin_abort (); + if (foo(128) != 7) + __builtin_abort (); + if (foo(96) != 5) + __builtin_abort (); + if (foo(35) != 0) + __builtin_abort (); + return 0; +} + +/* { dg-final { scan-tree-dump-times "__builtin_ctz|\\.CTZ" 1 "optimized" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ctz-int.c b/gcc/testsuite/gcc.dg/tree-ssa/ctz-int.c new file mode 100644 index 0000000000000000000000000000000000000000..7f63493eb7389a18516f8f126c3c55dc80f0bde6 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/ctz-int.c @@ -0,0 +1,36 @@ +/* { dg-do run } */ +/* { dg-require-effective-target ctz } */ +/* { dg-options "-O2 -fno-tree-ch -fdump-tree-optimized" } */ + +#define PREC (__CHAR_BIT__ * __SIZEOF_INT__) + +int +__attribute__ ((noinline, noclone)) +foo (unsigned int b) { + int c = 0; + + if (b == 0) + return PREC; + + while (!(b & 1)) { + b >>= 1; + c++; + } + + return c; +} + +int main() +{ + if (foo(0) != PREC) + __builtin_abort (); + if (foo(1 << (PREC - 1)) != PREC - 1) + __builtin_abort (); + if (foo(96) != 5) + __builtin_abort (); + if (foo(35) != 0) + __builtin_abort (); + return 0; +} + +/* { dg-final { scan-tree-dump-times "__builtin_ctz|\\.CTZ" 1 "optimized" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ctz-long-long.c b/gcc/testsuite/gcc.dg/tree-ssa/ctz-long-long.c new file mode 100644 index 0000000000000000000000000000000000000000..924f61b76f01c77a40b9fff64af3b629ab1418c0 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/ctz-long-long.c @@ -0,0 +1,36 @@ +/* { dg-do run } */ +/* { dg-require-effective-target ctzll } */ +/* { dg-options "-O2 -fno-tree-ch -fdump-tree-optimized" } */ + +#define PREC (__CHAR_BIT__ * __SIZEOF_LONG_LONG__) + +int +__attribute__ ((noinline, noclone)) +foo (unsigned long long b) { + int c = 0; + + if (b == 0) + return PREC; + + while (!(b & 1)) { + b >>= 1; + c++; + } + + return c; +} + +int main() +{ + if (foo(0) != PREC) + __builtin_abort (); + if (foo(1LL << (PREC - 1)) != PREC - 1) + __builtin_abort (); + if (foo(96) != 5) + __builtin_abort (); + if (foo(35) != 0) + __builtin_abort (); + return 0; +} + +/* { dg-final { scan-tree-dump-times "__builtin_ctz|\\.CTZ" 1 "optimized" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ctz-long.c b/gcc/testsuite/gcc.dg/tree-ssa/ctz-long.c new file mode 100644 index 0000000000000000000000000000000000000000..178945daa8a2697989f1a1a0804ce33d768dcc55 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/ctz-long.c @@ -0,0 +1,36 @@ +/* { dg-do run } */ +/* { dg-require-effective-target ctzl } */ +/* { dg-options "-O2 -fno-tree-ch -fdump-tree-optimized" } */ + +#define PREC (__CHAR_BIT__ * __SIZEOF_LONG__) + +int +__attribute__ ((noinline, noclone)) +foo (unsigned long b) { + int c = 0; + + if (b == 0) + return PREC; + + while (!(b & 1)) { + b >>= 1; + c++; + } + + return c; +} + +int main() +{ + if (foo(0) != PREC) + __builtin_abort (); + if (foo(1L << (PREC - 1)) != PREC - 1) + __builtin_abort (); + if (foo(96) != 5) + __builtin_abort (); + if (foo(35) != 0) + __builtin_abort (); + return 0; +} + +/* { dg-final { scan-tree-dump-times "__builtin_ctz|\\.CTZ" 1 "optimized" } } */ diff --git a/gcc/tree-ssa-loop-niter.cc b/gcc/tree-ssa-loop-niter.cc index 16e8e25919d808cea27adbd72f0be01ae2f0e547..87e6fe81d68fc3352e450688ef79e6fc68854d8a 100644 --- a/gcc/tree-ssa-loop-niter.cc +++ b/gcc/tree-ssa-loop-niter.cc @@ -2274,6 +2274,167 @@ build_cltz_expr (tree src, bool leading, bool defined_at_zero) return call; } +/* See comment below for number_of_iterations_bitcount. + For c[lt]z, we have: + + modify: + iv_2 = iv_1 << 1 OR iv_1 >> 1 + + test: + if (iv & 1 << (prec-1)) OR (iv & 1) + + modification count: + src precision - c[lt]z (src) + + */ + +static bool +number_of_iterations_cltz (loop_p loop, edge exit, + enum tree_code code, + class tree_niter_desc *niter) +{ + bool modify_before_test = true; + HOST_WIDE_INT max; + int checked_bit; + tree iv_2; + + /* Check that condition for staying inside the loop is like + if (iv == 0). */ + gimple *cond_stmt = last_stmt (exit->src); + if (!cond_stmt + || gimple_code (cond_stmt) != GIMPLE_COND + || (code != EQ_EXPR && code != GE_EXPR) + || !integer_zerop (gimple_cond_rhs (cond_stmt)) + || TREE_CODE (gimple_cond_lhs (cond_stmt)) != SSA_NAME) + return false; + + if (code == EQ_EXPR) + { + /* Make sure we check a bitwise and with a suitable constant */ + gimple *and_stmt = SSA_NAME_DEF_STMT (gimple_cond_lhs (cond_stmt)); + if (!is_gimple_assign (and_stmt) + || gimple_assign_rhs_code (and_stmt) != BIT_AND_EXPR + || !integer_pow2p (gimple_assign_rhs2 (and_stmt))) + return false; + + checked_bit = tree_log2 (gimple_assign_rhs2 (and_stmt)); + + iv_2 = gimple_assign_rhs1 (and_stmt); + } + else + { + /* We have a GE_EXPR - a signed comparison with zero is equivalent to + testing the leading bit, so check for this pattern too. */ + + iv_2 = gimple_cond_lhs (cond_stmt); + tree test_value_type = TREE_TYPE (iv_2); + + if (TYPE_UNSIGNED (test_value_type)) + return false; + + gimple *test_value_stmt = SSA_NAME_DEF_STMT (iv_2); + + if (is_gimple_assign (test_value_stmt) + && gimple_assign_rhs_code (test_value_stmt) == NOP_EXPR) + { + /* If the test value comes from a NOP_EXPR, then we need to unwrap + this. We conservatively require that both types have the same + precision. */ + iv_2 = gimple_assign_rhs1 (test_value_stmt); + tree rhs_type = TREE_TYPE (iv_2); + if (TREE_CODE (rhs_type) != INTEGER_TYPE + || (TYPE_PRECISION (rhs_type) + != TYPE_PRECISION (test_value_type))) + return false; + } + + checked_bit = TYPE_PRECISION (test_value_type) - 1; + } + + gimple *iv_2_stmt = SSA_NAME_DEF_STMT (iv_2); + + /* If the test comes before the iv modification, then these will actually be + iv_1 and a phi node. */ + if (gimple_code (iv_2_stmt) == GIMPLE_PHI + && gimple_bb (iv_2_stmt) == loop->header + && gimple_phi_num_args (iv_2_stmt) == 2 + && (TREE_CODE (gimple_phi_arg_def (iv_2_stmt, + loop_latch_edge (loop)->dest_idx)) + == SSA_NAME)) + { + /* iv_2 is actually one of the inputs to the phi. */ + iv_2 = gimple_phi_arg_def (iv_2_stmt, loop_latch_edge (loop)->dest_idx); + iv_2_stmt = SSA_NAME_DEF_STMT (iv_2); + modify_before_test = false; + } + + /* Make sure iv_2_stmt is a logical shift by one stmt: + iv_2 = iv_1 {<<|>>} 1 */ + if (!is_gimple_assign (iv_2_stmt) + || (gimple_assign_rhs_code (iv_2_stmt) != LSHIFT_EXPR + && (gimple_assign_rhs_code (iv_2_stmt) != RSHIFT_EXPR + || !TYPE_UNSIGNED (TREE_TYPE (gimple_assign_lhs (iv_2_stmt))))) + || !integer_onep (gimple_assign_rhs2 (iv_2_stmt))) + return false; + + bool left_shift = (gimple_assign_rhs_code (iv_2_stmt) == LSHIFT_EXPR); + + tree iv_1 = gimple_assign_rhs1 (iv_2_stmt); + + /* Check the recurrence. */ + gimple *phi = SSA_NAME_DEF_STMT (iv_1); + if (gimple_code (phi) != GIMPLE_PHI + || (gimple_bb (phi) != loop_latch_edge (loop)->dest) + || (iv_2 != gimple_phi_arg_def (phi, loop_latch_edge (loop)->dest_idx))) + return false; + + /* We found a match. */ + tree src = gimple_phi_arg_def (phi, loop_preheader_edge (loop)->dest_idx); + int src_precision = TYPE_PRECISION (TREE_TYPE (src)); + + /* Apply any needed preprocessing to src. */ + int num_ignored_bits; + if (left_shift) + num_ignored_bits = src_precision - checked_bit - 1; + else + num_ignored_bits = checked_bit; + + if (modify_before_test) + num_ignored_bits++; + + if (num_ignored_bits != 0) + src = fold_build2 (left_shift ? LSHIFT_EXPR : RSHIFT_EXPR, + TREE_TYPE (src), src, + build_int_cst (integer_type_node, num_ignored_bits)); + + /* Get the corresponding c[lt]z builtin. */ + tree expr = build_cltz_expr (src, left_shift, false); + + if (!expr) + return false; + + max = src_precision - num_ignored_bits - 1; + + expr = fold_convert (unsigned_type_node, expr); + + tree assumptions = fold_build2 (NE_EXPR, boolean_type_node, src, + build_zero_cst (TREE_TYPE (src))); + + niter->assumptions = simplify_using_initial_conditions (loop, assumptions); + niter->may_be_zero = boolean_false_node; + niter->niter = simplify_using_initial_conditions (loop, expr); + + if (TREE_CODE (niter->niter) == INTEGER_CST) + niter->max = tree_to_uhwi (niter->niter); + else + niter->max = max; + + niter->bound = NULL_TREE; + niter->cmp = ERROR_MARK; + + return true; +} + /* See comment below for number_of_iterations_bitcount. For c[lt]z complement, we have: @@ -2434,6 +2595,7 @@ number_of_iterations_bitcount (loop_p loop, edge exit, class tree_niter_desc *niter) { return (number_of_iterations_popcount (loop, exit, code, niter) + || number_of_iterations_cltz (loop, exit, code, niter) || number_of_iterations_cltz_complement (loop, exit, code, niter)); } @@ -2960,6 +3122,9 @@ number_of_iterations_exit_assumptions (class loop *loop, edge exit, case NE_EXPR: break; + case EQ_EXPR: + return number_of_iterations_cltz (loop, exit, code, niter); + default: return false; }