From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from us-smtp-delivery-124.mimecast.com (us-smtp-delivery-124.mimecast.com [170.10.133.124]) by sourceware.org (Postfix) with ESMTPS id 6C0143858C2B for ; Fri, 4 Nov 2022 14:19:13 +0000 (GMT) DMARC-Filter: OpenDMARC Filter v1.4.1 sourceware.org 6C0143858C2B Authentication-Results: sourceware.org; dmarc=pass (p=none dis=none) header.from=redhat.com Authentication-Results: sourceware.org; spf=pass smtp.mailfrom=redhat.com DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=redhat.com; s=mimecast20190719; t=1667571553; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version:content-type:content-type: content-transfer-encoding:content-transfer-encoding; bh=lAGhS3lF77pwdYmXoVixiOxqs6hvndVnAY6Brf/ak84=; b=LPFFIPl7aNuzV1DjltnIYer3TYFftK3B/V6K4/I9lz91UyL+0LEmMCSok90l2nAIVV7ge8 24PZPxlNQ23NMYqATIwmPt9PYhxcIrhDAIDXGcVgSJ/qfyQzgc4dkd7OjdxpRZuiLBIdO9 AfJ49zdvHWJ3Ai6zyeOg1ZnYLIRXvSQ= Received: from mimecast-mx02.redhat.com (mimecast-mx02.redhat.com [66.187.233.88]) by relay.mimecast.com with ESMTP with STARTTLS (version=TLSv1.2, cipher=TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384) id us-mta-48-xOfnTVzAN3a0KMPA5sIkAg-1; Fri, 04 Nov 2022 10:19:11 -0400 X-MC-Unique: xOfnTVzAN3a0KMPA5sIkAg-1 Received: from smtp.corp.redhat.com (int-mx08.intmail.prod.int.rdu2.redhat.com [10.11.54.8]) (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by mimecast-mx02.redhat.com (Postfix) with ESMTPS id AE84E185A7AB for ; Fri, 4 Nov 2022 14:19:11 +0000 (UTC) Received: from abulafia.quesejoda.com (unknown [10.39.193.173]) by smtp.corp.redhat.com (Postfix) with ESMTPS id 5634DC15BA4; Fri, 4 Nov 2022 14:19:11 +0000 (UTC) Received: from abulafia.quesejoda.com (localhost [127.0.0.1]) by abulafia.quesejoda.com (8.17.1/8.17.1) with ESMTPS id 2A4EJ8wO312092 (version=TLSv1.3 cipher=TLS_AES_256_GCM_SHA384 bits=256 verify=NOT); Fri, 4 Nov 2022 15:19:08 +0100 
Received: (from aldyh@localhost) by abulafia.quesejoda.com (8.17.1/8.17.1/Submit) id 2A4EJ8qd312091; Fri, 4 Nov 2022 15:19:08 +0100 From: Aldy Hernandez To: GCC patches Cc: Andrew MacLeod , Aldy Hernandez Subject: [COMMITTED] Set nonzero bits for multiplication and divisions by a power of 2. Date: Fri, 4 Nov 2022 15:19:05 +0100 Message-Id: <20221104141905.312059-1-aldyh@redhat.com> MIME-Version: 1.0 X-Scanned-By: MIMEDefang 3.1 on 10.11.54.8 X-Mimecast-Spam-Score: 0 X-Mimecast-Originator: redhat.com Content-Transfer-Encoding: 8bit Content-Type: text/plain; charset="US-ASCII"; x-default=true X-Spam-Status: No, score=-11.9 required=5.0 tests=BAYES_00,DKIMWL_WL_HIGH,DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,DKIM_VALID_EF,GIT_PATCH_0,RCVD_IN_DNSWL_NONE,RCVD_IN_MSPIKE_H2,SPF_HELO_NONE,SPF_NONE,TXREP autolearn=ham autolearn_force=no version=3.4.6 X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on server2.sourceware.org List-Id: We're missing a lot of TLC in keeping track of nonzero bits across range-ops. It isn't an oversight, just a limited number of hours to implement stuff. This patch keeps better track of the nonzero mask (really maybe_nonzero bits as discussed) across multiplication and division when the RHS is a power of 2. It fixes PR107342 and also touches on PR55157. In the latter, the nonzero mask is being set quite late (CCP2) but could be set by evrp time if we enhanced range-ops. I have added tests from both PRs. Tested. PR tree-optimization/107342 gcc/ChangeLog: * range-op.cc (operator_mult::fold_range): New. (operator_div::fold_range): New. gcc/testsuite/ChangeLog: * gcc.dg/tree-ssa/vrp122.c: New test. * gcc.dg/tree-ssa/vrp123.c: New test. 
--- gcc/range-op.cc | 59 ++++++++++++++++++++++++++ gcc/testsuite/gcc.dg/tree-ssa/vrp122.c | 19 +++++++++ gcc/testsuite/gcc.dg/tree-ssa/vrp123.c | 18 ++++++++ 3 files changed, 96 insertions(+) create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/vrp122.c create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/vrp123.c diff --git a/gcc/range-op.cc b/gcc/range-op.cc index 49ee7be3d3b..25c004d8287 100644 --- a/gcc/range-op.cc +++ b/gcc/range-op.cc @@ -1742,9 +1742,13 @@ cross_product_operator::wi_cross_product (irange &r, tree type, class operator_mult : public cross_product_operator { + using range_operator::fold_range; using range_operator::op1_range; using range_operator::op2_range; public: + virtual bool fold_range (irange &r, tree type, + const irange &lh, const irange &rh, + relation_trio = TRIO_VARYING) const final override; virtual void wi_fold (irange &r, tree type, const wide_int &lh_lb, const wide_int &lh_ub, @@ -1762,6 +1766,32 @@ public: relation_trio) const; } op_mult; +bool +operator_mult::fold_range (irange &r, tree type, + const irange &lh, const irange &rh, + relation_trio trio) const +{ + if (!cross_product_operator::fold_range (r, type, lh, rh, trio)) + return false; + + if (lh.undefined_p ()) + return true; + + tree t; + if (rh.singleton_p (&t)) + { + wide_int w = wi::to_wide (t); + int shift = wi::exact_log2 (w); + if (shift != -1) + { + wide_int nz = lh.get_nonzero_bits (); + nz = wi::lshift (nz, shift); + r.set_nonzero_bits (nz); + } + } + return true; +} + bool operator_mult::op1_range (irange &r, tree type, const irange &lhs, const irange &op2, @@ -1902,10 +1932,39 @@ public: const wide_int &rh_ub) const; virtual bool wi_op_overflows (wide_int &res, tree type, const wide_int &, const wide_int &) const; + virtual bool fold_range (irange &r, tree type, + const irange &lh, const irange &rh, + relation_trio trio) const final override; private: enum tree_code code; }; +bool +operator_div::fold_range (irange &r, tree type, + const irange &lh, const irange 
&rh, + relation_trio trio) const +{ + if (!cross_product_operator::fold_range (r, type, lh, rh, trio)) + return false; + + if (lh.undefined_p ()) + return true; + + tree t; + if (rh.singleton_p (&t)) + { + wide_int wi = wi::to_wide (t); + int shift = wi::exact_log2 (wi); + if (shift != -1) + { + wide_int nz = lh.get_nonzero_bits (); + nz = wi::rshift (nz, shift, TYPE_SIGN (type)); + r.set_nonzero_bits (nz); + } + } + return true; +} + bool operator_div::wi_op_overflows (wide_int &res, tree type, const wide_int &w0, const wide_int &w1) const diff --git a/gcc/testsuite/gcc.dg/tree-ssa/vrp122.c b/gcc/testsuite/gcc.dg/tree-ssa/vrp122.c new file mode 100644 index 00000000000..b2ddcda023c --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/vrp122.c @@ -0,0 +1,19 @@ +// { dg-do compile } +// { dg-options "-O2 -fdump-tree-evrp-details" } + +void gg(void); +int f(unsigned t) +{ + unsigned g = t*16; + if (g==0) return 1; + gg(); + gg(); + gg(); + gg(); + gg(); + gg(); + if (g<=4) return 1; + return 0; +} + +// { dg-final { scan-tree-dump "Global Exported: g_.* NONZERO 0x.*fff0" "evrp" } } diff --git a/gcc/testsuite/gcc.dg/tree-ssa/vrp123.c b/gcc/testsuite/gcc.dg/tree-ssa/vrp123.c new file mode 100644 index 00000000000..1ad3caa4384 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/vrp123.c @@ -0,0 +1,18 @@ +// { dg-options "-O1 -fdump-tree-dom3-raw" } + +extern int +__attribute__((const)) +foo4b (int); + +int f4b (unsigned int r) +{ + if (foo4b (r)) + r *= 8U; + + if ((r / 2U) & 2U) + r += foo4b (r); + + return r; +} + +// { dg-final { scan-tree-dump-times {gimple_call