From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 7886) id 8CC3B3858290; Tue, 20 Sep 2022 06:36:07 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 8CC3B3858290 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gcc.gnu.org; s=default; t=1663655767; bh=1rtXUeHEIss8rP6AsOH+bfPRLp3SJK8s73bIVgz10QU=; h=From:To:Subject:Date:From; b=WTNyOoVLmvC46GL2uB+KCM4lEFTCGmgkOWbtcExFLjWIHIhAUCA0qdyxY1lsHospQ KG1G/L/+PXyfgzDh3Yq2PfrwXhHU9MY30vhfNMIfPdF9JzxvdeoSNLSWWcmwq6vvPE a0YdaY+Q3S4Pa1F4238hhCQwdMPrj1WcG0UNpoY4= MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Content-Type: text/plain; charset="utf-8" From: Kong Lingling To: gcc-cvs@gcc.gnu.org Subject: [gcc r13-2729] middle-end: handle bitop with an invariant induction.[PR105735] X-Act-Checkin: gcc X-Git-Author: konglin1 X-Git-Refname: refs/heads/master X-Git-Oldrev: 90d3e27f3a6add0cd6892bdf07c6a2538bb709e4 X-Git-Newrev: 3a035f1932eeb26f997cf28a5c752617dd09cb91 Message-Id: <20220920063607.8CC3B3858290@sourceware.org> Date: Tue, 20 Sep 2022 06:36:07 +0000 (GMT) List-Id: https://gcc.gnu.org/g:3a035f1932eeb26f997cf28a5c752617dd09cb91 commit r13-2729-g3a035f1932eeb26f997cf28a5c752617dd09cb91 Author: konglin1 Date: Tue Sep 20 13:58:35 2022 +0800 middle-end: handle bitop with an invariant induction.[PR105735] Enhance final_value_replacement_loop to handle bitop with an invariant induction. This patch will enable below optimization: { - long unsigned int bit; - - [local count: 32534376]: - - [local count: 1041207449]: - # tmp_10 = PHI - # bit_12 = PHI - tmp_7 = bit2_6(D) & tmp_10; - bit_8 = bit_12 + 1; - if (bit_8 != 32) - goto ; [96.97%] - else - goto ; [3.03%] - - [local count: 1009658865]: - goto ; [100.00%] - - [local count: 32534376]: - # tmp_11 = PHI - return tmp_11; + tmp_11 = tmp_4 (D) & bit2_6 (D); + return tmp_11; } gcc/ChangeLog: PR middle-end/105735 * tree-scalar-evolution.cc (analyze_and_compute_bitop_with_inv_effect): New function. (final_value_replacement_loop): Enhanced to handle bitop with inv induction. gcc/testsuite/ChangeLog: * gcc.target/i386/pr105735-1.c: New test. * gcc.target/i386/pr105735-2.c: New test. Diff: --- gcc/testsuite/gcc.target/i386/pr105735-1.c | 88 ++++++++++++++++++++++++++++ gcc/testsuite/gcc.target/i386/pr105735-2.c | 28 +++++++++ gcc/tree-scalar-evolution.cc | 93 ++++++++++++++++++++++++++---- 3 files changed, 199 insertions(+), 10 deletions(-) diff --git a/gcc/testsuite/gcc.target/i386/pr105735-1.c b/gcc/testsuite/gcc.target/i386/pr105735-1.c new file mode 100644 index 00000000000..69de6b2911a --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr105735-1.c @@ -0,0 +1,88 @@ +/* { dg-do compile } */ +/* { dg-options "-O1 -fdump-tree-sccp-details" } */ +/* { dg-final { scan-tree-dump-times {final value replacement} 8 "sccp" } } */ + +unsigned int +__attribute__((noipa)) +foo (unsigned int tmp, unsigned int bit2) +{ + for (int bit = 0; bit < 64; bit++) + tmp &= bit2; + return tmp; +} + +unsigned int +__attribute__((noipa)) +foo1 (unsigned int tmp, unsigned int bit2) +{ + for (int bit = 63; bit >= 0; bit -=3) + tmp &= bit2; + return tmp; +} + +unsigned int +__attribute__((noipa)) +foo2 (unsigned int tmp, unsigned int bit2) +{ + for (int bit = 0; bit < 64; bit++) + tmp |= bit2; + return tmp; +} + +unsigned int +__attribute__((noipa)) +foo3 (unsigned int tmp, unsigned int bit2) +{ + for (int bit = 63; bit >= 0; bit -=3) + tmp |= bit2; + return tmp; +} + +unsigned int +__attribute__((noipa)) +foo4 (unsigned int tmp, unsigned int bit2) +{ + for (int bit = 0; bit < 64; bit++) + tmp ^= bit2; + return tmp; +} + +unsigned int +__attribute__((noipa)) +foo5 (unsigned int tmp, unsigned int bit2) +{ + for (int bit = 0; bit < 63; bit++) + tmp ^= bit2; + return tmp; +} + +unsigned int +__attribute__((noipa)) +f (unsigned int tmp, int bit, unsigned int bit2) +{ + unsigned int res = tmp; + for (int i = 0; i < bit; i++) + res &= bit2; + return res; +} + +unsigned int +__attribute__((noipa)) +f1 (unsigned int tmp, int bit, unsigned int bit2) +{ + unsigned int res = tmp; + for (int i = 0; i < bit; i++) + res |= bit2; + return res; +} + +unsigned int +__attribute__((noipa)) +f2 (unsigned int tmp, int bit, unsigned int bit2) +{ + unsigned int res = tmp; + for (int i = 0; i < bit; i++) + res ^= bit2; + return res; +} + diff --git a/gcc/testsuite/gcc.target/i386/pr105735-2.c b/gcc/testsuite/gcc.target/i386/pr105735-2.c new file mode 100644 index 00000000000..66cc5fba1e7 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr105735-2.c @@ -0,0 +1,28 @@ +/* { dg-do run } */ +/* { dg-options "-O1" } */ + +#include "pr105735-1.c" + +int main() +{ + unsigned int tmp = 0x1101; + unsigned int bit2 = 0x111101; + if (foo (tmp, bit2) != 0x1101) + __builtin_abort (); + if (foo1 (tmp, bit2) != 0x1101) + __builtin_abort (); + if (foo2 (tmp, bit2) != 0x111101) + __builtin_abort (); + if (foo3 (tmp, bit2) != 0x111101) + __builtin_abort (); + if (foo4 (tmp, bit2) != 0x1101) + __builtin_abort (); + if (foo5 (tmp, bit2) != 0x110000) + __builtin_abort (); + if (f (tmp, 64, bit2) != 0x1101) + __builtin_abort (); + if (f1 (tmp, 64, bit2) != 0x111101) + __builtin_abort (); + if (f2 (tmp, 64, bit2) != 0x1101) + __builtin_abort (); +} diff --git a/gcc/tree-scalar-evolution.cc b/gcc/tree-scalar-evolution.cc index fc59d035b19..9f30f78cb5d 100644 --- a/gcc/tree-scalar-evolution.cc +++ b/gcc/tree-scalar-evolution.cc @@ -3635,6 +3635,64 @@ enum bit_op_kind return fold_build2 (code1, type, inv, wide_int_to_tree (type, bits)); } +/* Match.pd function to match bitop with invariant expression + .i.e. + tmp_7 = _0 & _1; */ +extern bool gimple_bitop_with_inv_p (tree, tree *, tree (*)(tree)); + +/* Return the inductive expression of bitop with invariant if possible, + otherwise returns DEF. */ +static tree +analyze_and_compute_bitop_with_inv_effect (class loop* loop, tree phidef, + tree niter) +{ + tree match_op[2],inv; + tree type = TREE_TYPE (phidef); + gphi* header_phi = NULL; + enum tree_code code; + /* match thing like op0 (match[0]), op1 (match[1]), phidef (PHIDEF) + + op1 = PHI + phidef = op0 & op1 + if op0 is an invariant, it could change to + phidef = op0 & inv. */ + gimple *def; + def = SSA_NAME_DEF_STMT (phidef); + if (!(is_gimple_assign (def) + && ((code = gimple_assign_rhs_code (def)), true) + && (code == BIT_AND_EXPR || code == BIT_IOR_EXPR + || code == BIT_XOR_EXPR))) + return NULL_TREE; + + match_op[0] = gimple_assign_rhs1 (def); + match_op[1] = gimple_assign_rhs2 (def); + + if (TREE_CODE (match_op[1]) != SSA_NAME + || !expr_invariant_in_loop_p (loop, match_op[0]) + || !(header_phi = dyn_cast (SSA_NAME_DEF_STMT (match_op[1]))) + || gimple_phi_num_args (header_phi) != 2) + return NULL_TREE; + + if (PHI_ARG_DEF_FROM_EDGE (header_phi, loop_latch_edge (loop)) != phidef) + return NULL_TREE; + + enum tree_code code1 + = gimple_assign_rhs_code (def); + + if (code1 == BIT_XOR_EXPR) + { + if (!tree_fits_uhwi_p (niter)) + return NULL_TREE; + unsigned HOST_WIDE_INT niter_num; + niter_num = tree_to_uhwi (niter); + if (niter_num % 2 != 0) + match_op[0] = build_zero_cst (type); + } + + inv = PHI_ARG_DEF_FROM_EDGE (header_phi, loop_preheader_edge (loop)); + return fold_build2 (code1, type, inv, match_op[0]); +} + /* Do final value replacement for LOOP, return true if we did anything. */ bool @@ -3685,7 +3743,24 @@ final_value_replacement_loop (class loop *loop) bool folded_casts; def = analyze_scalar_evolution_in_loop (ex_loop, loop, def, &folded_casts); - def = compute_overall_effect_of_inner_loop (ex_loop, def); + + tree bitinv_def, bit_def; + unsigned HOST_WIDE_INT niter_num; + + if (def != chrec_dont_know) + def = compute_overall_effect_of_inner_loop (ex_loop, def); + + /* Handle bitop with invariant induction expression. + + .i.e + for (int i =0 ;i < 32; i++) + tmp &= bit2; + if bit2 is an invariant in loop which could simple to + tmp &= bit2. */ + else if ((bitinv_def + = analyze_and_compute_bitop_with_inv_effect (loop, + phidef, niter))) + def = bitinv_def; /* Handle bitwise induction expression. @@ -3697,15 +3772,13 @@ final_value_replacement_loop (class loop *loop) expressible, but in fact final value of RES can be replaced by RES & CONSTANT where CONSTANT all ones with bit {0,3,6,9,... ,63} being cleared, similar for BIT_IOR_EXPR/BIT_XOR_EXPR. */ - unsigned HOST_WIDE_INT niter_num; - tree bit_def; - if (tree_fits_uhwi_p (niter) - && (niter_num = tree_to_uhwi (niter)) != 0 - && niter_num < TYPE_PRECISION (TREE_TYPE (phidef)) - && (bit_def - = analyze_and_compute_bitwise_induction_effect (loop, - phidef, - niter_num))) + else if (tree_fits_uhwi_p (niter) + && (niter_num = tree_to_uhwi (niter)) != 0 + && niter_num < TYPE_PRECISION (TREE_TYPE (phidef)) + && (bit_def + = analyze_and_compute_bitwise_induction_effect (loop, + phidef, + niter_num))) def = bit_def; if (!tree_does_not_contain_chrecs (def)