From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (qmail 44108 invoked by alias); 18 Jan 2019 16:37:55 -0000 Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Received: (qmail 44095 invoked by uid 89); 18 Jan 2019 16:37:54 -0000 Authentication-Results: sourceware.org; auth=none X-Spam-SWARE-Status: No, score=-11.1 required=5.0 tests=BAYES_00,GIT_PATCH_2,GIT_PATCH_3,KAM_ASCII_DIVIDERS,SPF_PASS autolearn=ham version=3.3.2 spammy=identified, sk:get_inn X-HELO: foss.arm.com Received: from usa-sjc-mx-foss1.foss.arm.com (HELO foss.arm.com) (217.140.101.70) by sourceware.org (qpsmtpd/0.93/v0.84-503-g423c35a) with ESMTP; Fri, 18 Jan 2019 16:37:49 +0000 Received: from usa-sjc-imap-foss1.foss.arm.com (unknown [10.72.51.249]) by usa-sjc-mx-foss1.foss.arm.com (Postfix) with ESMTP id 254D380D for ; Fri, 18 Jan 2019 08:37:48 -0800 (PST) Received: from localhost (unknown [10.32.98.35]) by usa-sjc-imap-foss1.foss.arm.com (Postfix) with ESMTPSA id 9E14F3F557 for ; Fri, 18 Jan 2019 08:37:47 -0800 (PST) From: Richard Sandiford To: gcc-patches@gcc.gnu.org Mail-Followup-To: gcc-patches@gcc.gnu.org, richard.sandiford@arm.com Subject: Claw back some of the code size regression in 548.exchange2_r Date: Fri, 18 Jan 2019 16:37:00 -0000 Message-ID: <87o98drkf9.fsf@arm.com> User-Agent: Gnus/5.13 (Gnus v5.13) Emacs/26.1 (gnu/linux) MIME-Version: 1.0 Content-Type: text/plain X-SW-Source: 2019-01/txt/msg01083.txt.bz2 This patch tries harder to detect cases in which the inner dimension of an array access is invariant, such as: x(i, :) = 100 It fixes some of the code size regression in 548.exchange2_r, with size improving by 5% compared to before the patch. Of the two other SPEC 2017 tests affected by loop versioning, 554.roms_r improved by a trivial amount (0.3%) and 549.fotonik3d_r didn't change. All three results are with -Ofast -flto. Tested on aarch64-linux-gnu, aarch64_be-elf and x86_64-linux-gnu. OK to install? Richard 2019-01-18 Richard Sandiford gcc/ * gimple-loop-versioning.cc (loop_versioning::dump_inner_likelihood): New function, split out from... (loop_versioning::analyze_stride): ...here. (loop_versioning::find_per_loop_multiplication): Use gassign. (loop_versioning::analyze_term_using_scevs): Return a success code. (loop_versioning::analyze_arbitrary_term): New function. (loop_versioning::analyze_address_fragment): Use analyze_arbitrary_term if all else fails. gcc/testsuite/ * gfortran.dg/loop_versioning_1.f90: Bump the number of identified inner strides. * gfortran.dg/loop_versioning_9.f90: New test. * gfortran.dg/loop_versioning_10.f90: Likewise. Index: gcc/gimple-loop-versioning.cc =================================================================== --- gcc/gimple-loop-versioning.cc 2019-01-04 11:39:25.918257505 +0000 +++ gcc/gimple-loop-versioning.cc 2019-01-18 16:36:13.172064883 +0000 @@ -294,10 +294,12 @@ private: bool acceptable_type_p (tree, unsigned HOST_WIDE_INT *); bool multiply_term_by (address_term_info &, tree); inner_likelihood get_inner_likelihood (tree, unsigned HOST_WIDE_INT); + void dump_inner_likelihood (address_info &, address_term_info &); void analyze_stride (address_info &, address_term_info &, tree, struct loop *); bool find_per_loop_multiplication (address_info &, address_term_info &); - void analyze_term_using_scevs (address_info &, address_term_info &); + bool analyze_term_using_scevs (address_info &, address_term_info &); + void analyze_arbitrary_term (address_info &, address_term_info &); void analyze_address_fragment (address_info &); void record_address_fragment (gimple *, unsigned HOST_WIDE_INT, tree, unsigned HOST_WIDE_INT, HOST_WIDE_INT); @@ -803,6 +805,24 @@ loop_versioning::get_inner_likelihood (t return unlikely_p ? INNER_UNLIKELY : INNER_DONT_KNOW; } +/* Dump the likelihood that TERM's stride is for the innermost dimension. + ADDRESS is the address that contains TERM. */ + +void +loop_versioning::dump_inner_likelihood (address_info &address, + address_term_info &term) +{ + if (term.inner_likelihood == INNER_LIKELY) + dump_printf_loc (MSG_NOTE, address.stmt, "%T is likely to be the" + " innermost dimension\n", term.stride); + else if (term.inner_likelihood == INNER_UNLIKELY) + dump_printf_loc (MSG_NOTE, address.stmt, "%T is probably not the" + " innermost dimension\n", term.stride); + else + dump_printf_loc (MSG_NOTE, address.stmt, "cannot tell whether %T" + " is the innermost dimension\n", term.stride); +} + /* The caller has identified that STRIDE is the stride of interest in TERM, and that the stride is applied in OP_LOOP. Record this information in TERM, deciding whether STRIDE is likely to be for @@ -818,17 +838,7 @@ loop_versioning::analyze_stride (address term.inner_likelihood = get_inner_likelihood (stride, term.multiplier); if (dump_enabled_p ()) - { - if (term.inner_likelihood == INNER_LIKELY) - dump_printf_loc (MSG_NOTE, address.stmt, "%T is likely to be the" - " innermost dimension\n", stride); - else if (term.inner_likelihood == INNER_UNLIKELY) - dump_printf_loc (MSG_NOTE, address.stmt, "%T is probably not the" - " innermost dimension\n", stride); - else - dump_printf_loc (MSG_NOTE, address.stmt, "cannot tell whether %T" - " is the innermost dimension\n", stride); - } + dump_inner_likelihood (address, term); /* To be a versioning opportunity we require: @@ -879,7 +889,7 @@ bool loop_versioning::find_per_loop_multiplication (address_info &address, address_term_info &term) { - gimple *mult = maybe_get_assign (term.expr); + gassign *mult = maybe_get_assign (term.expr); if (!mult || gimple_assign_rhs_code (mult) != MULT_EXPR) return false; @@ -909,7 +919,7 @@ loop_versioning::find_per_loop_multiplic } /* Try to use scalar evolutions to find an address stride for TERM, - which belongs to ADDRESS. + which belongs to ADDRESS. Return true and update TERM if so. Here we are interested in any evolution information we can find, not just evolutions wrt ADDRESS->LOOP. For example, if we find that @@ -917,17 +927,17 @@ loop_versioning::find_per_loop_multiplic that information can help us eliminate worthless versioning opportunities in inner loops. */ -void +bool loop_versioning::analyze_term_using_scevs (address_info &address, address_term_info &term) { gimple *setter = maybe_get_stmt (term.expr); if (!setter) - return; + return false; struct loop *wrt_loop = loop_containing_stmt (setter); if (!loop_outer (wrt_loop)) - return; + return false; tree chrec = strip_casts (analyze_scalar_evolution (wrt_loop, term.expr)); if (TREE_CODE (chrec) == POLYNOMIAL_CHREC) @@ -955,7 +965,53 @@ loop_versioning::analyze_term_using_scev } analyze_stride (address, term, stride, get_chrec_loop (chrec)); + return true; + } + + return false; +} + +/* Address term TERM is an arbitrary term that provides no versioning + opportunities. Analyze it to see whether it contains any likely + inner strides, so that we don't mistakenly version for other + (less likely) candidates. + + This copes with invariant innermost indices such as: + + x(i, :) = 100 + + where the "i" component of the address is invariant in the loop + but provides the real inner stride. + + ADDRESS is the address that contains TERM. */ + +void +loop_versioning::analyze_arbitrary_term (address_info &address, + address_term_info &term) + +{ + /* A multiplication offers two potential strides. Pick the one that + is most likely to be an innermost stride. */ + tree expr = term.expr, alt = NULL_TREE; + gassign *mult = maybe_get_assign (expr); + if (mult && gimple_assign_rhs_code (mult) == MULT_EXPR) + { + expr = strip_casts (gimple_assign_rhs1 (mult)); + alt = strip_casts (gimple_assign_rhs2 (mult)); + } + term.stride = expr; + term.inner_likelihood = get_inner_likelihood (expr, term.multiplier); + if (alt) + { + inner_likelihood alt_l = get_inner_likelihood (alt, term.multiplier); + if (alt_l > term.inner_likelihood) + { + term.stride = alt; + term.inner_likelihood = alt_l; + } } + if (dump_enabled_p ()) + dump_inner_likelihood (address, term); } /* Try to identify loop strides in ADDRESS and try to choose realistic @@ -1038,8 +1094,10 @@ loop_versioning::analyze_address_fragmen find_per_loop_multiplication and analyze_term_using_scevs can handle, but the former is much cheaper than SCEV analysis, so try it first. */ for (unsigned int i = 0; i < address.terms.length (); ++i) - if (!find_per_loop_multiplication (address, address.terms[i])) - analyze_term_using_scevs (address, address.terms[i]); + if (!find_per_loop_multiplication (address, address.terms[i]) + && !analyze_term_using_scevs (address, address.terms[i]) + && !POINTER_TYPE_P (TREE_TYPE (address.terms[i].expr))) + analyze_arbitrary_term (address, address.terms[i]); /* Check for strides that are likely to be for the innermost dimension. Index: gcc/testsuite/gfortran.dg/loop_versioning_1.f90 =================================================================== --- gcc/testsuite/gfortran.dg/loop_versioning_1.f90 2018-12-17 10:05:18.091771024 +0000 +++ gcc/testsuite/gfortran.dg/loop_versioning_1.f90 2019-01-18 16:36:13.172064883 +0000 @@ -23,6 +23,6 @@ subroutine f3(x, limit, step) end do end subroutine f3 -! { dg-final { scan-tree-dump-times {likely to be the innermost dimension} 1 "lversion" } } +! { dg-final { scan-tree-dump-times {likely to be the innermost dimension} 2 "lversion" } } ! { dg-final { scan-tree-dump-times {want to version containing loop} 3 "lversion" } } ! { dg-final { scan-tree-dump-times {versioned this loop} 3 "lversion" } } Index: gcc/testsuite/gfortran.dg/loop_versioning_9.f90 =================================================================== --- /dev/null 2018-12-31 11:20:29.178325188 +0000 +++ gcc/testsuite/gfortran.dg/loop_versioning_9.f90 2019-01-18 16:36:13.172064883 +0000 @@ -0,0 +1,31 @@ +! { dg-options "-O3 -fdump-tree-lversion-details" } + +subroutine f1(x) + real :: x(:, :) + x(1, :) = 100 +end subroutine f1 + +subroutine f2(x, i) + real :: x(:, :) + integer :: i + x(i, :) = 100 +end subroutine f2 + +subroutine f3(x) + real :: x(:, :) + do j = lbound(x, 2), ubound(x, 2) + x(1, j) = 100 + end do +end subroutine f3 + +subroutine f4(x, i) + real :: x(:, :) + integer :: i + do j = lbound(x, 2), ubound(x, 2) + x(i, j) = 100 + end do +end subroutine f4 + +! { dg-final { scan-tree-dump-times {likely to be the innermost dimension} 4 "lversion" } } +! { dg-final { scan-tree-dump-not {want to version} "lversion" } } +! { dg-final { scan-tree-dump-not {versioned} "lversion" } } Index: gcc/testsuite/gfortran.dg/loop_versioning_10.f90 =================================================================== --- /dev/null 2018-12-31 11:20:29.178325188 +0000 +++ gcc/testsuite/gfortran.dg/loop_versioning_10.f90 2019-01-18 16:36:13.172064883 +0000 @@ -0,0 +1,31 @@ +! { dg-options "-O3 -fdump-tree-lversion-details" } + +subroutine f1(x) + real :: x(:, :) + x(:, 1) = 100 +end subroutine f1 + +subroutine f2(x, i) + real :: x(:, :) + integer :: i + x(:, i) = 100 +end subroutine f2 + +subroutine f3(x) + real :: x(:, :) + do j = lbound(x, 1), ubound(x, 1) + x(j, 1) = 100 + end do +end subroutine f3 + +subroutine f4(x, i) + real :: x(:, :) + integer :: i + do j = lbound(x, 1), ubound(x, 1) + x(j, i) = 100 + end do +end subroutine f4 + +! { dg-final { scan-tree-dump-times {likely to be the innermost dimension} 6 "lversion" } } +! { dg-final { scan-tree-dump-times {want to version} 4 "lversion" } } +! { dg-final { scan-tree-dump-times {versioned} 4 "lversion" } }