From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from foss.arm.com (foss.arm.com [217.140.110.172]) by sourceware.org (Postfix) with ESMTP id 39AFF385781C for ; Mon, 13 Sep 2021 11:44:06 +0000 (GMT) DMARC-Filter: OpenDMARC Filter v1.4.1 sourceware.org 39AFF385781C Received: from usa-sjc-imap-foss1.foss.arm.com (unknown [10.121.207.14]) by usa-sjc-mx-foss1.foss.arm.com (Postfix) with ESMTP id C478231B; Mon, 13 Sep 2021 04:44:05 -0700 (PDT) Received: from localhost (e121540-lin.manchester.arm.com [10.32.98.126]) by usa-sjc-imap-foss1.foss.arm.com (Postfix) with ESMTPSA id 4F7233F59C; Mon, 13 Sep 2021 04:44:05 -0700 (PDT) From: Richard Sandiford To: apinski--- via Gcc-patches Mail-Followup-To: apinski--- via Gcc-patches , apinski@marvell.com, richard.sandiford@arm.com Cc: apinski@marvell.com Subject: Re: [PATCHv2] [aarch64] Fix target/95969: __builtin_aarch64_im_lane_boundsi interferes with gimple References: <1630724560-13362-1-git-send-email-apinski@marvell.com> Date: Mon, 13 Sep 2021 12:44:04 +0100 In-Reply-To: <1630724560-13362-1-git-send-email-apinski@marvell.com> (apinski's message of "Fri, 3 Sep 2021 20:02:40 -0700") Message-ID: User-Agent: Gnus/5.13 (Gnus v5.13) Emacs/26.3 (gnu/linux) MIME-Version: 1.0 Content-Type: text/plain X-Spam-Status: No, score=-12.3 required=5.0 tests=BAYES_00, GIT_PATCH_0, KAM_DMARC_STATUS, KAM_SHORT, SPF_HELO_NONE, SPF_PASS, TXREP autolearn=ham autolearn_force=no version=3.4.4 X-Spam-Checker-Version: SpamAssassin 3.4.4 (2020-01-24) on server2.sourceware.org X-BeenThere: gcc-patches@gcc.gnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Gcc-patches mailing list List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Mon, 13 Sep 2021 11:44:09 -0000 apinski--- via Gcc-patches writes: > From: Andrew Pinski > > This patch adds simple folding of __builtin_aarch64_im_lane_boundsi where > we are not going to error out. It fixes the problem by the removal > of the function from the IR. > > OK? Bootstrapped and tested on aarch64-linux-gnu with no regressions. > > gcc/ChangeLog: > > PR target/95969 > * config/aarch64/aarch64-builtins.c (aarch64_fold_builtin_lane_check): > New function. > (aarch64_general_fold_builtin): Handle AARCH64_SIMD_BUILTIN_LANE_CHECK. > (aarch64_general_gimple_fold_builtin): Likewise. > > gcc/testsuite/ChangeLog: > > PR target/95969 > * gcc.target/aarch64/lane-bound-1.c: New test. > * gcc.target/aarch64/lane-bound-2.c: New test. OK, thanks. Sorry for the slow reply, was away last week. Richard > --- > gcc/config/aarch64/aarch64-builtins.c | 35 +++++++++++++++++++ > .../gcc.target/aarch64/lane-bound-1.c | 14 ++++++++ > .../gcc.target/aarch64/lane-bound-2.c | 10 ++++++ > 3 files changed, 59 insertions(+) > create mode 100644 gcc/testsuite/gcc.target/aarch64/lane-bound-1.c > create mode 100644 gcc/testsuite/gcc.target/aarch64/lane-bound-2.c > > diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c > index eef9fc0f444..119f67d4e4c 100644 > --- a/gcc/config/aarch64/aarch64-builtins.c > +++ b/gcc/config/aarch64/aarch64-builtins.c > @@ -29,6 +29,7 @@ > #include "rtl.h" > #include "tree.h" > #include "gimple.h" > +#include "ssa.h" > #include "memmodel.h" > #include "tm_p.h" > #include "expmed.h" > @@ -2333,6 +2334,27 @@ aarch64_general_builtin_rsqrt (unsigned int fn) > return NULL_TREE; > } > > +/* Return true if the lane check can be removed as there is no > + error going to be emitted. */ > +static bool > +aarch64_fold_builtin_lane_check (tree arg0, tree arg1, tree arg2) > +{ > + if (TREE_CODE (arg0) != INTEGER_CST) > + return false; > + if (TREE_CODE (arg1) != INTEGER_CST) > + return false; > + if (TREE_CODE (arg2) != INTEGER_CST) > + return false; > + > + auto totalsize = wi::to_widest (arg0); > + auto elementsize = wi::to_widest (arg1); > + if (totalsize == 0 || elementsize == 0) > + return false; > + auto lane = wi::to_widest (arg2); > + auto high = wi::udiv_trunc (totalsize, elementsize); > + return wi::ltu_p (lane, high); > +} > + > #undef VAR1 > #define VAR1(T, N, MAP, FLAG, A) \ > case AARCH64_SIMD_BUILTIN_##T##_##N##A: > @@ -2353,6 +2375,11 @@ aarch64_general_fold_builtin (unsigned int fcode, tree type, > VAR1 (UNOP, floatv4si, 2, ALL, v4sf) > VAR1 (UNOP, floatv2di, 2, ALL, v2df) > return fold_build1 (FLOAT_EXPR, type, args[0]); > + case AARCH64_SIMD_BUILTIN_LANE_CHECK: > + gcc_assert (n_args == 3); > + if (aarch64_fold_builtin_lane_check (args[0], args[1], args[2])) > + return void_node; > + break; > default: > break; > } > @@ -2440,6 +2467,14 @@ aarch64_general_gimple_fold_builtin (unsigned int fcode, gcall *stmt) > } > break; > } > + case AARCH64_SIMD_BUILTIN_LANE_CHECK: > + if (aarch64_fold_builtin_lane_check (args[0], args[1], args[2])) > + { > + unlink_stmt_vdef (stmt); > + release_defs (stmt); > + new_stmt = gimple_build_nop (); > + } > + break; > default: > break; > } > diff --git a/gcc/testsuite/gcc.target/aarch64/lane-bound-1.c b/gcc/testsuite/gcc.target/aarch64/lane-bound-1.c > new file mode 100644 > index 00000000000..bbbe679fd80 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/lane-bound-1.c > @@ -0,0 +1,14 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -fdump-tree-optimized" } */ > +#include > + > +void > +f (float32x4_t **ptr) > +{ > + float32x4_t res = vsetq_lane_f32 (0.0f, **ptr, 0); > + **ptr = res; > +} > +/* GCC should be able to remove the call to "__builtin_aarch64_im_lane_boundsi" > + and optimize out the second load from *ptr. */ > +/* { dg-final { scan-tree-dump-times "__builtin_aarch64_im_lane_boundsi" 0 "optimized" } } */ > +/* { dg-final { scan-tree-dump-times " = \\\*ptr_" 1 "optimized" } } */ > diff --git a/gcc/testsuite/gcc.target/aarch64/lane-bound-2.c b/gcc/testsuite/gcc.target/aarch64/lane-bound-2.c > new file mode 100644 > index 00000000000..923c94687c6 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/lane-bound-2.c > @@ -0,0 +1,10 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -fdump-tree-original" } */ > +void > +f (void) > +{ > + __builtin_aarch64_im_lane_boundsi (16, 4, 0); > + __builtin_aarch64_im_lane_boundsi (8, 8, 0); > +} > +/* GCC should be able to optimize these out before gimplification. */ > +/* { dg-final { scan-tree-dump-times "__builtin_aarch64_im_lane_boundsi" 0 "original" } } */