* [PATCHv2] [aarch64] Fix target/95969: __builtin_aarch64_im_lane_boundsi interferes with gimple
@ 2021-09-04 3:02 apinski
2021-09-13 11:44 ` Richard Sandiford
0 siblings, 1 reply; 2+ messages in thread
From: apinski @ 2021-09-04 3:02 UTC (permalink / raw)
To: gcc-patches; +Cc: Andrew Pinski
From: Andrew Pinski <apinski@marvell.com>
This patch adds simple folding of __builtin_aarch64_im_lane_boundsi in the
cases where we are not going to error out. It fixes the problem by removing
the call to the builtin from the IR.
OK? Bootstrapped and tested on aarch64-linux-gnu with no regressions.
gcc/ChangeLog:
PR target/95969
* config/aarch64/aarch64-builtins.c (aarch64_fold_builtin_lane_check):
New function.
(aarch64_general_fold_builtin): Handle AARCH64_SIMD_BUILTIN_LANE_CHECK.
(aarch64_general_gimple_fold_builtin): Likewise.
gcc/testsuite/ChangeLog:
PR target/95969
* gcc.target/aarch64/lane-bound-1.c: New test.
* gcc.target/aarch64/lane-bound-2.c: New test.
---
gcc/config/aarch64/aarch64-builtins.c | 35 +++++++++++++++++++
.../gcc.target/aarch64/lane-bound-1.c | 14 ++++++++
.../gcc.target/aarch64/lane-bound-2.c | 10 ++++++
3 files changed, 59 insertions(+)
create mode 100644 gcc/testsuite/gcc.target/aarch64/lane-bound-1.c
create mode 100644 gcc/testsuite/gcc.target/aarch64/lane-bound-2.c
diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c
index eef9fc0f444..119f67d4e4c 100644
--- a/gcc/config/aarch64/aarch64-builtins.c
+++ b/gcc/config/aarch64/aarch64-builtins.c
@@ -29,6 +29,7 @@
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
+#include "ssa.h"
#include "memmodel.h"
#include "tm_p.h"
#include "expmed.h"
@@ -2333,6 +2334,27 @@ aarch64_general_builtin_rsqrt (unsigned int fn)
return NULL_TREE;
}
+/* Return true if the lane check can be removed because no error is
+ going to be emitted for it. ARG0, ARG1 and ARG2 are the three
+ arguments of the lane check call; they are read here as the total
+ size, the element size and the lane number respectively.
+ Return false unless all three are INTEGER_CSTs and both sizes are
+ nonzero (which also keeps the division below well defined).
+ Otherwise return true only when the lane number is less than
+ ARG0 / ARG1 (truncating unsigned division), compared as unsigned. */
+static bool
+aarch64_fold_builtin_lane_check (tree arg0, tree arg1, tree arg2)
+{
+ if (TREE_CODE (arg0) != INTEGER_CST)
+ return false;
+ if (TREE_CODE (arg1) != INTEGER_CST)
+ return false;
+ if (TREE_CODE (arg2) != INTEGER_CST)
+ return false;
+
+ auto totalsize = wi::to_widest (arg0);
+ auto elementsize = wi::to_widest (arg1);
+ if (totalsize == 0 || elementsize == 0)
+ return false;
+ auto lane = wi::to_widest (arg2);
+ auto high = wi::udiv_trunc (totalsize, elementsize);
+ return wi::ltu_p (lane, high);
+}
+
#undef VAR1
#define VAR1(T, N, MAP, FLAG, A) \
case AARCH64_SIMD_BUILTIN_##T##_##N##A:
@@ -2353,6 +2375,11 @@ aarch64_general_fold_builtin (unsigned int fcode, tree type,
VAR1 (UNOP, floatv4si, 2, ALL, v4sf)
VAR1 (UNOP, floatv2di, 2, ALL, v2df)
return fold_build1 (FLOAT_EXPR, type, args[0]);
+ case AARCH64_SIMD_BUILTIN_LANE_CHECK:
+ gcc_assert (n_args == 3);
+ if (aarch64_fold_builtin_lane_check (args[0], args[1], args[2]))
+ return void_node;
+ break;
default:
break;
}
@@ -2440,6 +2467,14 @@ aarch64_general_gimple_fold_builtin (unsigned int fcode, gcall *stmt)
}
break;
}
+ case AARCH64_SIMD_BUILTIN_LANE_CHECK:
+ if (aarch64_fold_builtin_lane_check (args[0], args[1], args[2]))
+ {
+ unlink_stmt_vdef (stmt);
+ release_defs (stmt);
+ new_stmt = gimple_build_nop ();
+ }
+ break;
default:
break;
}
diff --git a/gcc/testsuite/gcc.target/aarch64/lane-bound-1.c b/gcc/testsuite/gcc.target/aarch64/lane-bound-1.c
new file mode 100644
index 00000000000..bbbe679fd80
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/lane-bound-1.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+#include <arm_neon.h>
+
+void
+f (float32x4_t **ptr)
+{
+ float32x4_t res = vsetq_lane_f32 (0.0f, **ptr, 0);
+ **ptr = res;
+}
+/* GCC should be able to remove the call to "__builtin_aarch64_im_lane_boundsi"
+ and optimize out the second load from *ptr. */
+/* { dg-final { scan-tree-dump-times "__builtin_aarch64_im_lane_boundsi" 0 "optimized" } } */
+/* { dg-final { scan-tree-dump-times " = \\\*ptr_" 1 "optimized" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/lane-bound-2.c b/gcc/testsuite/gcc.target/aarch64/lane-bound-2.c
new file mode 100644
index 00000000000..923c94687c6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/lane-bound-2.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-original" } */
+void
+f (void)
+{
+ __builtin_aarch64_im_lane_boundsi (16, 4, 0);
+ __builtin_aarch64_im_lane_boundsi (8, 8, 0);
+}
+/* GCC should be able to optimize these out before gimplification. */
+/* { dg-final { scan-tree-dump-times "__builtin_aarch64_im_lane_boundsi" 0 "original" } } */
--
2.17.1
^ permalink raw reply [flat|nested] 2+ messages in thread
* Re: [PATCHv2] [aarch64] Fix target/95969: __builtin_aarch64_im_lane_boundsi interferes with gimple
2021-09-04 3:02 [PATCHv2] [aarch64] Fix target/95969: __builtin_aarch64_im_lane_boundsi interferes with gimple apinski
@ 2021-09-13 11:44 ` Richard Sandiford
0 siblings, 0 replies; 2+ messages in thread
From: Richard Sandiford @ 2021-09-13 11:44 UTC (permalink / raw)
To: apinski--- via Gcc-patches; +Cc: apinski
apinski--- via Gcc-patches <gcc-patches@gcc.gnu.org> writes:
> From: Andrew Pinski <apinski@marvell.com>
>
> This patch adds simple folding of __builtin_aarch64_im_lane_boundsi where
> we are not going to error out. It fixes the problem by the removal
> of the function from the IR.
>
> OK? Bootstrapped and tested on aarch64-linux-gnu with no regressions.
>
> gcc/ChangeLog:
>
> PR target/95969
> * config/aarch64/aarch64-builtins.c (aarch64_fold_builtin_lane_check):
> New function.
> (aarch64_general_fold_builtin): Handle AARCH64_SIMD_BUILTIN_LANE_CHECK.
> (aarch64_general_gimple_fold_builtin): Likewise.
>
> gcc/testsuite/ChangeLog:
>
> PR target/95969
> * gcc.target/aarch64/lane-bound-1.c: New test.
> * gcc.target/aarch64/lane-bound-2.c: New test.
OK, thanks. Sorry for the slow reply, was away last week.
Richard
> ---
> gcc/config/aarch64/aarch64-builtins.c | 35 +++++++++++++++++++
> .../gcc.target/aarch64/lane-bound-1.c | 14 ++++++++
> .../gcc.target/aarch64/lane-bound-2.c | 10 ++++++
> 3 files changed, 59 insertions(+)
> create mode 100644 gcc/testsuite/gcc.target/aarch64/lane-bound-1.c
> create mode 100644 gcc/testsuite/gcc.target/aarch64/lane-bound-2.c
>
> diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c
> index eef9fc0f444..119f67d4e4c 100644
> --- a/gcc/config/aarch64/aarch64-builtins.c
> +++ b/gcc/config/aarch64/aarch64-builtins.c
> @@ -29,6 +29,7 @@
> #include "rtl.h"
> #include "tree.h"
> #include "gimple.h"
> +#include "ssa.h"
> #include "memmodel.h"
> #include "tm_p.h"
> #include "expmed.h"
> @@ -2333,6 +2334,27 @@ aarch64_general_builtin_rsqrt (unsigned int fn)
> return NULL_TREE;
> }
>
> +/* Return true if the lane check can be removed as there is no
> + error going to be emitted. */
> +static bool
> +aarch64_fold_builtin_lane_check (tree arg0, tree arg1, tree arg2)
> +{
> + if (TREE_CODE (arg0) != INTEGER_CST)
> + return false;
> + if (TREE_CODE (arg1) != INTEGER_CST)
> + return false;
> + if (TREE_CODE (arg2) != INTEGER_CST)
> + return false;
> +
> + auto totalsize = wi::to_widest (arg0);
> + auto elementsize = wi::to_widest (arg1);
> + if (totalsize == 0 || elementsize == 0)
> + return false;
> + auto lane = wi::to_widest (arg2);
> + auto high = wi::udiv_trunc (totalsize, elementsize);
> + return wi::ltu_p (lane, high);
> +}
> +
> #undef VAR1
> #define VAR1(T, N, MAP, FLAG, A) \
> case AARCH64_SIMD_BUILTIN_##T##_##N##A:
> @@ -2353,6 +2375,11 @@ aarch64_general_fold_builtin (unsigned int fcode, tree type,
> VAR1 (UNOP, floatv4si, 2, ALL, v4sf)
> VAR1 (UNOP, floatv2di, 2, ALL, v2df)
> return fold_build1 (FLOAT_EXPR, type, args[0]);
> + case AARCH64_SIMD_BUILTIN_LANE_CHECK:
> + gcc_assert (n_args == 3);
> + if (aarch64_fold_builtin_lane_check (args[0], args[1], args[2]))
> + return void_node;
> + break;
> default:
> break;
> }
> @@ -2440,6 +2467,14 @@ aarch64_general_gimple_fold_builtin (unsigned int fcode, gcall *stmt)
> }
> break;
> }
> + case AARCH64_SIMD_BUILTIN_LANE_CHECK:
> + if (aarch64_fold_builtin_lane_check (args[0], args[1], args[2]))
> + {
> + unlink_stmt_vdef (stmt);
> + release_defs (stmt);
> + new_stmt = gimple_build_nop ();
> + }
> + break;
> default:
> break;
> }
> diff --git a/gcc/testsuite/gcc.target/aarch64/lane-bound-1.c b/gcc/testsuite/gcc.target/aarch64/lane-bound-1.c
> new file mode 100644
> index 00000000000..bbbe679fd80
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/lane-bound-1.c
> @@ -0,0 +1,14 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-optimized" } */
> +#include <arm_neon.h>
> +
> +void
> +f (float32x4_t **ptr)
> +{
> + float32x4_t res = vsetq_lane_f32 (0.0f, **ptr, 0);
> + **ptr = res;
> +}
> +/* GCC should be able to remove the call to "__builtin_aarch64_im_lane_boundsi"
> + and optimize out the second load from *ptr. */
> +/* { dg-final { scan-tree-dump-times "__builtin_aarch64_im_lane_boundsi" 0 "optimized" } } */
> +/* { dg-final { scan-tree-dump-times " = \\\*ptr_" 1 "optimized" } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/lane-bound-2.c b/gcc/testsuite/gcc.target/aarch64/lane-bound-2.c
> new file mode 100644
> index 00000000000..923c94687c6
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/lane-bound-2.c
> @@ -0,0 +1,10 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-original" } */
> +void
> +f (void)
> +{
> + __builtin_aarch64_im_lane_boundsi (16, 4, 0);
> + __builtin_aarch64_im_lane_boundsi (8, 8, 0);
> +}
> +/* GCC should be able to optimize these out before gimplification. */
> +/* { dg-final { scan-tree-dump-times "__builtin_aarch64_im_lane_boundsi" 0 "original" } } */
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2021-09-13 11:44 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-09-04 3:02 [PATCHv2] [aarch64] Fix target/95969: __builtin_aarch64_im_lane_boundsi interferes with gimple apinski
2021-09-13 11:44 ` Richard Sandiford
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).