diff --git a/gcc/config/aarch64/aarch64-builtins.cc b/gcc/config/aarch64/aarch64-builtins.cc
index c21476d7ae9..cfcd9117ce3 100644
--- a/gcc/config/aarch64/aarch64-builtins.cc
+++ b/gcc/config/aarch64/aarch64-builtins.cc
@@ -47,6 +47,7 @@
#include "stringpool.h"
#include "attribs.h"
#include "gimple-fold.h"
+#include "aarch64-builtins.h"
#define v8qi_UP E_V8QImode
#define v8di_UP E_V8DImode
@@ -128,46 +129,6 @@
#define SIMD_MAX_BUILTIN_ARGS 5
-enum aarch64_type_qualifiers
-{
- /* T foo. */
- qualifier_none = 0x0,
- /* unsigned T foo. */
- qualifier_unsigned = 0x1, /* 1 << 0 */
- /* const T foo. */
- qualifier_const = 0x2, /* 1 << 1 */
- /* T *foo. */
- qualifier_pointer = 0x4, /* 1 << 2 */
- /* Used when expanding arguments if an operand could
- be an immediate. */
- qualifier_immediate = 0x8, /* 1 << 3 */
- qualifier_maybe_immediate = 0x10, /* 1 << 4 */
- /* void foo (...). */
- qualifier_void = 0x20, /* 1 << 5 */
- /* Some patterns may have internal operands, this qualifier is an
- instruction to the initialisation code to skip this operand. */
- qualifier_internal = 0x40, /* 1 << 6 */
- /* Some builtins should use the T_*mode* encoded in a simd_builtin_datum
- rather than using the type of the operand. */
- qualifier_map_mode = 0x80, /* 1 << 7 */
- /* qualifier_pointer | qualifier_map_mode */
- qualifier_pointer_map_mode = 0x84,
- /* qualifier_const | qualifier_pointer | qualifier_map_mode */
- qualifier_const_pointer_map_mode = 0x86,
- /* Polynomial types. */
- qualifier_poly = 0x100,
- /* Lane indices - must be in range, and flipped for bigendian. */
- qualifier_lane_index = 0x200,
- /* Lane indices for single lane structure loads and stores. */
- qualifier_struct_load_store_lane_index = 0x400,
- /* Lane indices selected in pairs. - must be in range, and flipped for
- bigendian. */
- qualifier_lane_pair_index = 0x800,
- /* Lane indices selected in quadtuplets. - must be in range, and flipped for
- bigendian. */
- qualifier_lane_quadtup_index = 0x1000,
-};
-
/* Flags that describe what a function might do. */
const unsigned int FLAG_NONE = 0U;
const unsigned int FLAG_READ_FPCR = 1U << 0;
@@ -671,44 +632,6 @@ const char *aarch64_scalar_builtin_types[] = {
NULL
};
-#define ENTRY(E, M, Q, G) E,
-enum aarch64_simd_type
-{
-#include "aarch64-simd-builtin-types.def"
- ARM_NEON_H_TYPES_LAST
-};
-#undef ENTRY
-
-struct GTY(()) aarch64_simd_type_info
-{
- enum aarch64_simd_type type;
-
- /* Internal type name. */
- const char *name;
-
- /* Internal type name(mangled). The mangled names conform to the
- AAPCS64 (see "Procedure Call Standard for the ARM 64-bit Architecture",
- Appendix A). To qualify for emission with the mangled names defined in
- that document, a vector type must not only be of the correct mode but also
- be of the correct internal AdvSIMD vector type (e.g. __Int8x8_t); these
- types are registered by aarch64_init_simd_builtin_types (). In other
- words, vector types defined in other ways e.g. via vector_size attribute
- will get default mangled names. */
- const char *mangle;
-
- /* Internal type. */
- tree itype;
-
- /* Element type. */
- tree eltype;
-
- /* Machine mode the internal type maps to. */
- enum machine_mode mode;
-
- /* Qualifiers. */
- enum aarch64_type_qualifiers q;
-};
-
#define ENTRY(E, M, Q, G) \
{E, "__" #E, #G "__" #E, NULL_TREE, NULL_TREE, E_##M##mode, qualifier_##Q},
static GTY(()) struct aarch64_simd_type_info aarch64_simd_types [] = {
@@ -2826,6 +2749,14 @@ get_mem_type_for_load_store (unsigned int fcode)
}
}
+/* Return a copy of the aarch64_simd_types[] entry for TYPE, so that
+   files outside this one can query its name/mangle/itype/mode fields.  */
+aarch64_simd_type_info
+aarch64_get_simd_info_for_type (enum aarch64_simd_type type)
+{
+  return aarch64_simd_types[type];
+}
+
/* Try to fold STMT, given that it's a call to the built-in function with
subcode FCODE. Return the new statement on success and null on
failure. */
diff --git a/gcc/config/aarch64/aarch64-builtins.h b/gcc/config/aarch64/aarch64-builtins.h
new file mode 100644
index 00000000000..4d155566dc5
--- /dev/null
+++ b/gcc/config/aarch64/aarch64-builtins.h
@@ -0,0 +1,101 @@
+/* Copyright (C) 2022 Free Software Foundation, Inc.
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef AARCH64_BUILTINS_H
+#define AARCH64_BUILTINS_H
+
+#define ENTRY(E, M, Q, G) E,
+enum aarch64_simd_type
+{
+#include "aarch64-simd-builtin-types.def"
+ ARM_NEON_H_TYPES_LAST
+};
+#undef ENTRY
+
+enum aarch64_type_qualifiers
+{
+ /* T foo. */
+ qualifier_none = 0x0,
+ /* unsigned T foo. */
+ qualifier_unsigned = 0x1, /* 1 << 0 */
+ /* const T foo. */
+ qualifier_const = 0x2, /* 1 << 1 */
+ /* T *foo. */
+ qualifier_pointer = 0x4, /* 1 << 2 */
+ /* Used when expanding arguments if an operand could
+ be an immediate. */
+ qualifier_immediate = 0x8, /* 1 << 3 */
+ qualifier_maybe_immediate = 0x10, /* 1 << 4 */
+ /* void foo (...). */
+ qualifier_void = 0x20, /* 1 << 5 */
+ /* Some patterns may have internal operands, this qualifier is an
+ instruction to the initialisation code to skip this operand. */
+ qualifier_internal = 0x40, /* 1 << 6 */
+ /* Some builtins should use the T_*mode* encoded in a simd_builtin_datum
+ rather than using the type of the operand. */
+ qualifier_map_mode = 0x80, /* 1 << 7 */
+ /* qualifier_pointer | qualifier_map_mode */
+ qualifier_pointer_map_mode = 0x84,
+ /* qualifier_const | qualifier_pointer | qualifier_map_mode */
+ qualifier_const_pointer_map_mode = 0x86,
+ /* Polynomial types. */
+ qualifier_poly = 0x100,
+ /* Lane indices - must be in range, and flipped for bigendian. */
+ qualifier_lane_index = 0x200,
+ /* Lane indices for single lane structure loads and stores. */
+ qualifier_struct_load_store_lane_index = 0x400,
+ /* Lane indices selected in pairs. - must be in range, and flipped for
+ bigendian. */
+ qualifier_lane_pair_index = 0x800,
+ /* Lane indices selected in quadtuplets. - must be in range, and flipped for
+ bigendian. */
+ qualifier_lane_quadtup_index = 0x1000,
+};
+
+struct GTY(()) aarch64_simd_type_info
+{
+ enum aarch64_simd_type type;
+
+ /* Internal type name. */
+ const char *name;
+
+ /* Internal type name(mangled). The mangled names conform to the
+ AAPCS64 (see "Procedure Call Standard for the ARM 64-bit Architecture",
+ Appendix A). To qualify for emission with the mangled names defined in
+ that document, a vector type must not only be of the correct mode but also
+ be of the correct internal AdvSIMD vector type (e.g. __Int8x8_t); these
+ types are registered by aarch64_init_simd_builtin_types (). In other
+ words, vector types defined in other ways e.g. via vector_size attribute
+ will get default mangled names. */
+ const char *mangle;
+
+ /* Internal type. */
+ tree itype;
+
+ /* Element type. */
+ tree eltype;
+
+ /* Machine mode the internal type maps to. */
+ enum machine_mode mode;
+
+ /* Qualifiers. */
+ enum aarch64_type_qualifiers q;
+};
+
+aarch64_simd_type_info aarch64_get_simd_info_for_type (enum aarch64_simd_type);
+
+#endif /* AARCH64_BUILTINS_H */
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
index c24c0548724..1ef4ea2087b 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
@@ -44,6 +44,14 @@
#include "aarch64-sve-builtins-shapes.h"
#include "aarch64-sve-builtins-base.h"
#include "aarch64-sve-builtins-functions.h"
+#include "aarch64-builtins.h"
+#include "gimple-ssa.h"
+#include "tree-phinodes.h"
+#include "tree-ssa-operands.h"
+#include "ssa-iterators.h"
+#include "stringpool.h"
+#include "value-range.h"
+#include "tree-ssanames.h"
using namespace aarch64_sve;
@@ -1207,6 +1215,56 @@ public:
insn_code icode = code_for_aarch64_sve_ld1rq (e.vector_mode (0));
return e.use_contiguous_load_insn (icode);
}
+
+  gimple *
+  fold (gimple_folder &f) const OVERRIDE
+  {
+    tree arg0 = gimple_call_arg (f.call, 0);
+    tree arg1 = gimple_call_arg (f.call, 1);
+
+    /* Transform:
+       lhs = svld1rq ({-1, -1, ... }, arg1)
+       into:
+       tmp = mem_ref<access_type> [(elt * {ref-all}) arg1]
+       lhs = vec_perm_expr<tmp, tmp, {0, 1, ...}> on little endian.  */
+    if (!BYTES_BIG_ENDIAN
+	&& integer_all_onesp (arg0))
+      {
+	tree lhs = gimple_call_lhs (f.call);
+	tree lhs_type = TREE_TYPE (lhs);
+	tree eltype = TREE_TYPE (lhs_type);
+	/* Use the element type of LHS rather than hard-coded Int32x4_t.  */
+	scalar_mode elmode = GET_MODE_INNER (TYPE_MODE (lhs_type));
+	machine_mode vq_mode = aarch64_vq_mode (elmode).require ();
+	tree vectype = build_vector_type_for_mode (eltype, vq_mode);
+
+	tree elt_ptr_type
+	  = build_pointer_type_for_mode (eltype, VOIDmode, true);
+	tree zero = build_zero_cst (elt_ptr_type);
+
+	/* Use element type alignment.  */
+	tree access_type
+	  = build_aligned_type (vectype, TYPE_ALIGN (eltype));
+
+	tree mem_ref_lhs = make_ssa_name_fn (cfun, access_type, 0);
+	gimple *mem_ref_stmt
+	  = gimple_build_assign (mem_ref_lhs, fold_build2 (MEM_REF, access_type, arg1, zero));
+	gsi_insert_before (f.gsi, mem_ref_stmt, GSI_SAME_STMT);
+
+	int source_nelts = TYPE_VECTOR_SUBPARTS (access_type).to_constant ();
+	vec_perm_builder sel (TYPE_VECTOR_SUBPARTS (lhs_type), source_nelts, 1);
+	for (int i = 0; i < source_nelts; i++)
+	  sel.quick_push (i);
+
+	vec_perm_indices indices (sel, 1, source_nelts);
+	gcc_checking_assert (can_vec_perm_const_p (TYPE_MODE (lhs_type), indices));
+	tree mask_type = build_vector_type (ssizetype, TYPE_VECTOR_SUBPARTS (lhs_type));
+	tree mask = vec_perm_indices_to_tree (mask_type, indices);
+	return gimple_build_assign (lhs, VEC_PERM_EXPR, mem_ref_lhs, mem_ref_lhs, mask);
+      }
+
+    return NULL;
+  }
};
class svld1ro_impl : public load_replicate
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index f650abbc4ce..47810fec804 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -23969,6 +23969,35 @@ aarch64_evpc_sve_tbl (struct expand_vec_perm_d *d)
return true;
}
+/* Try to implement D using SVE dup instruction.  */
+
+static bool
+aarch64_evpc_sve_dup (struct expand_vec_perm_d *d)
+{
+  if (BYTES_BIG_ENDIAN
+      || d->perm.length ().is_constant ()
+      || !d->one_vector_p
+      || d->target == NULL
+      || d->op0 == NULL
+      || GET_MODE_NUNITS (GET_MODE (d->target)).is_constant ()
+      || !GET_MODE_NUNITS (GET_MODE (d->op0)).is_constant ())
+    return false;
+  /* Validate the selector before the TESTING_P early-out, so that
+     the testing and expansion paths cannot disagree.  */
+  int npatterns = d->perm.encoding ().npatterns ();
+  if (!known_eq (npatterns, GET_MODE_NUNITS (GET_MODE (d->op0))))
+    return false;
+
+  for (int i = 0; i < npatterns; i++)
+    if (!known_eq (d->perm[i], i))
+      return false;
+
+  if (d->testing_p)
+    return true;
+
+  aarch64_expand_sve_dupq (d->target, GET_MODE (d->target), d->op0);
+  return true;
+}
+
/* Try to implement D using SVE SEL instruction. */
static bool
@@ -24129,7 +24158,12 @@ aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
else if (aarch64_evpc_reencode (d))
return true;
if (d->vec_flags == VEC_SVE_DATA)
- return aarch64_evpc_sve_tbl (d);
+ {
+ if (aarch64_evpc_sve_dup (d))
+ return true;
+ else if (aarch64_evpc_sve_tbl (d))
+ return true;
+ }
else if (d->vec_flags == VEC_ADVSIMD)
return aarch64_evpc_tbl (d);
}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/pr96463.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/pr96463.c
new file mode 100644
index 00000000000..35100a9e01c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/pr96463.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+
+#include "arm_neon.h"
+#include "arm_sve.h"
+
+svint32_t f1 (int32x4_t x)
+{
+ return svld1rq (svptrue_b8 (), &x[0]);
+}
+
+svint32_t f2 (int *x)
+{
+ return svld1rq (svptrue_b8 (), x);
+}
+
+/* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.q, z[0-9]+\.q\[0\]} 2 { target aarch64_little_endian } } } */