diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c
index 0d09fe9dd6d..656d39a741c 100644
--- a/gcc/config/aarch64/aarch64-builtins.c
+++ b/gcc/config/aarch64/aarch64-builtins.c
@@ -47,6 +47,7 @@
 #include "stringpool.h"
 #include "attribs.h"
 #include "gimple-fold.h"
+#include "aarch64-builtins.h"
 
 #define v8qi_UP E_V8QImode
 #define v8di_UP E_V8DImode
@@ -128,46 +129,6 @@
 
 #define SIMD_MAX_BUILTIN_ARGS 5
 
-enum aarch64_type_qualifiers
-{
-  /* T foo.  */
-  qualifier_none = 0x0,
-  /* unsigned T foo.  */
-  qualifier_unsigned = 0x1, /* 1 << 0  */
-  /* const T foo.  */
-  qualifier_const = 0x2, /* 1 << 1  */
-  /* T *foo.  */
-  qualifier_pointer = 0x4, /* 1 << 2  */
-  /* Used when expanding arguments if an operand could
-     be an immediate.  */
-  qualifier_immediate = 0x8, /* 1 << 3  */
-  qualifier_maybe_immediate = 0x10, /* 1 << 4  */
-  /* void foo (...).  */
-  qualifier_void = 0x20, /* 1 << 5  */
-  /* Some patterns may have internal operands, this qualifier is an
-     instruction to the initialisation code to skip this operand.  */
-  qualifier_internal = 0x40, /* 1 << 6  */
-  /* Some builtins should use the T_*mode* encoded in a simd_builtin_datum
-     rather than using the type of the operand.  */
-  qualifier_map_mode = 0x80, /* 1 << 7  */
-  /* qualifier_pointer | qualifier_map_mode  */
-  qualifier_pointer_map_mode = 0x84,
-  /* qualifier_const | qualifier_pointer | qualifier_map_mode  */
-  qualifier_const_pointer_map_mode = 0x86,
-  /* Polynomial types.  */
-  qualifier_poly = 0x100,
-  /* Lane indices - must be in range, and flipped for bigendian.  */
-  qualifier_lane_index = 0x200,
-  /* Lane indices for single lane structure loads and stores.  */
-  qualifier_struct_load_store_lane_index = 0x400,
-  /* Lane indices selected in pairs. - must be in range, and flipped for
-     bigendian.  */
-  qualifier_lane_pair_index = 0x800,
-  /* Lane indices selected in quadtuplets. - must be in range, and flipped for
-     bigendian.  */
-  qualifier_lane_quadtup_index = 0x1000,
-};
-
 /* Flags that describe what a function might do.  */
 const unsigned int FLAG_NONE = 0U;
 const unsigned int FLAG_READ_FPCR = 1U << 0;
@@ -671,44 +632,6 @@ const char *aarch64_scalar_builtin_types[] = {
   NULL
 };
 
-#define ENTRY(E, M, Q, G) E,
-enum aarch64_simd_type
-{
-#include "aarch64-simd-builtin-types.def"
-  ARM_NEON_H_TYPES_LAST
-};
-#undef ENTRY
-
-struct GTY(()) aarch64_simd_type_info
-{
-  enum aarch64_simd_type type;
-
-  /* Internal type name.  */
-  const char *name;
-
-  /* Internal type name(mangled).  The mangled names conform to the
-     AAPCS64 (see "Procedure Call Standard for the ARM 64-bit Architecture",
-     Appendix A).  To qualify for emission with the mangled names defined in
-     that document, a vector type must not only be of the correct mode but also
-     be of the correct internal AdvSIMD vector type (e.g. __Int8x8_t); these
-     types are registered by aarch64_init_simd_builtin_types ().  In other
-     words, vector types defined in other ways e.g. via vector_size attribute
-     will get default mangled names.  */
-  const char *mangle;
-
-  /* Internal type.  */
-  tree itype;
-
-  /* Element type.  */
-  tree eltype;
-
-  /* Machine mode the internal type maps to.  */
-  enum machine_mode mode;
-
-  /* Qualifiers.  */
-  enum aarch64_type_qualifiers q;
-};
-
 #define ENTRY(E, M, Q, G)  \
   {E, "__" #E, #G "__" #E, NULL_TREE, NULL_TREE, E_##M##mode, qualifier_##Q},
 static GTY(()) struct aarch64_simd_type_info aarch64_simd_types [] = {
@@ -2796,6 +2719,16 @@ get_mem_type_for_load_store (unsigned int fcode)
     }
 }
 
+/* Return aarch64_simd_type_info corresponding to TYPE.
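+   This exposes the Advanced SIMD type records to the SVE builtins
+   code, which uses them when folding calls such as svld1rq.  */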
+
+aarch64_simd_type_info
+aarch64_get_simd_info_for_type (enum aarch64_simd_type type)
+{
+  return aarch64_simd_types[type];
+}
+
 /* Try to fold STMT, given that it's a call to the built-in function with
    subcode FCODE.  Return the new statement on success and null on
    failure.  */
diff --git a/gcc/config/aarch64/aarch64-builtins.h b/gcc/config/aarch64/aarch64-builtins.h
new file mode 100644
index 00000000000..b395402379c
--- /dev/null
+++ b/gcc/config/aarch64/aarch64-builtins.h
@@ -0,0 +1,88 @@
+#ifndef AARCH64_BUILTINS_H
+#define AARCH64_BUILTINS_H
+
+#define ENTRY(E, M, Q, G) E,
+enum aarch64_simd_type
+{
+#include "aarch64-simd-builtin-types.def"
+  ARM_NEON_H_TYPES_LAST
+};
+#undef ENTRY
+
+enum aarch64_type_qualifiers
+{
+  /* T foo.  */
+  qualifier_none = 0x0,
+  /* unsigned T foo.  */
+  qualifier_unsigned = 0x1, /* 1 << 0  */
+  /* const T foo.  */
+  qualifier_const = 0x2, /* 1 << 1  */
+  /* T *foo.  */
+  qualifier_pointer = 0x4, /* 1 << 2  */
+  /* Used when expanding arguments if an operand could
+     be an immediate.  */
+  qualifier_immediate = 0x8, /* 1 << 3  */
+  qualifier_maybe_immediate = 0x10, /* 1 << 4  */
+  /* void foo (...).  */
+  qualifier_void = 0x20, /* 1 << 5  */
+  /* Some patterns may have internal operands, this qualifier is an
+     instruction to the initialisation code to skip this operand.  */
+  qualifier_internal = 0x40, /* 1 << 6  */
+  /* Some builtins should use the T_*mode* encoded in a simd_builtin_datum
+     rather than using the type of the operand.  */
+  qualifier_map_mode = 0x80, /* 1 << 7  */
+  /* qualifier_pointer | qualifier_map_mode  */
+  qualifier_pointer_map_mode = 0x84,
+  /* qualifier_const | qualifier_pointer | qualifier_map_mode  */
+  qualifier_const_pointer_map_mode = 0x86,
+  /* Polynomial types.  */
+  qualifier_poly = 0x100,
+  /* Lane indices - must be in range, and flipped for bigendian.  */
+  qualifier_lane_index = 0x200,
+  /* Lane indices for single lane structure loads and stores.  */
+  qualifier_struct_load_store_lane_index = 0x400,
+  /* Lane indices selected in pairs. - must be in range, and flipped for
+     bigendian.  */
+  qualifier_lane_pair_index = 0x800,
+  /* Lane indices selected in quadtuplets. - must be in range, and flipped for
+     bigendian.  */
+  qualifier_lane_quadtup_index = 0x1000,
+};
+
+struct GTY(()) aarch64_simd_type_info
+{
+  enum aarch64_simd_type type;
+
+  /* Internal type name.  */
+  const char *name;
+
+  /* Internal type name(mangled).  The mangled names conform to the
+     AAPCS64 (see "Procedure Call Standard for the ARM 64-bit Architecture",
+     Appendix A).  To qualify for emission with the mangled names defined in
+     that document, a vector type must not only be of the correct mode but also
+     be of the correct internal AdvSIMD vector type (e.g. __Int8x8_t); these
+     types are registered by aarch64_init_simd_builtin_types ().  In other
+     words, vector types defined in other ways e.g. via vector_size attribute
+     will get default mangled names.  */
+  const char *mangle;
+
+  /* Internal type.  */
+  tree itype;
+
+  /* Element type.  */
+  tree eltype;
+
+  /* Machine mode the internal type maps to.  */
+  enum machine_mode mode;
+
+  /* Qualifiers.  */
+  enum aarch64_type_qualifiers q;
+};
+
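+/* Return the aarch64_simd_type_info record corresponding to TYPE.
+   The record is returned by value; the underlying table lives in
+   aarch64-builtins.c.  */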
+aarch64_simd_type_info aarch64_get_simd_info_for_type (enum aarch64_simd_type);
+
+#endif /* AARCH64_BUILTINS_H */
+
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
index 02e42a71e5e..51e6c1a9cc4 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
@@ -44,6 +44,14 @@
 #include "aarch64-sve-builtins-shapes.h"
 #include "aarch64-sve-builtins-base.h"
 #include "aarch64-sve-builtins-functions.h"
+#include "aarch64-builtins.h"
+#include "gimple-ssa.h"
+#include "tree-phinodes.h"
+#include "tree-ssa-operands.h"
+#include "ssa-iterators.h"
+#include "stringpool.h"
+#include "value-range.h"
+#include "tree-ssanames.h"
 
 using namespace aarch64_sve;
 
@@ -1207,6 +1215,56 @@ public:
     insn_code icode = code_for_aarch64_sve_ld1rq (e.vector_mode (0));
     return e.use_contiguous_load_insn (icode);
   }
+
+  gimple *
+  fold (gimple_folder &f) const OVERRIDE
+  {
+    tree arg0 = gimple_call_arg (f.call, 0);
+    tree arg1 = gimple_call_arg (f.call, 1);
+
+    /* Transform:
+       lhs = svld1rq ({-1, -1, ... }, arg1)
+       into:
+       tmp = mem_ref<int32x4_t> [(int * {ref-all}) arg1]
+       lhs = vec_perm_expr<tmp, tmp, {0, 1, 2, 3, ...}>
+       on little-endian targets.  */
+
+    if (!BYTES_BIG_ENDIAN
+	&& integer_all_onesp (arg0))
+      {
+	tree lhs = gimple_call_lhs (f.call);
+	auto simd_type = aarch64_get_simd_info_for_type (Int32x4_t);
+
+	tree elt_ptr_type
+	  = build_pointer_type_for_mode (simd_type.eltype, VOIDmode, true);
+	tree zero = build_zero_cst (elt_ptr_type);
+
+	/* Use element type alignment.  */
+	tree access_type
+	  = build_aligned_type (simd_type.itype, TYPE_ALIGN (simd_type.eltype));
+
+	tree tmp = make_ssa_name_fn (cfun, access_type, 0);
+	gimple *mem_ref_stmt
+	  = gimple_build_assign (tmp, fold_build2 (MEM_REF, access_type, arg1, zero));
+	gsi_insert_before (f.gsi, mem_ref_stmt, GSI_SAME_STMT);
+
+	tree mem_ref_lhs = gimple_get_lhs (mem_ref_stmt);
+	tree vectype = TREE_TYPE (mem_ref_lhs);
+	tree lhs_type = TREE_TYPE (lhs);
+
+	int source_nelts = TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
+	vec_perm_builder sel (TYPE_VECTOR_SUBPARTS (lhs_type), source_nelts, 1);
+	for (int i = 0; i < source_nelts; i++)
+	  sel.quick_push (i);
+
+	vec_perm_indices indices (sel, 1, source_nelts);
+	gcc_checking_assert (can_vec_perm_const_p (TYPE_MODE (lhs_type), indices));
+	tree mask = vec_perm_indices_to_tree (lhs_type, indices);
+	return gimple_build_assign (lhs, VEC_PERM_EXPR, mem_ref_lhs, mem_ref_lhs, mask);
+      }
+
+    return NULL;
+  }
 };
 
 class svld1ro_impl : public load_replicate
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index f07330cff4f..dc6e5ca1e1d 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -23009,6 +23009,40 @@ aarch64_evpc_sve_tbl (struct expand_vec_perm_d *d)
   return true;
 }
 
+/* Try to implement D using SVE dup instruction.
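+
+   This matches permutations that broadcast a 128-bit Advanced SIMD
+   operand to every quadword of a variable-length SVE target, e.g. a
+   VNx4SI result selecting {0, 1, 2, 3, 0, 1, 2, 3, ...} from a V4SI
+   operand, which DUP Zd.Q, Zn.Q[0] implements directly.  */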
+
+static bool
+aarch64_evpc_sve_dup (struct expand_vec_perm_d *d)
+{
+  if (BYTES_BIG_ENDIAN
+      || d->perm.length ().is_constant ()
+      || !d->one_vector_p
+      || d->target == NULL
+      || d->op0 == NULL
+      || GET_MODE_NUNITS (GET_MODE (d->target)).is_constant ()
+      || !GET_MODE_NUNITS (GET_MODE (d->op0)).is_constant ())
+    return false;
+
+  int npatterns = d->perm.encoding ().npatterns ();
+  if (!known_eq (npatterns, GET_MODE_NUNITS (GET_MODE (d->op0))))
+    return false;
+
+  for (int i = 0; i < npatterns; i++)
+    if (!known_eq (d->perm[i], i))
+      return false;
+
+  if (d->testing_p)
+    return true;
+
+  aarch64_expand_sve_dupq (d->target, GET_MODE (d->target), d->op0);
+  return true;
+}
+
 /* Try to implement D using SVE SEL instruction.  */
 
 static bool
@@ -23169,7 +23203,12 @@ aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
 	  else if (aarch64_evpc_reencode (d))
 	    return true;
 	  if (d->vec_flags == VEC_SVE_DATA)
-	    return aarch64_evpc_sve_tbl (d);
+	    {
+	      if (aarch64_evpc_sve_dup (d))
+		return true;
+	      else if (aarch64_evpc_sve_tbl (d))
+		return true;
+	    }
 	  else if (d->vec_flags == VEC_ADVSIMD)
 	    return aarch64_evpc_tbl (d);
 	}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/pr96463.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/pr96463.c
new file mode 100644
index 00000000000..35100a9e01c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/pr96463.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+
+#include "arm_neon.h"
+#include "arm_sve.h"
+
+svint32_t f1 (int32x4_t x)
+{
+  return svld1rq (svptrue_b8 (), &x[0]);
+}
+
+svint32_t f2 (int *x)
+{
+  return svld1rq (svptrue_b8 (), x);
+}
+
+/* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.q, z[0-9]+\.q\[0\]} 2 { target aarch64_little_endian } } } */
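+
+/* On little-endian targets, each svld1rq call above should be folded
+   to a constant VEC_PERM_EXPR and expanded as a quadword dup rather
+   than a LD1RQ load-replicate; for f2 the expected code is roughly
+
+     ldr	q0, [x0]
+     dup	z0.q, z0.q[0]
+
+   which is what the scan-assembler pattern above checks.  */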