* [PATCH 1/6] aarch64: Move Neon vector-tuple type declaration into the compiler
@ 2021-10-22 14:28 Jonathan Wright
2021-10-22 14:40 ` Richard Sandiford
0 siblings, 1 reply; 2+ messages in thread
From: Jonathan Wright @ 2021-10-22 14:28 UTC (permalink / raw)
To: gcc-patches; +Cc: Richard Sandiford, Kyrylo Tkachov
[-- Attachment #1: Type: text/plain, Size: 1461 bytes --]
Hi,
As subject, this patch declares the Neon vector-tuple types inside the
compiler instead of in the arm_neon.h header. This is a necessary first
step before adding corresponding machine modes to the AArch64
backend.
The vector-tuple types are implemented using a #pragma. This means
initialization of builtin functions that have vector-tuple types as
arguments or return values has to be delayed until the #pragma is
handled.
Bootstrapped and regression tested on aarch64-none-linux-gnu - no
issues.
Note that this patch series cannot be merged until the following has
been accepted:
https://gcc.gnu.org/pipermail/gcc-patches/2021-October/581948.html
Ok for master with this proviso?
Thanks,
Jonathan
---
gcc/ChangeLog:
2021-09-10 Jonathan Wright <jonathan.wright@arm.com>
* config/aarch64/aarch64-builtins.c (aarch64_init_simd_builtins):
Factor out main loop to...
(aarch64_init_simd_builtin_functions): This new function.
(register_tuple_type): Define.
(aarch64_scalar_builtin_type_p): Define.
(handle_arm_neon_h): Define.
* config/aarch64/aarch64-c.c (aarch64_pragma_aarch64): Handle
pragma for arm_neon.h.
* config/aarch64/aarch64-protos.h (aarch64_advsimd_struct_mode_p):
Declare.
(handle_arm_neon_h): Likewise.
* config/aarch64/aarch64.c (aarch64_advsimd_struct_mode_p):
Remove static modifier.
* config/aarch64/arm_neon.h (target): Remove Neon vector
structure type definitions.
[-- Attachment #2: rb14838.patch --]
[-- Type: application/octet-stream, Size: 14801 bytes --]
diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c
index 1a507ea59142d0b5977b0167abfe9a58a567adf7..27f2dc5ea4337da80f3b84b6a798263e7bd9012e 100644
--- a/gcc/config/aarch64/aarch64-builtins.c
+++ b/gcc/config/aarch64/aarch64-builtins.c
@@ -1045,32 +1045,22 @@ aarch64_init_fcmla_laneq_builtins (void)
}
void
-aarch64_init_simd_builtins (void)
+aarch64_init_simd_builtin_functions (bool called_from_pragma)
{
unsigned int i, fcode = AARCH64_SIMD_PATTERN_START;
- if (aarch64_simd_builtins_initialized_p)
- return;
-
- aarch64_simd_builtins_initialized_p = true;
-
- aarch64_init_simd_builtin_types ();
-
- /* Strong-typing hasn't been implemented for all AdvSIMD builtin intrinsics.
- Therefore we need to preserve the old __builtin scalar types. It can be
- removed once all the intrinsics become strongly typed using the qualifier
- system. */
- aarch64_init_simd_builtin_scalar_types ();
-
- tree lane_check_fpr = build_function_type_list (void_type_node,
- size_type_node,
- size_type_node,
- intSI_type_node,
- NULL);
- aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_LANE_CHECK]
- = aarch64_general_add_builtin ("__builtin_aarch64_im_lane_boundsi",
- lane_check_fpr,
- AARCH64_SIMD_BUILTIN_LANE_CHECK);
+ if (!called_from_pragma)
+ {
+ tree lane_check_fpr = build_function_type_list (void_type_node,
+ size_type_node,
+ size_type_node,
+ intSI_type_node,
+ NULL);
+ aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_LANE_CHECK]
+ = aarch64_general_add_builtin ("__builtin_aarch64_im_lane_boundsi",
+ lane_check_fpr,
+ AARCH64_SIMD_BUILTIN_LANE_CHECK);
+ }
for (i = 0; i < ARRAY_SIZE (aarch64_simd_builtin_data); i++, fcode++)
{
@@ -1100,6 +1090,18 @@ aarch64_init_simd_builtins (void)
tree return_type = void_type_node, args = void_list_node;
tree eltype;
+ int struct_mode_args = 0;
+ for (int j = op_num; j >= 0; j--)
+ {
+ machine_mode op_mode = insn_data[d->code].operand[j].mode;
+ if (aarch64_advsimd_struct_mode_p (op_mode))
+ struct_mode_args++;
+ }
+
+ if ((called_from_pragma && struct_mode_args == 0)
+ || (!called_from_pragma && struct_mode_args > 0))
+ continue;
+
/* Build a function type directly from the insn_data for this
builtin. The build_function_type () function takes care of
removing duplicates for us. */
@@ -1173,9 +1175,82 @@ aarch64_init_simd_builtins (void)
fndecl = aarch64_general_add_builtin (namebuf, ftype, fcode, attrs);
aarch64_builtin_decls[fcode] = fndecl;
}
+}
+
+/* Register the tuple type that contains NUM_VECTORS of the AdvSIMD type
+ indexed by TYPE_INDEX. */
+static void
+register_tuple_type (unsigned int num_vectors, unsigned int type_index)
+{
+ aarch64_simd_type_info *type = &aarch64_simd_types[type_index];
+
+ /* Synthesize the name of the user-visible vector tuple type. */
+ const char *vector_type_name = type->name;
+ char tuple_type_name[sizeof ("bfloat16x4x2_t")];
+ snprintf (tuple_type_name, sizeof (tuple_type_name), "%.*sx%d_t",
+ (int) strlen (vector_type_name) - 4, vector_type_name + 2,
+ num_vectors);
+ tuple_type_name[0] = TOLOWER (tuple_type_name[0]);
+
+ tree vector_type = type->itype;
+ tree array_type = build_array_type_nelts (vector_type, num_vectors);
+ unsigned int alignment
+ = (known_eq (GET_MODE_SIZE (type->mode), 16) ? 128 : 64);
+ gcc_assert (TYPE_MODE_RAW (array_type) == TYPE_MODE (array_type)
+ && TYPE_ALIGN (array_type) == alignment);
+
+ tree field = build_decl (input_location, FIELD_DECL,
+ get_identifier ("val"), array_type);
+
+ tree t = lang_hooks.types.simulate_record_decl (input_location,
+ tuple_type_name,
+ make_array_slice (&field,
+ 1));
+ gcc_assert (TYPE_MODE_RAW (t) == TYPE_MODE (t)
+ && TYPE_ALIGN (t) == alignment);
+}
+
+static bool
+aarch64_scalar_builtin_type_p (aarch64_simd_type t)
+{
+ return (t == Poly8_t || t == Poly16_t || t == Poly64_t || t == Poly128_t);
+}
+
+/* Implement #pragma GCC aarch64 "arm_neon.h". */
+void
+handle_arm_neon_h (void)
+{
+ /* Register the AdvSIMD vector tuple types. */
+ for (unsigned int i = 0; i < ARM_NEON_H_TYPES_LAST; i++)
+ for (unsigned int count = 2; count <= 4; ++count)
+ if (!aarch64_scalar_builtin_type_p (aarch64_simd_types[i].type))
+ register_tuple_type (count, i);
+
+ aarch64_init_simd_builtin_functions (true);
+}
+
+void
+aarch64_init_simd_builtins (void)
+{
+ if (aarch64_simd_builtins_initialized_p)
+ return;
+
+ aarch64_simd_builtins_initialized_p = true;
+
+ aarch64_init_simd_builtin_types ();
+
+ /* Strong-typing hasn't been implemented for all AdvSIMD builtin intrinsics.
+ Therefore we need to preserve the old __builtin scalar types. It can be
+ removed once all the intrinsics become strongly typed using the qualifier
+ system. */
+ aarch64_init_simd_builtin_scalar_types ();
+
+ aarch64_init_simd_builtin_functions (false);
+ if (in_lto_p)
+ handle_arm_neon_h ();
- /* Initialize the remaining fcmla_laneq intrinsics. */
- aarch64_init_fcmla_laneq_builtins ();
+ /* Initialize the remaining fcmla_laneq intrinsics. */
+ aarch64_init_fcmla_laneq_builtins ();
}
static void
diff --git a/gcc/config/aarch64/aarch64-c.c b/gcc/config/aarch64/aarch64-c.c
index f9ddffa007818a42710cfc0d60af580dd6c76f95..d6653e474dec9bcddde2106f36ceb22f1d43375c 100644
--- a/gcc/config/aarch64/aarch64-c.c
+++ b/gcc/config/aarch64/aarch64-c.c
@@ -296,6 +296,8 @@ aarch64_pragma_aarch64 (cpp_reader *)
const char *name = TREE_STRING_POINTER (x);
if (strcmp (name, "arm_sve.h") == 0)
aarch64_sve::handle_arm_sve_h ();
+ else if (strcmp (name, "arm_neon.h") == 0)
+ handle_arm_neon_h ();
else
error ("unknown %<#pragma GCC aarch64%> option %qs", name);
}
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index b91eeeba1012ef830f8bb3535c7c81c29afccc17..c6a83d57c8ede53053cdd852b7d7e6e8a4a08514 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -743,6 +743,7 @@ unsigned HOST_WIDE_INT aarch64_and_split_imm2 (HOST_WIDE_INT val_in);
bool aarch64_and_bitmask_imm (unsigned HOST_WIDE_INT val_in, machine_mode mode);
int aarch64_branch_cost (bool, bool);
enum aarch64_symbol_type aarch64_classify_symbolic_expression (rtx);
+bool aarch64_advsimd_struct_mode_p (machine_mode mode);
opt_machine_mode aarch64_vq_mode (scalar_mode);
opt_machine_mode aarch64_full_sve_mode (scalar_mode);
bool aarch64_can_const_movi_rtx_p (rtx x, machine_mode mode);
@@ -967,6 +968,7 @@ rtx aarch64_general_expand_builtin (unsigned int, tree, rtx, int);
tree aarch64_general_builtin_decl (unsigned, bool);
tree aarch64_general_builtin_rsqrt (unsigned int);
tree aarch64_builtin_vectorized_function (unsigned int, tree, tree);
+void handle_arm_neon_h (void);
namespace aarch64_sve {
void init_builtins ();
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index a9a1800af53b18306465e382e9dd149d0e335b09..fe09e36710b9b082ea6fe86b1c3a937b9e3bd9b6 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -2785,7 +2785,7 @@ aarch64_estimated_sve_vq ()
}
/* Return true if MODE is any of the Advanced SIMD structure modes. */
-static bool
+bool
aarch64_advsimd_struct_mode_p (machine_mode mode)
{
return (TARGET_SIMD
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index 2d5bf34b698a88ed934c522cc9f14f125c604a39..b638854c32460ccfbc0cdd24ccbdbf18ccb2df44 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -30,6 +30,8 @@
#pragma GCC push_options
#pragma GCC target ("+nothing+simd")
+#pragma GCC aarch64 "arm_neon.h"
+
#include <stdint.h>
#define __AARCH64_UINT64_C(__C) ((uint64_t) __C)
@@ -76,456 +78,6 @@ typedef double float64_t;
typedef __Bfloat16x4_t bfloat16x4_t;
typedef __Bfloat16x8_t bfloat16x8_t;
-typedef struct bfloat16x4x2_t
-{
- bfloat16x4_t val[2];
-} bfloat16x4x2_t;
-
-typedef struct bfloat16x8x2_t
-{
- bfloat16x8_t val[2];
-} bfloat16x8x2_t;
-
-typedef struct bfloat16x4x3_t
-{
- bfloat16x4_t val[3];
-} bfloat16x4x3_t;
-
-typedef struct bfloat16x8x3_t
-{
- bfloat16x8_t val[3];
-} bfloat16x8x3_t;
-
-typedef struct bfloat16x4x4_t
-{
- bfloat16x4_t val[4];
-} bfloat16x4x4_t;
-
-typedef struct bfloat16x8x4_t
-{
- bfloat16x8_t val[4];
-} bfloat16x8x4_t;
-
-typedef struct int8x8x2_t
-{
- int8x8_t val[2];
-} int8x8x2_t;
-
-typedef struct int8x16x2_t
-{
- int8x16_t val[2];
-} int8x16x2_t;
-
-typedef struct int16x4x2_t
-{
- int16x4_t val[2];
-} int16x4x2_t;
-
-typedef struct int16x8x2_t
-{
- int16x8_t val[2];
-} int16x8x2_t;
-
-typedef struct int32x2x2_t
-{
- int32x2_t val[2];
-} int32x2x2_t;
-
-typedef struct int32x4x2_t
-{
- int32x4_t val[2];
-} int32x4x2_t;
-
-typedef struct int64x1x2_t
-{
- int64x1_t val[2];
-} int64x1x2_t;
-
-typedef struct int64x2x2_t
-{
- int64x2_t val[2];
-} int64x2x2_t;
-
-typedef struct uint8x8x2_t
-{
- uint8x8_t val[2];
-} uint8x8x2_t;
-
-typedef struct uint8x16x2_t
-{
- uint8x16_t val[2];
-} uint8x16x2_t;
-
-typedef struct uint16x4x2_t
-{
- uint16x4_t val[2];
-} uint16x4x2_t;
-
-typedef struct uint16x8x2_t
-{
- uint16x8_t val[2];
-} uint16x8x2_t;
-
-typedef struct uint32x2x2_t
-{
- uint32x2_t val[2];
-} uint32x2x2_t;
-
-typedef struct uint32x4x2_t
-{
- uint32x4_t val[2];
-} uint32x4x2_t;
-
-typedef struct uint64x1x2_t
-{
- uint64x1_t val[2];
-} uint64x1x2_t;
-
-typedef struct uint64x2x2_t
-{
- uint64x2_t val[2];
-} uint64x2x2_t;
-
-typedef struct float16x4x2_t
-{
- float16x4_t val[2];
-} float16x4x2_t;
-
-typedef struct float16x8x2_t
-{
- float16x8_t val[2];
-} float16x8x2_t;
-
-typedef struct float32x2x2_t
-{
- float32x2_t val[2];
-} float32x2x2_t;
-
-typedef struct float32x4x2_t
-{
- float32x4_t val[2];
-} float32x4x2_t;
-
-typedef struct float64x2x2_t
-{
- float64x2_t val[2];
-} float64x2x2_t;
-
-typedef struct float64x1x2_t
-{
- float64x1_t val[2];
-} float64x1x2_t;
-
-typedef struct poly8x8x2_t
-{
- poly8x8_t val[2];
-} poly8x8x2_t;
-
-typedef struct poly8x16x2_t
-{
- poly8x16_t val[2];
-} poly8x16x2_t;
-
-typedef struct poly16x4x2_t
-{
- poly16x4_t val[2];
-} poly16x4x2_t;
-
-typedef struct poly16x8x2_t
-{
- poly16x8_t val[2];
-} poly16x8x2_t;
-
-typedef struct poly64x1x2_t
-{
- poly64x1_t val[2];
-} poly64x1x2_t;
-
-typedef struct poly64x1x3_t
-{
- poly64x1_t val[3];
-} poly64x1x3_t;
-
-typedef struct poly64x1x4_t
-{
- poly64x1_t val[4];
-} poly64x1x4_t;
-
-typedef struct poly64x2x2_t
-{
- poly64x2_t val[2];
-} poly64x2x2_t;
-
-typedef struct poly64x2x3_t
-{
- poly64x2_t val[3];
-} poly64x2x3_t;
-
-typedef struct poly64x2x4_t
-{
- poly64x2_t val[4];
-} poly64x2x4_t;
-
-typedef struct int8x8x3_t
-{
- int8x8_t val[3];
-} int8x8x3_t;
-
-typedef struct int8x16x3_t
-{
- int8x16_t val[3];
-} int8x16x3_t;
-
-typedef struct int16x4x3_t
-{
- int16x4_t val[3];
-} int16x4x3_t;
-
-typedef struct int16x8x3_t
-{
- int16x8_t val[3];
-} int16x8x3_t;
-
-typedef struct int32x2x3_t
-{
- int32x2_t val[3];
-} int32x2x3_t;
-
-typedef struct int32x4x3_t
-{
- int32x4_t val[3];
-} int32x4x3_t;
-
-typedef struct int64x1x3_t
-{
- int64x1_t val[3];
-} int64x1x3_t;
-
-typedef struct int64x2x3_t
-{
- int64x2_t val[3];
-} int64x2x3_t;
-
-typedef struct uint8x8x3_t
-{
- uint8x8_t val[3];
-} uint8x8x3_t;
-
-typedef struct uint8x16x3_t
-{
- uint8x16_t val[3];
-} uint8x16x3_t;
-
-typedef struct uint16x4x3_t
-{
- uint16x4_t val[3];
-} uint16x4x3_t;
-
-typedef struct uint16x8x3_t
-{
- uint16x8_t val[3];
-} uint16x8x3_t;
-
-typedef struct uint32x2x3_t
-{
- uint32x2_t val[3];
-} uint32x2x3_t;
-
-typedef struct uint32x4x3_t
-{
- uint32x4_t val[3];
-} uint32x4x3_t;
-
-typedef struct uint64x1x3_t
-{
- uint64x1_t val[3];
-} uint64x1x3_t;
-
-typedef struct uint64x2x3_t
-{
- uint64x2_t val[3];
-} uint64x2x3_t;
-
-typedef struct float16x4x3_t
-{
- float16x4_t val[3];
-} float16x4x3_t;
-
-typedef struct float16x8x3_t
-{
- float16x8_t val[3];
-} float16x8x3_t;
-
-typedef struct float32x2x3_t
-{
- float32x2_t val[3];
-} float32x2x3_t;
-
-typedef struct float32x4x3_t
-{
- float32x4_t val[3];
-} float32x4x3_t;
-
-typedef struct float64x2x3_t
-{
- float64x2_t val[3];
-} float64x2x3_t;
-
-typedef struct float64x1x3_t
-{
- float64x1_t val[3];
-} float64x1x3_t;
-
-typedef struct poly8x8x3_t
-{
- poly8x8_t val[3];
-} poly8x8x3_t;
-
-typedef struct poly8x16x3_t
-{
- poly8x16_t val[3];
-} poly8x16x3_t;
-
-typedef struct poly16x4x3_t
-{
- poly16x4_t val[3];
-} poly16x4x3_t;
-
-typedef struct poly16x8x3_t
-{
- poly16x8_t val[3];
-} poly16x8x3_t;
-
-typedef struct int8x8x4_t
-{
- int8x8_t val[4];
-} int8x8x4_t;
-
-typedef struct int8x16x4_t
-{
- int8x16_t val[4];
-} int8x16x4_t;
-
-typedef struct int16x4x4_t
-{
- int16x4_t val[4];
-} int16x4x4_t;
-
-typedef struct int16x8x4_t
-{
- int16x8_t val[4];
-} int16x8x4_t;
-
-typedef struct int32x2x4_t
-{
- int32x2_t val[4];
-} int32x2x4_t;
-
-typedef struct int32x4x4_t
-{
- int32x4_t val[4];
-} int32x4x4_t;
-
-typedef struct int64x1x4_t
-{
- int64x1_t val[4];
-} int64x1x4_t;
-
-typedef struct int64x2x4_t
-{
- int64x2_t val[4];
-} int64x2x4_t;
-
-typedef struct uint8x8x4_t
-{
- uint8x8_t val[4];
-} uint8x8x4_t;
-
-typedef struct uint8x16x4_t
-{
- uint8x16_t val[4];
-} uint8x16x4_t;
-
-typedef struct uint16x4x4_t
-{
- uint16x4_t val[4];
-} uint16x4x4_t;
-
-typedef struct uint16x8x4_t
-{
- uint16x8_t val[4];
-} uint16x8x4_t;
-
-typedef struct uint32x2x4_t
-{
- uint32x2_t val[4];
-} uint32x2x4_t;
-
-typedef struct uint32x4x4_t
-{
- uint32x4_t val[4];
-} uint32x4x4_t;
-
-typedef struct uint64x1x4_t
-{
- uint64x1_t val[4];
-} uint64x1x4_t;
-
-typedef struct uint64x2x4_t
-{
- uint64x2_t val[4];
-} uint64x2x4_t;
-
-typedef struct float16x4x4_t
-{
- float16x4_t val[4];
-} float16x4x4_t;
-
-typedef struct float16x8x4_t
-{
- float16x8_t val[4];
-} float16x8x4_t;
-
-typedef struct float32x2x4_t
-{
- float32x2_t val[4];
-} float32x2x4_t;
-
-typedef struct float32x4x4_t
-{
- float32x4_t val[4];
-} float32x4x4_t;
-
-typedef struct float64x2x4_t
-{
- float64x2_t val[4];
-} float64x2x4_t;
-
-typedef struct float64x1x4_t
-{
- float64x1_t val[4];
-} float64x1x4_t;
-
-typedef struct poly8x8x4_t
-{
- poly8x8_t val[4];
-} poly8x8x4_t;
-
-typedef struct poly8x16x4_t
-{
- poly8x16_t val[4];
-} poly8x16x4_t;
-
-typedef struct poly16x4x4_t
-{
- poly16x4_t val[4];
-} poly16x4x4_t;
-
-typedef struct poly16x8x4_t
-{
- poly16x8_t val[4];
-} poly16x8x4_t;
-
/* __aarch64_vdup_lane internal macros. */
#define __aarch64_vdup_lane_any(__size, __q, __a, __b) \
vdup##__q##_n_##__size (__aarch64_vget_lane_any (__a, __b))
^ permalink raw reply [flat|nested] 2+ messages in thread
* Re: [PATCH 1/6] aarch64: Move Neon vector-tuple type declaration into the compiler
2021-10-22 14:28 [PATCH 1/6] aarch64: Move Neon vector-tuple type declaration into the compiler Jonathan Wright
@ 2021-10-22 14:40 ` Richard Sandiford
0 siblings, 0 replies; 2+ messages in thread
From: Richard Sandiford @ 2021-10-22 14:40 UTC (permalink / raw)
To: Jonathan Wright; +Cc: gcc-patches, Kyrylo Tkachov
Jonathan Wright <Jonathan.Wright@arm.com> writes:
> Hi,
>
> As subject, this patch declares the Neon vector-tuple types inside the
> compiler instead of in the arm_neon.h header. This is a necessary first
> step before adding corresponding machine modes to the AArch64
> backend.
>
> The vector-tuple types are implemented using a #pragma. This means
> initialization of builtin functions that have vector-tuple types as
> arguments or return values has to be delayed until the #pragma is
> handled.
>
> Bootstrapped and regression tested on aarch64-none-linux-gnu - no
> issues.
>
> Note that this patch series cannot be merged until the following has
> been accepted:
> https://gcc.gnu.org/pipermail/gcc-patches/2021-October/581948.html
>
> Ok for master with this proviso?
>
> Thanks,
> Jonathan
>
> ---
>
> gcc/ChangeLog:
>
> 2021-09-10 Jonathan Wright <jonathan.wright@arm.com>
>
> * config/aarch64/aarch64-builtins.c (aarch64_init_simd_builtins):
> Factor out main loop to...
> (aarch64_init_simd_builtin_functions): This new function.
> (register_tuple_type): Define.
> (aarch64_scalar_builtin_type_p): Define.
> (handle_arm_neon_h): Define.
> * config/aarch64/aarch64-c.c (aarch64_pragma_aarch64): Handle
> pragma for arm_neon.h.
> * config/aarch64/aarch64-protos.h (aarch64_advsimd_struct_mode_p):
> Declare.
> (handle_arm_neon_h): Likewise.
> * config/aarch64/aarch64.c (aarch64_advsimd_struct_mode_p):
> Remove static modifier.
> * config/aarch64/arm_neon.h (target): Remove Neon vector
> structure type definitions.
OK when the prerequisite you mention is applied, thanks.
Richard
> diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c
> index 1a507ea59142d0b5977b0167abfe9a58a567adf7..27f2dc5ea4337da80f3b84b6a798263e7bd9012e 100644
> --- a/gcc/config/aarch64/aarch64-builtins.c
> +++ b/gcc/config/aarch64/aarch64-builtins.c
> @@ -1045,32 +1045,22 @@ aarch64_init_fcmla_laneq_builtins (void)
> }
>
> void
> -aarch64_init_simd_builtins (void)
> +aarch64_init_simd_builtin_functions (bool called_from_pragma)
> {
> unsigned int i, fcode = AARCH64_SIMD_PATTERN_START;
>
> - if (aarch64_simd_builtins_initialized_p)
> - return;
> -
> - aarch64_simd_builtins_initialized_p = true;
> -
> - aarch64_init_simd_builtin_types ();
> -
> - /* Strong-typing hasn't been implemented for all AdvSIMD builtin intrinsics.
> - Therefore we need to preserve the old __builtin scalar types. It can be
> - removed once all the intrinsics become strongly typed using the qualifier
> - system. */
> - aarch64_init_simd_builtin_scalar_types ();
> -
> - tree lane_check_fpr = build_function_type_list (void_type_node,
> - size_type_node,
> - size_type_node,
> - intSI_type_node,
> - NULL);
> - aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_LANE_CHECK]
> - = aarch64_general_add_builtin ("__builtin_aarch64_im_lane_boundsi",
> - lane_check_fpr,
> - AARCH64_SIMD_BUILTIN_LANE_CHECK);
> + if (!called_from_pragma)
> + {
> + tree lane_check_fpr = build_function_type_list (void_type_node,
> + size_type_node,
> + size_type_node,
> + intSI_type_node,
> + NULL);
> + aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_LANE_CHECK]
> + = aarch64_general_add_builtin ("__builtin_aarch64_im_lane_boundsi",
> + lane_check_fpr,
> + AARCH64_SIMD_BUILTIN_LANE_CHECK);
> + }
>
> for (i = 0; i < ARRAY_SIZE (aarch64_simd_builtin_data); i++, fcode++)
> {
> @@ -1100,6 +1090,18 @@ aarch64_init_simd_builtins (void)
> tree return_type = void_type_node, args = void_list_node;
> tree eltype;
>
> + int struct_mode_args = 0;
> + for (int j = op_num; j >= 0; j--)
> + {
> + machine_mode op_mode = insn_data[d->code].operand[j].mode;
> + if (aarch64_advsimd_struct_mode_p (op_mode))
> + struct_mode_args++;
> + }
> +
> + if ((called_from_pragma && struct_mode_args == 0)
> + || (!called_from_pragma && struct_mode_args > 0))
> + continue;
> +
> /* Build a function type directly from the insn_data for this
> builtin. The build_function_type () function takes care of
> removing duplicates for us. */
> @@ -1173,9 +1175,82 @@ aarch64_init_simd_builtins (void)
> fndecl = aarch64_general_add_builtin (namebuf, ftype, fcode, attrs);
> aarch64_builtin_decls[fcode] = fndecl;
> }
> +}
> +
> +/* Register the tuple type that contains NUM_VECTORS of the AdvSIMD type
> + indexed by TYPE_INDEX. */
> +static void
> +register_tuple_type (unsigned int num_vectors, unsigned int type_index)
> +{
> + aarch64_simd_type_info *type = &aarch64_simd_types[type_index];
> +
> + /* Synthesize the name of the user-visible vector tuple type. */
> + const char *vector_type_name = type->name;
> + char tuple_type_name[sizeof ("bfloat16x4x2_t")];
> + snprintf (tuple_type_name, sizeof (tuple_type_name), "%.*sx%d_t",
> + (int) strlen (vector_type_name) - 4, vector_type_name + 2,
> + num_vectors);
> + tuple_type_name[0] = TOLOWER (tuple_type_name[0]);
> +
> + tree vector_type = type->itype;
> + tree array_type = build_array_type_nelts (vector_type, num_vectors);
> + unsigned int alignment
> + = (known_eq (GET_MODE_SIZE (type->mode), 16) ? 128 : 64);
> + gcc_assert (TYPE_MODE_RAW (array_type) == TYPE_MODE (array_type)
> + && TYPE_ALIGN (array_type) == alignment);
> +
> + tree field = build_decl (input_location, FIELD_DECL,
> + get_identifier ("val"), array_type);
> +
> + tree t = lang_hooks.types.simulate_record_decl (input_location,
> + tuple_type_name,
> + make_array_slice (&field,
> + 1));
> + gcc_assert (TYPE_MODE_RAW (t) == TYPE_MODE (t)
> + && TYPE_ALIGN (t) == alignment);
> +}
> +
> +static bool
> +aarch64_scalar_builtin_type_p (aarch64_simd_type t)
> +{
> + return (t == Poly8_t || t == Poly16_t || t == Poly64_t || t == Poly128_t);
> +}
> +
> +/* Implement #pragma GCC aarch64 "arm_neon.h". */
> +void
> +handle_arm_neon_h (void)
> +{
> + /* Register the AdvSIMD vector tuple types. */
> + for (unsigned int i = 0; i < ARM_NEON_H_TYPES_LAST; i++)
> + for (unsigned int count = 2; count <= 4; ++count)
> + if (!aarch64_scalar_builtin_type_p (aarch64_simd_types[i].type))
> + register_tuple_type (count, i);
> +
> + aarch64_init_simd_builtin_functions (true);
> +}
> +
> +void
> +aarch64_init_simd_builtins (void)
> +{
> + if (aarch64_simd_builtins_initialized_p)
> + return;
> +
> + aarch64_simd_builtins_initialized_p = true;
> +
> + aarch64_init_simd_builtin_types ();
> +
> + /* Strong-typing hasn't been implemented for all AdvSIMD builtin intrinsics.
> + Therefore we need to preserve the old __builtin scalar types. It can be
> + removed once all the intrinsics become strongly typed using the qualifier
> + system. */
> + aarch64_init_simd_builtin_scalar_types ();
> +
> + aarch64_init_simd_builtin_functions (false);
> + if (in_lto_p)
> + handle_arm_neon_h ();
>
> - /* Initialize the remaining fcmla_laneq intrinsics. */
> - aarch64_init_fcmla_laneq_builtins ();
> + /* Initialize the remaining fcmla_laneq intrinsics. */
> + aarch64_init_fcmla_laneq_builtins ();
> }
>
> static void
> diff --git a/gcc/config/aarch64/aarch64-c.c b/gcc/config/aarch64/aarch64-c.c
> index f9ddffa007818a42710cfc0d60af580dd6c76f95..d6653e474dec9bcddde2106f36ceb22f1d43375c 100644
> --- a/gcc/config/aarch64/aarch64-c.c
> +++ b/gcc/config/aarch64/aarch64-c.c
> @@ -296,6 +296,8 @@ aarch64_pragma_aarch64 (cpp_reader *)
> const char *name = TREE_STRING_POINTER (x);
> if (strcmp (name, "arm_sve.h") == 0)
> aarch64_sve::handle_arm_sve_h ();
> + else if (strcmp (name, "arm_neon.h") == 0)
> + handle_arm_neon_h ();
> else
> error ("unknown %<#pragma GCC aarch64%> option %qs", name);
> }
> diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
> index b91eeeba1012ef830f8bb3535c7c81c29afccc17..c6a83d57c8ede53053cdd852b7d7e6e8a4a08514 100644
> --- a/gcc/config/aarch64/aarch64-protos.h
> +++ b/gcc/config/aarch64/aarch64-protos.h
> @@ -743,6 +743,7 @@ unsigned HOST_WIDE_INT aarch64_and_split_imm2 (HOST_WIDE_INT val_in);
> bool aarch64_and_bitmask_imm (unsigned HOST_WIDE_INT val_in, machine_mode mode);
> int aarch64_branch_cost (bool, bool);
> enum aarch64_symbol_type aarch64_classify_symbolic_expression (rtx);
> +bool aarch64_advsimd_struct_mode_p (machine_mode mode);
> opt_machine_mode aarch64_vq_mode (scalar_mode);
> opt_machine_mode aarch64_full_sve_mode (scalar_mode);
> bool aarch64_can_const_movi_rtx_p (rtx x, machine_mode mode);
> @@ -967,6 +968,7 @@ rtx aarch64_general_expand_builtin (unsigned int, tree, rtx, int);
> tree aarch64_general_builtin_decl (unsigned, bool);
> tree aarch64_general_builtin_rsqrt (unsigned int);
> tree aarch64_builtin_vectorized_function (unsigned int, tree, tree);
> +void handle_arm_neon_h (void);
>
> namespace aarch64_sve {
> void init_builtins ();
> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
> index a9a1800af53b18306465e382e9dd149d0e335b09..fe09e36710b9b082ea6fe86b1c3a937b9e3bd9b6 100644
> --- a/gcc/config/aarch64/aarch64.c
> +++ b/gcc/config/aarch64/aarch64.c
> @@ -2785,7 +2785,7 @@ aarch64_estimated_sve_vq ()
> }
>
> /* Return true if MODE is any of the Advanced SIMD structure modes. */
> -static bool
> +bool
> aarch64_advsimd_struct_mode_p (machine_mode mode)
> {
> return (TARGET_SIMD
> diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
> index 2d5bf34b698a88ed934c522cc9f14f125c604a39..b638854c32460ccfbc0cdd24ccbdbf18ccb2df44 100644
> --- a/gcc/config/aarch64/arm_neon.h
> +++ b/gcc/config/aarch64/arm_neon.h
> @@ -30,6 +30,8 @@
> #pragma GCC push_options
> #pragma GCC target ("+nothing+simd")
>
> +#pragma GCC aarch64 "arm_neon.h"
> +
> #include <stdint.h>
>
> #define __AARCH64_UINT64_C(__C) ((uint64_t) __C)
> @@ -76,456 +78,6 @@ typedef double float64_t;
> typedef __Bfloat16x4_t bfloat16x4_t;
> typedef __Bfloat16x8_t bfloat16x8_t;
>
> -typedef struct bfloat16x4x2_t
> -{
> - bfloat16x4_t val[2];
> -} bfloat16x4x2_t;
> -
> -typedef struct bfloat16x8x2_t
> -{
> - bfloat16x8_t val[2];
> -} bfloat16x8x2_t;
> -
> -typedef struct bfloat16x4x3_t
> -{
> - bfloat16x4_t val[3];
> -} bfloat16x4x3_t;
> -
> -typedef struct bfloat16x8x3_t
> -{
> - bfloat16x8_t val[3];
> -} bfloat16x8x3_t;
> -
> -typedef struct bfloat16x4x4_t
> -{
> - bfloat16x4_t val[4];
> -} bfloat16x4x4_t;
> -
> -typedef struct bfloat16x8x4_t
> -{
> - bfloat16x8_t val[4];
> -} bfloat16x8x4_t;
> -
> -typedef struct int8x8x2_t
> -{
> - int8x8_t val[2];
> -} int8x8x2_t;
> -
> -typedef struct int8x16x2_t
> -{
> - int8x16_t val[2];
> -} int8x16x2_t;
> -
> -typedef struct int16x4x2_t
> -{
> - int16x4_t val[2];
> -} int16x4x2_t;
> -
> -typedef struct int16x8x2_t
> -{
> - int16x8_t val[2];
> -} int16x8x2_t;
> -
> -typedef struct int32x2x2_t
> -{
> - int32x2_t val[2];
> -} int32x2x2_t;
> -
> -typedef struct int32x4x2_t
> -{
> - int32x4_t val[2];
> -} int32x4x2_t;
> -
> -typedef struct int64x1x2_t
> -{
> - int64x1_t val[2];
> -} int64x1x2_t;
> -
> -typedef struct int64x2x2_t
> -{
> - int64x2_t val[2];
> -} int64x2x2_t;
> -
> -typedef struct uint8x8x2_t
> -{
> - uint8x8_t val[2];
> -} uint8x8x2_t;
> -
> -typedef struct uint8x16x2_t
> -{
> - uint8x16_t val[2];
> -} uint8x16x2_t;
> -
> -typedef struct uint16x4x2_t
> -{
> - uint16x4_t val[2];
> -} uint16x4x2_t;
> -
> -typedef struct uint16x8x2_t
> -{
> - uint16x8_t val[2];
> -} uint16x8x2_t;
> -
> -typedef struct uint32x2x2_t
> -{
> - uint32x2_t val[2];
> -} uint32x2x2_t;
> -
> -typedef struct uint32x4x2_t
> -{
> - uint32x4_t val[2];
> -} uint32x4x2_t;
> -
> -typedef struct uint64x1x2_t
> -{
> - uint64x1_t val[2];
> -} uint64x1x2_t;
> -
> -typedef struct uint64x2x2_t
> -{
> - uint64x2_t val[2];
> -} uint64x2x2_t;
> -
> -typedef struct float16x4x2_t
> -{
> - float16x4_t val[2];
> -} float16x4x2_t;
> -
> -typedef struct float16x8x2_t
> -{
> - float16x8_t val[2];
> -} float16x8x2_t;
> -
> -typedef struct float32x2x2_t
> -{
> - float32x2_t val[2];
> -} float32x2x2_t;
> -
> -typedef struct float32x4x2_t
> -{
> - float32x4_t val[2];
> -} float32x4x2_t;
> -
> -typedef struct float64x2x2_t
> -{
> - float64x2_t val[2];
> -} float64x2x2_t;
> -
> -typedef struct float64x1x2_t
> -{
> - float64x1_t val[2];
> -} float64x1x2_t;
> -
> -typedef struct poly8x8x2_t
> -{
> - poly8x8_t val[2];
> -} poly8x8x2_t;
> -
> -typedef struct poly8x16x2_t
> -{
> - poly8x16_t val[2];
> -} poly8x16x2_t;
> -
> -typedef struct poly16x4x2_t
> -{
> - poly16x4_t val[2];
> -} poly16x4x2_t;
> -
> -typedef struct poly16x8x2_t
> -{
> - poly16x8_t val[2];
> -} poly16x8x2_t;
> -
> -typedef struct poly64x1x2_t
> -{
> - poly64x1_t val[2];
> -} poly64x1x2_t;
> -
> -typedef struct poly64x1x3_t
> -{
> - poly64x1_t val[3];
> -} poly64x1x3_t;
> -
> -typedef struct poly64x1x4_t
> -{
> - poly64x1_t val[4];
> -} poly64x1x4_t;
> -
> -typedef struct poly64x2x2_t
> -{
> - poly64x2_t val[2];
> -} poly64x2x2_t;
> -
> -typedef struct poly64x2x3_t
> -{
> - poly64x2_t val[3];
> -} poly64x2x3_t;
> -
> -typedef struct poly64x2x4_t
> -{
> - poly64x2_t val[4];
> -} poly64x2x4_t;
> -
> -typedef struct int8x8x3_t
> -{
> - int8x8_t val[3];
> -} int8x8x3_t;
> -
> -typedef struct int8x16x3_t
> -{
> - int8x16_t val[3];
> -} int8x16x3_t;
> -
> -typedef struct int16x4x3_t
> -{
> - int16x4_t val[3];
> -} int16x4x3_t;
> -
> -typedef struct int16x8x3_t
> -{
> - int16x8_t val[3];
> -} int16x8x3_t;
> -
> -typedef struct int32x2x3_t
> -{
> - int32x2_t val[3];
> -} int32x2x3_t;
> -
> -typedef struct int32x4x3_t
> -{
> - int32x4_t val[3];
> -} int32x4x3_t;
> -
> -typedef struct int64x1x3_t
> -{
> - int64x1_t val[3];
> -} int64x1x3_t;
> -
> -typedef struct int64x2x3_t
> -{
> - int64x2_t val[3];
> -} int64x2x3_t;
> -
> -typedef struct uint8x8x3_t
> -{
> - uint8x8_t val[3];
> -} uint8x8x3_t;
> -
> -typedef struct uint8x16x3_t
> -{
> - uint8x16_t val[3];
> -} uint8x16x3_t;
> -
> -typedef struct uint16x4x3_t
> -{
> - uint16x4_t val[3];
> -} uint16x4x3_t;
> -
> -typedef struct uint16x8x3_t
> -{
> - uint16x8_t val[3];
> -} uint16x8x3_t;
> -
> -typedef struct uint32x2x3_t
> -{
> - uint32x2_t val[3];
> -} uint32x2x3_t;
> -
> -typedef struct uint32x4x3_t
> -{
> - uint32x4_t val[3];
> -} uint32x4x3_t;
> -
> -typedef struct uint64x1x3_t
> -{
> - uint64x1_t val[3];
> -} uint64x1x3_t;
> -
> -typedef struct uint64x2x3_t
> -{
> - uint64x2_t val[3];
> -} uint64x2x3_t;
> -
> -typedef struct float16x4x3_t
> -{
> - float16x4_t val[3];
> -} float16x4x3_t;
> -
> -typedef struct float16x8x3_t
> -{
> - float16x8_t val[3];
> -} float16x8x3_t;
> -
> -typedef struct float32x2x3_t
> -{
> - float32x2_t val[3];
> -} float32x2x3_t;
> -
> -typedef struct float32x4x3_t
> -{
> - float32x4_t val[3];
> -} float32x4x3_t;
> -
> -typedef struct float64x2x3_t
> -{
> - float64x2_t val[3];
> -} float64x2x3_t;
> -
> -typedef struct float64x1x3_t
> -{
> - float64x1_t val[3];
> -} float64x1x3_t;
> -
> -typedef struct poly8x8x3_t
> -{
> - poly8x8_t val[3];
> -} poly8x8x3_t;
> -
> -typedef struct poly8x16x3_t
> -{
> - poly8x16_t val[3];
> -} poly8x16x3_t;
> -
> -typedef struct poly16x4x3_t
> -{
> - poly16x4_t val[3];
> -} poly16x4x3_t;
> -
> -typedef struct poly16x8x3_t
> -{
> - poly16x8_t val[3];
> -} poly16x8x3_t;
> -
> -typedef struct int8x8x4_t
> -{
> - int8x8_t val[4];
> -} int8x8x4_t;
> -
> -typedef struct int8x16x4_t
> -{
> - int8x16_t val[4];
> -} int8x16x4_t;
> -
> -typedef struct int16x4x4_t
> -{
> - int16x4_t val[4];
> -} int16x4x4_t;
> -
> -typedef struct int16x8x4_t
> -{
> - int16x8_t val[4];
> -} int16x8x4_t;
> -
> -typedef struct int32x2x4_t
> -{
> - int32x2_t val[4];
> -} int32x2x4_t;
> -
> -typedef struct int32x4x4_t
> -{
> - int32x4_t val[4];
> -} int32x4x4_t;
> -
> -typedef struct int64x1x4_t
> -{
> - int64x1_t val[4];
> -} int64x1x4_t;
> -
> -typedef struct int64x2x4_t
> -{
> - int64x2_t val[4];
> -} int64x2x4_t;
> -
> -typedef struct uint8x8x4_t
> -{
> - uint8x8_t val[4];
> -} uint8x8x4_t;
> -
> -typedef struct uint8x16x4_t
> -{
> - uint8x16_t val[4];
> -} uint8x16x4_t;
> -
> -typedef struct uint16x4x4_t
> -{
> - uint16x4_t val[4];
> -} uint16x4x4_t;
> -
> -typedef struct uint16x8x4_t
> -{
> - uint16x8_t val[4];
> -} uint16x8x4_t;
> -
> -typedef struct uint32x2x4_t
> -{
> - uint32x2_t val[4];
> -} uint32x2x4_t;
> -
> -typedef struct uint32x4x4_t
> -{
> - uint32x4_t val[4];
> -} uint32x4x4_t;
> -
> -typedef struct uint64x1x4_t
> -{
> - uint64x1_t val[4];
> -} uint64x1x4_t;
> -
> -typedef struct uint64x2x4_t
> -{
> - uint64x2_t val[4];
> -} uint64x2x4_t;
> -
> -typedef struct float16x4x4_t
> -{
> - float16x4_t val[4];
> -} float16x4x4_t;
> -
> -typedef struct float16x8x4_t
> -{
> - float16x8_t val[4];
> -} float16x8x4_t;
> -
> -typedef struct float32x2x4_t
> -{
> - float32x2_t val[4];
> -} float32x2x4_t;
> -
> -typedef struct float32x4x4_t
> -{
> - float32x4_t val[4];
> -} float32x4x4_t;
> -
> -typedef struct float64x2x4_t
> -{
> - float64x2_t val[4];
> -} float64x2x4_t;
> -
> -typedef struct float64x1x4_t
> -{
> - float64x1_t val[4];
> -} float64x1x4_t;
> -
> -typedef struct poly8x8x4_t
> -{
> - poly8x8_t val[4];
> -} poly8x8x4_t;
> -
> -typedef struct poly8x16x4_t
> -{
> - poly8x16_t val[4];
> -} poly8x16x4_t;
> -
> -typedef struct poly16x4x4_t
> -{
> - poly16x4_t val[4];
> -} poly16x4x4_t;
> -
> -typedef struct poly16x8x4_t
> -{
> - poly16x8_t val[4];
> -} poly16x8x4_t;
> -
> /* __aarch64_vdup_lane internal macros. */
> #define __aarch64_vdup_lane_any(__size, __q, __a, __b) \
> vdup##__q##_n_##__size (__aarch64_vget_lane_any (__a, __b))
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2021-10-22 14:40 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-10-22 14:28 [PATCH 1/6] aarch64: Move Neon vector-tuple type declaration into the compiler Jonathan Wright
2021-10-22 14:40 ` Richard Sandiford
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).