* [GCC][PATCH] arm: Fix polymorphic variants failing with undefined reference to `__ARM_undef` error.
@ 2021-06-10 16:14 Srinath Parvathaneni
2021-06-11 16:38 ` Kyrylo Tkachov
0 siblings, 1 reply; 2+ messages in thread
From: Srinath Parvathaneni @ 2021-06-10 16:14 UTC (permalink / raw)
To: gcc-patches; +Cc: Kyrylo.Tkachov, Richard.Earnshaw
[-- Attachment #1: Type: text/plain, Size: 24141 bytes --]
Hi,
This patch fixes PR101016, in which the MVE polymorphic variants fail at link
time with an undefined reference to "__ARM_undef".
Regression tested on arm-none-eabi; no regressions found.
Ok for master?
Regards,
Srinath.
gcc/ChangeLog:
2021-06-10 Srinath Parvathaneni <srinath.parvathaneni@arm.com>
PR target/101016
* config/arm/arm_mve.h (__arm_vld1q): Change __ARM_mve_coerce(p0,
int8_t const *) to __ARM_mve_coerce1(p0, int8_t *), and likewise for the
other element types, in the pointer argument of the polymorphic variant
matching code.
(__arm_vld1q_z): Likewise.
(__arm_vld2q): Likewise.
(__arm_vld4q): Likewise.
(__arm_vldrbq_gather_offset): Likewise.
(__arm_vldrbq_gather_offset_z): Likewise.
gcc/testsuite/ChangeLog:
2021-06-10 Srinath Parvathaneni <srinath.parvathaneni@arm.com>
PR target/101016
* gcc.target/arm/mve/intrinsics/pr101016.c: New test.
############### Attachment also inlined for ease of reply ###############
diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h
index 1380f3acbfe64026bc882c308bb1c243e27ac4b3..83f10036990fc3df956fb2fa4818d1304138b485 100644
--- a/gcc/config/arm/arm_mve.h
+++ b/gcc/config/arm/arm_mve.h
@@ -37565,47 +37565,47 @@ extern void *__ARM_undef;
#define __arm_vld1q(p0) (\
_Generic( (int (*)[__ARM_mve_typeid(p0)])0, \
- int (*)[__ARM_mve_type_int8_t_ptr]: __arm_vld1q_s8 (__ARM_mve_coerce(p0, int8_t const *)), \
- int (*)[__ARM_mve_type_int16_t_ptr]: __arm_vld1q_s16 (__ARM_mve_coerce(p0, int16_t const *)), \
- int (*)[__ARM_mve_type_int32_t_ptr]: __arm_vld1q_s32 (__ARM_mve_coerce(p0, int32_t const *)), \
- int (*)[__ARM_mve_type_uint8_t_ptr]: __arm_vld1q_u8 (__ARM_mve_coerce(p0, uint8_t const *)), \
- int (*)[__ARM_mve_type_uint16_t_ptr]: __arm_vld1q_u16 (__ARM_mve_coerce(p0, uint16_t const *)), \
- int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_vld1q_u32 (__ARM_mve_coerce(p0, uint32_t const *)), \
- int (*)[__ARM_mve_type_float16_t_ptr]: __arm_vld1q_f16 (__ARM_mve_coerce(p0, float16_t const *)), \
- int (*)[__ARM_mve_type_float32_t_ptr]: __arm_vld1q_f32 (__ARM_mve_coerce(p0, float32_t const *))))
+ int (*)[__ARM_mve_type_int8_t_ptr]: __arm_vld1q_s8 (__ARM_mve_coerce1(p0, int8_t *)), \
+ int (*)[__ARM_mve_type_int16_t_ptr]: __arm_vld1q_s16 (__ARM_mve_coerce1(p0, int16_t *)), \
+ int (*)[__ARM_mve_type_int32_t_ptr]: __arm_vld1q_s32 (__ARM_mve_coerce1(p0, int32_t *)), \
+ int (*)[__ARM_mve_type_uint8_t_ptr]: __arm_vld1q_u8 (__ARM_mve_coerce1(p0, uint8_t *)), \
+ int (*)[__ARM_mve_type_uint16_t_ptr]: __arm_vld1q_u16 (__ARM_mve_coerce1(p0, uint16_t *)), \
+ int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_vld1q_u32 (__ARM_mve_coerce1(p0, uint32_t *)), \
+ int (*)[__ARM_mve_type_float16_t_ptr]: __arm_vld1q_f16 (__ARM_mve_coerce1(p0, float16_t *)), \
+ int (*)[__ARM_mve_type_float32_t_ptr]: __arm_vld1q_f32 (__ARM_mve_coerce1(p0, float32_t *))))
#define __arm_vld1q_z(p0,p1) ( \
_Generic( (int (*)[__ARM_mve_typeid(p0)])0, \
- int (*)[__ARM_mve_type_int8_t_ptr]: __arm_vld1q_z_s8 (__ARM_mve_coerce(p0, int8_t const *), p1), \
- int (*)[__ARM_mve_type_int16_t_ptr]: __arm_vld1q_z_s16 (__ARM_mve_coerce(p0, int16_t const *), p1), \
- int (*)[__ARM_mve_type_int32_t_ptr]: __arm_vld1q_z_s32 (__ARM_mve_coerce(p0, int32_t const *), p1), \
- int (*)[__ARM_mve_type_uint8_t_ptr]: __arm_vld1q_z_u8 (__ARM_mve_coerce(p0, uint8_t const *), p1), \
- int (*)[__ARM_mve_type_uint16_t_ptr]: __arm_vld1q_z_u16 (__ARM_mve_coerce(p0, uint16_t const *), p1), \
- int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_vld1q_z_u32 (__ARM_mve_coerce(p0, uint32_t const *), p1), \
- int (*)[__ARM_mve_type_float16_t_ptr]: __arm_vld1q_z_f16 (__ARM_mve_coerce(p0, float16_t const *), p1), \
- int (*)[__ARM_mve_type_float32_t_ptr]: __arm_vld1q_z_f32 (__ARM_mve_coerce(p0, float32_t const *), p1)))
+ int (*)[__ARM_mve_type_int8_t_ptr]: __arm_vld1q_z_s8 (__ARM_mve_coerce1(p0, int8_t *), p1), \
+ int (*)[__ARM_mve_type_int16_t_ptr]: __arm_vld1q_z_s16 (__ARM_mve_coerce1(p0, int16_t *), p1), \
+ int (*)[__ARM_mve_type_int32_t_ptr]: __arm_vld1q_z_s32 (__ARM_mve_coerce1(p0, int32_t *), p1), \
+ int (*)[__ARM_mve_type_uint8_t_ptr]: __arm_vld1q_z_u8 (__ARM_mve_coerce1(p0, uint8_t *), p1), \
+ int (*)[__ARM_mve_type_uint16_t_ptr]: __arm_vld1q_z_u16 (__ARM_mve_coerce1(p0, uint16_t *), p1), \
+ int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_vld1q_z_u32 (__ARM_mve_coerce1(p0, uint32_t *), p1), \
+ int (*)[__ARM_mve_type_float16_t_ptr]: __arm_vld1q_z_f16 (__ARM_mve_coerce1(p0, float16_t *), p1), \
+ int (*)[__ARM_mve_type_float32_t_ptr]: __arm_vld1q_z_f32 (__ARM_mve_coerce1(p0, float32_t *), p1)))
#define __arm_vld2q(p0) ( \
_Generic( (int (*)[__ARM_mve_typeid(p0)])0, \
- int (*)[__ARM_mve_type_int8_t_ptr]: __arm_vld2q_s8 (__ARM_mve_coerce(p0, int8_t const *)), \
- int (*)[__ARM_mve_type_int16_t_ptr]: __arm_vld2q_s16 (__ARM_mve_coerce(p0, int16_t const *)), \
- int (*)[__ARM_mve_type_int32_t_ptr]: __arm_vld2q_s32 (__ARM_mve_coerce(p0, int32_t const *)), \
- int (*)[__ARM_mve_type_uint8_t_ptr]: __arm_vld2q_u8 (__ARM_mve_coerce(p0, uint8_t const *)), \
- int (*)[__ARM_mve_type_uint16_t_ptr]: __arm_vld2q_u16 (__ARM_mve_coerce(p0, uint16_t const *)), \
- int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_vld2q_u32 (__ARM_mve_coerce(p0, uint32_t const *)), \
- int (*)[__ARM_mve_type_float16_t_ptr]: __arm_vld2q_f16 (__ARM_mve_coerce(p0, float16_t const *)), \
- int (*)[__ARM_mve_type_float32_t_ptr]: __arm_vld2q_f32 (__ARM_mve_coerce(p0, float32_t const *))))
+ int (*)[__ARM_mve_type_int8_t_ptr]: __arm_vld2q_s8 (__ARM_mve_coerce1(p0, int8_t *)), \
+ int (*)[__ARM_mve_type_int16_t_ptr]: __arm_vld2q_s16 (__ARM_mve_coerce1(p0, int16_t *)), \
+ int (*)[__ARM_mve_type_int32_t_ptr]: __arm_vld2q_s32 (__ARM_mve_coerce1(p0, int32_t *)), \
+ int (*)[__ARM_mve_type_uint8_t_ptr]: __arm_vld2q_u8 (__ARM_mve_coerce1(p0, uint8_t *)), \
+ int (*)[__ARM_mve_type_uint16_t_ptr]: __arm_vld2q_u16 (__ARM_mve_coerce1(p0, uint16_t *)), \
+ int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_vld2q_u32 (__ARM_mve_coerce1(p0, uint32_t *)), \
+ int (*)[__ARM_mve_type_float16_t_ptr]: __arm_vld2q_f16 (__ARM_mve_coerce1(p0, float16_t *)), \
+ int (*)[__ARM_mve_type_float32_t_ptr]: __arm_vld2q_f32 (__ARM_mve_coerce1(p0, float32_t *))))
#define __arm_vld4q(p0) ( \
_Generic( (int (*)[__ARM_mve_typeid(p0)])0, \
- int (*)[__ARM_mve_type_int8_t_ptr]: __arm_vld4q_s8 (__ARM_mve_coerce(p0, int8_t const *)), \
- int (*)[__ARM_mve_type_int16_t_ptr]: __arm_vld4q_s16 (__ARM_mve_coerce(p0, int16_t const *)), \
- int (*)[__ARM_mve_type_int32_t_ptr]: __arm_vld4q_s32 (__ARM_mve_coerce(p0, int32_t const *)), \
- int (*)[__ARM_mve_type_uint8_t_ptr]: __arm_vld4q_u8 (__ARM_mve_coerce(p0, uint8_t const *)), \
- int (*)[__ARM_mve_type_uint16_t_ptr]: __arm_vld4q_u16 (__ARM_mve_coerce(p0, uint16_t const *)), \
- int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_vld4q_u32 (__ARM_mve_coerce(p0, uint32_t const *)), \
- int (*)[__ARM_mve_type_float16_t_ptr]: __arm_vld4q_f16 (__ARM_mve_coerce(p0, float16_t const *)), \
- int (*)[__ARM_mve_type_float32_t_ptr]: __arm_vld4q_f32 (__ARM_mve_coerce(p0, float32_t const *))))
+ int (*)[__ARM_mve_type_int8_t_ptr]: __arm_vld4q_s8 (__ARM_mve_coerce1(p0, int8_t *)), \
+ int (*)[__ARM_mve_type_int16_t_ptr]: __arm_vld4q_s16 (__ARM_mve_coerce1(p0, int16_t *)), \
+ int (*)[__ARM_mve_type_int32_t_ptr]: __arm_vld4q_s32 (__ARM_mve_coerce1(p0, int32_t *)), \
+ int (*)[__ARM_mve_type_uint8_t_ptr]: __arm_vld4q_u8 (__ARM_mve_coerce1(p0, uint8_t *)), \
+ int (*)[__ARM_mve_type_uint16_t_ptr]: __arm_vld4q_u16 (__ARM_mve_coerce1(p0, uint16_t *)), \
+ int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_vld4q_u32 (__ARM_mve_coerce1(p0, uint32_t *)), \
+ int (*)[__ARM_mve_type_float16_t_ptr]: __arm_vld4q_f16 (__ARM_mve_coerce1(p0, float16_t *)), \
+ int (*)[__ARM_mve_type_float32_t_ptr]: __arm_vld4q_f32 (__ARM_mve_coerce1(p0, float32_t *))))
#define __arm_vldrhq_gather_offset(p0,p1) ({ __typeof(p1) __p1 = (p1); \
_Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)])0, \
@@ -39631,25 +39631,26 @@ extern void *__ARM_undef;
#define __arm_vldrbq_gather_offset(p0,p1) ({ __typeof(p1) __p1 = (p1); \
_Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)])0, \
- int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint8x16_t]: __arm_vldrbq_gather_offset_s8 (__ARM_mve_coerce(p0, int8_t const *), __ARM_mve_coerce(__p1, uint8x16_t)), \
- int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint16x8_t]: __arm_vldrbq_gather_offset_s16 (__ARM_mve_coerce(p0, int8_t const *), __ARM_mve_coerce(__p1, uint16x8_t)), \
- int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vldrbq_gather_offset_s32 (__ARM_mve_coerce(p0, int8_t const *), __ARM_mve_coerce(__p1, uint32x4_t)), \
- int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint8x16_t]: __arm_vldrbq_gather_offset_u8 (__ARM_mve_coerce(p0, uint8_t const *), __ARM_mve_coerce(__p1, uint8x16_t)), \
- int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint16x8_t]: __arm_vldrbq_gather_offset_u16 (__ARM_mve_coerce(p0, uint8_t const *), __ARM_mve_coerce(__p1, uint16x8_t)), \
- int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vldrbq_gather_offset_u32 (__ARM_mve_coerce(p0, uint8_t const *), __ARM_mve_coerce(__p1, uint32x4_t)));})
+ int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint8x16_t]: __arm_vldrbq_gather_offset_s8 (__ARM_mve_coerce1(p0, int8_t *), __ARM_mve_coerce(__p1, uint8x16_t)), \
+ int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint16x8_t]: __arm_vldrbq_gather_offset_s16 (__ARM_mve_coerce1(p0, int8_t *), __ARM_mve_coerce(__p1, uint16x8_t)), \
+ int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vldrbq_gather_offset_s32 (__ARM_mve_coerce1(p0, int8_t *), __ARM_mve_coerce(__p1, uint32x4_t)), \
+ int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint8x16_t]: __arm_vldrbq_gather_offset_u8 (__ARM_mve_coerce1(p0, uint8_t *), __ARM_mve_coerce(__p1, uint8x16_t)), \
+ int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint16x8_t]: __arm_vldrbq_gather_offset_u16 (__ARM_mve_coerce1(p0, uint8_t *), __ARM_mve_coerce(__p1, uint16x8_t)), \
+ int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vldrbq_gather_offset_u32 (__ARM_mve_coerce1(p0, uint8_t *), __ARM_mve_coerce(__p1, uint32x4_t)));})
#define __arm_vstrwq_scatter_base_p(p0,p1,p2,p3) ({ __typeof(p2) __p2 = (p2); \
_Generic( (int (*)[__ARM_mve_typeid(__p2)])0, \
int (*)[__ARM_mve_type_int32x4_t]: __arm_vstrwq_scatter_base_p_s32 (p0, p1, __ARM_mve_coerce(__p2, int32x4_t), p3), \
int (*)[__ARM_mve_type_uint32x4_t]: __arm_vstrwq_scatter_base_p_u32 (p0, p1, __ARM_mve_coerce(__p2, uint32x4_t), p3));})
-#define __arm_vld1q(p0) (_Generic( (int (*)[__ARM_mve_typeid(p0)])0, \
- int (*)[__ARM_mve_type_int8_t_ptr]: __arm_vld1q_s8 (__ARM_mve_coerce(p0, int8_t const *)), \
- int (*)[__ARM_mve_type_int16_t_ptr]: __arm_vld1q_s16 (__ARM_mve_coerce(p0, int16_t const *)), \
- int (*)[__ARM_mve_type_int32_t_ptr]: __arm_vld1q_s32 (__ARM_mve_coerce(p0, int32_t const *)), \
- int (*)[__ARM_mve_type_uint8_t_ptr]: __arm_vld1q_u8 (__ARM_mve_coerce(p0, uint8_t const *)), \
- int (*)[__ARM_mve_type_uint16_t_ptr]: __arm_vld1q_u16 (__ARM_mve_coerce(p0, uint16_t const *)), \
- int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_vld1q_u32 (__ARM_mve_coerce(p0, uint32_t const *))))
+#define __arm_vld1q(p0) (\
+ _Generic( (int (*)[__ARM_mve_typeid(p0)])0, \
+ int (*)[__ARM_mve_type_int8_t_ptr]: __arm_vld1q_s8 (__ARM_mve_coerce1(p0, int8_t *)), \
+ int (*)[__ARM_mve_type_int16_t_ptr]: __arm_vld1q_s16 (__ARM_mve_coerce1(p0, int16_t *)), \
+ int (*)[__ARM_mve_type_int32_t_ptr]: __arm_vld1q_s32 (__ARM_mve_coerce1(p0, int32_t *)), \
+ int (*)[__ARM_mve_type_uint8_t_ptr]: __arm_vld1q_u8 (__ARM_mve_coerce1(p0, uint8_t *)), \
+ int (*)[__ARM_mve_type_uint16_t_ptr]: __arm_vld1q_u16 (__ARM_mve_coerce1(p0, uint16_t *)), \
+ int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_vld1q_u32 (__ARM_mve_coerce1(p0, uint32_t *))))
#define __arm_vldrhq_gather_offset(p0,p1) ({ __typeof(p1) __p1 = (p1); \
_Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)])0, \
@@ -40146,29 +40147,29 @@ extern void *__ARM_undef;
int (*)[__ARM_mve_type_uint32x4_t]: __arm_vbrsrq_x_n_u32 (__ARM_mve_coerce(__p1, uint32x4_t), p2, p3));})
#define __arm_vld1q_z(p0,p1) ( _Generic( (int (*)[__ARM_mve_typeid(p0)])0, \
- int (*)[__ARM_mve_type_int8_t_ptr]: __arm_vld1q_z_s8 (__ARM_mve_coerce(p0, int8_t const *), p1), \
- int (*)[__ARM_mve_type_int16_t_ptr]: __arm_vld1q_z_s16 (__ARM_mve_coerce(p0, int16_t const *), p1), \
- int (*)[__ARM_mve_type_int32_t_ptr]: __arm_vld1q_z_s32 (__ARM_mve_coerce(p0, int32_t const *), p1), \
- int (*)[__ARM_mve_type_uint8_t_ptr]: __arm_vld1q_z_u8 (__ARM_mve_coerce(p0, uint8_t const *), p1), \
- int (*)[__ARM_mve_type_uint16_t_ptr]: __arm_vld1q_z_u16 (__ARM_mve_coerce(p0, uint16_t const *), p1), \
- int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_vld1q_z_u32 (__ARM_mve_coerce(p0, uint32_t const *), p1)))
+ int (*)[__ARM_mve_type_int8_t_ptr]: __arm_vld1q_z_s8 (__ARM_mve_coerce1(p0, int8_t *), p1), \
+ int (*)[__ARM_mve_type_int16_t_ptr]: __arm_vld1q_z_s16 (__ARM_mve_coerce1(p0, int16_t *), p1), \
+ int (*)[__ARM_mve_type_int32_t_ptr]: __arm_vld1q_z_s32 (__ARM_mve_coerce1(p0, int32_t *), p1), \
+ int (*)[__ARM_mve_type_uint8_t_ptr]: __arm_vld1q_z_u8 (__ARM_mve_coerce1(p0, uint8_t *), p1), \
+ int (*)[__ARM_mve_type_uint16_t_ptr]: __arm_vld1q_z_u16 (__ARM_mve_coerce1(p0, uint16_t *), p1), \
+ int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_vld1q_z_u32 (__ARM_mve_coerce1(p0, uint32_t *), p1)))
#define __arm_vld2q(p0) ( _Generic( (int (*)[__ARM_mve_typeid(p0)])0, \
- int (*)[__ARM_mve_type_int8_t_ptr]: __arm_vld2q_s8 (__ARM_mve_coerce(p0, int8_t const *)), \
- int (*)[__ARM_mve_type_int16_t_ptr]: __arm_vld2q_s16 (__ARM_mve_coerce(p0, int16_t const *)), \
- int (*)[__ARM_mve_type_int32_t_ptr]: __arm_vld2q_s32 (__ARM_mve_coerce(p0, int32_t const *)), \
- int (*)[__ARM_mve_type_uint8_t_ptr]: __arm_vld2q_u8 (__ARM_mve_coerce(p0, uint8_t const *)), \
- int (*)[__ARM_mve_type_uint16_t_ptr]: __arm_vld2q_u16 (__ARM_mve_coerce(p0, uint16_t const *)), \
- int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_vld2q_u32 (__ARM_mve_coerce(p0, uint32_t const *))))
+ int (*)[__ARM_mve_type_int8_t_ptr]: __arm_vld2q_s8 (__ARM_mve_coerce1(p0, int8_t *)), \
+ int (*)[__ARM_mve_type_int16_t_ptr]: __arm_vld2q_s16 (__ARM_mve_coerce1(p0, int16_t *)), \
+ int (*)[__ARM_mve_type_int32_t_ptr]: __arm_vld2q_s32 (__ARM_mve_coerce1(p0, int32_t *)), \
+ int (*)[__ARM_mve_type_uint8_t_ptr]: __arm_vld2q_u8 (__ARM_mve_coerce1(p0, uint8_t *)), \
+ int (*)[__ARM_mve_type_uint16_t_ptr]: __arm_vld2q_u16 (__ARM_mve_coerce1(p0, uint16_t *)), \
+ int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_vld2q_u32 (__ARM_mve_coerce1(p0, uint32_t *))))
#define __arm_vld4q(p0) ( _Generic( (int (*)[__ARM_mve_typeid(p0)])0, \
- int (*)[__ARM_mve_type_int8_t_ptr]: __arm_vld4q_s8 (__ARM_mve_coerce(p0, int8_t const *)), \
- int (*)[__ARM_mve_type_int16_t_ptr]: __arm_vld4q_s16 (__ARM_mve_coerce(p0, int16_t const *)), \
- int (*)[__ARM_mve_type_int32_t_ptr]: __arm_vld4q_s32 (__ARM_mve_coerce(p0, int32_t const *)), \
- int (*)[__ARM_mve_type_uint8_t_ptr]: __arm_vld4q_u8 (__ARM_mve_coerce(p0, uint8_t const *)), \
- int (*)[__ARM_mve_type_uint16_t_ptr]: __arm_vld4q_u16 (__ARM_mve_coerce(p0, uint16_t const *)), \
- int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_vld4q_u32 (__ARM_mve_coerce(p0, uint32_t const *))))
+ int (*)[__ARM_mve_type_int8_t_ptr]: __arm_vld4q_s8 (__ARM_mve_coerce1(p0, int8_t *)), \
+ int (*)[__ARM_mve_type_int16_t_ptr]: __arm_vld4q_s16 (__ARM_mve_coerce1(p0, int16_t *)), \
+ int (*)[__ARM_mve_type_int32_t_ptr]: __arm_vld4q_s32 (__ARM_mve_coerce1(p0, int32_t *)), \
+ int (*)[__ARM_mve_type_uint8_t_ptr]: __arm_vld4q_u8 (__ARM_mve_coerce1(p0, uint8_t *)), \
+ int (*)[__ARM_mve_type_uint16_t_ptr]: __arm_vld4q_u16 (__ARM_mve_coerce1(p0, uint16_t *)), \
+ int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_vld4q_u32 (__ARM_mve_coerce1(p0, uint32_t *))))
#define __arm_vgetq_lane(p0,p1) ({ __typeof(p0) __p0 = (p0); \
_Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
@@ -40515,12 +40516,12 @@ extern void *__ARM_undef;
#define __arm_vldrbq_gather_offset_z(p0,p1,p2) ({ __typeof(p1) __p1 = (p1); \
_Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)])0, \
- int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint8x16_t]: __arm_vldrbq_gather_offset_z_s8 (__ARM_mve_coerce(p0, int8_t const *), __ARM_mve_coerce(__p1, uint8x16_t), p2), \
- int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint16x8_t]: __arm_vldrbq_gather_offset_z_s16 (__ARM_mve_coerce(p0, int8_t const *), __ARM_mve_coerce(__p1, uint16x8_t), p2), \
- int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vldrbq_gather_offset_z_s32 (__ARM_mve_coerce(p0, int8_t const *), __ARM_mve_coerce(__p1, uint32x4_t), p2), \
- int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint8x16_t]: __arm_vldrbq_gather_offset_z_u8 (__ARM_mve_coerce(p0, uint8_t const *), __ARM_mve_coerce(__p1, uint8x16_t), p2), \
- int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint16x8_t]: __arm_vldrbq_gather_offset_z_u16 (__ARM_mve_coerce(p0, uint8_t const *), __ARM_mve_coerce(__p1, uint16x8_t), p2), \
- int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vldrbq_gather_offset_z_u32 (__ARM_mve_coerce(p0, uint8_t const *), __ARM_mve_coerce(__p1, uint32x4_t), p2));})
+ int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint8x16_t]: __arm_vldrbq_gather_offset_z_s8 (__ARM_mve_coerce1(p0, int8_t *), __ARM_mve_coerce(__p1, uint8x16_t), p2), \
+ int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint16x8_t]: __arm_vldrbq_gather_offset_z_s16 (__ARM_mve_coerce1(p0, int8_t *), __ARM_mve_coerce(__p1, uint16x8_t), p2), \
+ int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vldrbq_gather_offset_z_s32 (__ARM_mve_coerce1(p0, int8_t *), __ARM_mve_coerce(__p1, uint32x4_t), p2), \
+ int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint8x16_t]: __arm_vldrbq_gather_offset_z_u8 (__ARM_mve_coerce1(p0, uint8_t *), __ARM_mve_coerce(__p1, uint8x16_t), p2), \
+ int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint16x8_t]: __arm_vldrbq_gather_offset_z_u16 (__ARM_mve_coerce1(p0, uint8_t *), __ARM_mve_coerce(__p1, uint16x8_t), p2), \
+ int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vldrbq_gather_offset_z_u32 (__ARM_mve_coerce1(p0, uint8_t *), __ARM_mve_coerce(__p1, uint32x4_t), p2));})
#define __arm_vqrdmlahq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
__typeof(p1) __p1 = (p1); \
@@ -41201,12 +41202,12 @@ extern void *__ARM_undef;
#define __arm_vldrbq_gather_offset(p0,p1) ({ __typeof(p1) __p1 = (p1); \
_Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)])0, \
- int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint8x16_t]: __arm_vldrbq_gather_offset_s8 (__ARM_mve_coerce(p0, int8_t const *), __ARM_mve_coerce(__p1, uint8x16_t)), \
- int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint16x8_t]: __arm_vldrbq_gather_offset_s16 (__ARM_mve_coerce(p0, int8_t const *), __ARM_mve_coerce(__p1, uint16x8_t)), \
- int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vldrbq_gather_offset_s32 (__ARM_mve_coerce(p0, int8_t const *), __ARM_mve_coerce(__p1, uint32x4_t)), \
- int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint8x16_t]: __arm_vldrbq_gather_offset_u8 (__ARM_mve_coerce(p0, uint8_t const *), __ARM_mve_coerce(__p1, uint8x16_t)), \
- int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint16x8_t]: __arm_vldrbq_gather_offset_u16 (__ARM_mve_coerce(p0, uint8_t const *), __ARM_mve_coerce(__p1, uint16x8_t)), \
- int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vldrbq_gather_offset_u32 (__ARM_mve_coerce(p0, uint8_t const *), __ARM_mve_coerce(__p1, uint32x4_t)));})
+ int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint8x16_t]: __arm_vldrbq_gather_offset_s8(__ARM_mve_coerce1(p0, int8_t *), __ARM_mve_coerce(__p1, uint8x16_t)), \
+ int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint16x8_t]: __arm_vldrbq_gather_offset_s16(__ARM_mve_coerce1(p0, int8_t *), __ARM_mve_coerce(__p1, uint16x8_t)), \
+ int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vldrbq_gather_offset_s32(__ARM_mve_coerce1(p0, int8_t *), __ARM_mve_coerce(__p1, uint32x4_t)), \
+ int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint8x16_t]: __arm_vldrbq_gather_offset_u8(__ARM_mve_coerce1(p0, uint8_t *), __ARM_mve_coerce(__p1, uint8x16_t)), \
+ int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint16x8_t]: __arm_vldrbq_gather_offset_u16(__ARM_mve_coerce1(p0, uint8_t *), __ARM_mve_coerce(__p1, uint16x8_t)), \
+ int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vldrbq_gather_offset_u32(__ARM_mve_coerce1(p0, uint8_t *), __ARM_mve_coerce(__p1, uint32x4_t)));})
#define __arm_vidupq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
__typeof(p1) __p1 = (p1); \
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/pr101016.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/pr101016.c
new file mode 100644
index 0000000000000000000000000000000000000000..b12786d04f558474ed9b3df9998663c7f9bc4d1a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/pr101016.c
@@ -0,0 +1,136 @@
+/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
+/* { dg-add-options arm_v8_1m_mve_fp } */
+
+#include "arm_mve.h"
+
+void
+foo (void)
+{
+ mve_pred16_t p;
+ int8x16_t a;
+ int8_t a1[10];
+ int16x8_t b;
+ int16_t b1[10];
+ int32x4_t c;
+ int32_t c1[10];
+ uint8x16_t ua;
+ uint8_t ua1[10];
+ uint16x8_t ub;
+ uint16_t ub1[10];
+ uint32x4_t uc;
+ uint32_t uc1[10];
+ float16x8_t fb;
+ float16_t fb1[10];
+ float32x4_t fc;
+ float32_t fc1[10];
+
+ fb = vld1q (fb1);
+ fc = vld1q (fc1);
+ b = vld1q (b1);
+ c = vld1q (c1);
+ a = vld1q (a1);
+ ub = vld1q (ub1);
+ uc = vld1q (uc1);
+ ua = vld1q (ua1);
+ fb = vld1q_z (fb1, p);
+ fc = vld1q_z (fc1, p);
+ b = vld1q_z (b1, p);
+ c = vld1q_z (c1, p);
+ a = vld1q_z (a1, p);
+ ub = vld1q_z (ub1, p);
+ uc = vld1q_z (uc1, p);
+ ua = vld1q_z (ua1, p);
+}
+
+void
+foo1 (void)
+{
+ mve_pred16_t p;
+ int8x16x2_t a;
+ int8_t a1[10];
+ int16x8x2_t b;
+ int16_t b1[10];
+ int32x4x2_t c;
+ int32_t c1[10];
+ uint8x16x2_t ua;
+ uint8_t ua1[10];
+ uint16x8x2_t ub;
+ uint16_t ub1[10];
+ uint32x4x2_t uc;
+ uint32_t uc1[10];
+ float16x8x2_t fb;
+ float16_t fb1[10];
+ float32x4x2_t fc;
+ float32_t fc1[10];
+
+ fb = vld2q (fb1);
+ fc = vld2q (fc1);
+ b = vld2q (b1);
+ c = vld2q (c1);
+ a = vld2q (a1);
+ ub = vld2q (ub1);
+ uc = vld2q (uc1);
+ ua = vld2q (ua1);
+}
+
+void
+foo2 (void)
+{
+ mve_pred16_t p;
+ int8x16x4_t a;
+ int8_t a1[10];
+ int16x8x4_t b;
+ int16_t b1[10];
+ int32x4x4_t c;
+ int32_t c1[10];
+ uint8x16x4_t ua;
+ uint8_t ua1[10];
+ uint16x8x4_t ub;
+ uint16_t ub1[10];
+ uint32x4x4_t uc;
+ uint32_t uc1[10];
+ float16x8x4_t fb;
+ float16_t fb1[10];
+ float32x4x4_t fc;
+ float32_t fc1[10];
+
+ fb = vld4q (fb1);
+ fc = vld4q (fc1);
+ b = vld4q (b1);
+ c = vld4q (c1);
+ a = vld4q (a1);
+ ub = vld4q (ub1);
+ uc = vld4q (uc1);
+ ua = vld4q (ua1);
+}
+
+void
+foo3 (void)
+{
+ mve_pred16_t p;
+ int16x8_t a;
+ uint16x8_t ua;
+ int8_t a1[10];
+ uint8_t ua1[10];
+ uint16x8_t offset_a;
+ int8x16_t b;
+ uint8x16_t ub;
+ uint8x16_t offset_b;
+ int32x4_t c;
+ uint32x4_t uc;
+ uint32x4_t offset_c;
+
+ a = vldrbq_gather_offset (a1, offset_a);
+ ua = vldrbq_gather_offset (ua1, offset_a);
+ b = vldrbq_gather_offset (a1, offset_b);
+ ub = vldrbq_gather_offset (ua1, offset_b);
+ c = vldrbq_gather_offset (a1, offset_c);
+ uc = vldrbq_gather_offset (ua1, offset_c);
+ a = vldrbq_gather_offset_z (a1, offset_a, p);
+ ua = vldrbq_gather_offset_z (ua1, offset_a, p);
+ b = vldrbq_gather_offset_z (a1, offset_b, p);
+ ub = vldrbq_gather_offset_z (ua1, offset_b, p);
+ c = vldrbq_gather_offset_z (a1, offset_c, p);
+ uc = vldrbq_gather_offset_z (ua1, offset_c, p);
+}
+/* { dg-final { scan-assembler-not "__ARM_undef" } } */
[-- Attachment #2: rb14533.patch --]
[-- Type: text/plain, Size: 23190 bytes --]
diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h
index 1380f3acbfe64026bc882c308bb1c243e27ac4b3..83f10036990fc3df956fb2fa4818d1304138b485 100644
--- a/gcc/config/arm/arm_mve.h
+++ b/gcc/config/arm/arm_mve.h
@@ -37565,47 +37565,47 @@ extern void *__ARM_undef;
#define __arm_vld1q(p0) (\
_Generic( (int (*)[__ARM_mve_typeid(p0)])0, \
- int (*)[__ARM_mve_type_int8_t_ptr]: __arm_vld1q_s8 (__ARM_mve_coerce(p0, int8_t const *)), \
- int (*)[__ARM_mve_type_int16_t_ptr]: __arm_vld1q_s16 (__ARM_mve_coerce(p0, int16_t const *)), \
- int (*)[__ARM_mve_type_int32_t_ptr]: __arm_vld1q_s32 (__ARM_mve_coerce(p0, int32_t const *)), \
- int (*)[__ARM_mve_type_uint8_t_ptr]: __arm_vld1q_u8 (__ARM_mve_coerce(p0, uint8_t const *)), \
- int (*)[__ARM_mve_type_uint16_t_ptr]: __arm_vld1q_u16 (__ARM_mve_coerce(p0, uint16_t const *)), \
- int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_vld1q_u32 (__ARM_mve_coerce(p0, uint32_t const *)), \
- int (*)[__ARM_mve_type_float16_t_ptr]: __arm_vld1q_f16 (__ARM_mve_coerce(p0, float16_t const *)), \
- int (*)[__ARM_mve_type_float32_t_ptr]: __arm_vld1q_f32 (__ARM_mve_coerce(p0, float32_t const *))))
+ int (*)[__ARM_mve_type_int8_t_ptr]: __arm_vld1q_s8 (__ARM_mve_coerce1(p0, int8_t *)), \
+ int (*)[__ARM_mve_type_int16_t_ptr]: __arm_vld1q_s16 (__ARM_mve_coerce1(p0, int16_t *)), \
+ int (*)[__ARM_mve_type_int32_t_ptr]: __arm_vld1q_s32 (__ARM_mve_coerce1(p0, int32_t *)), \
+ int (*)[__ARM_mve_type_uint8_t_ptr]: __arm_vld1q_u8 (__ARM_mve_coerce1(p0, uint8_t *)), \
+ int (*)[__ARM_mve_type_uint16_t_ptr]: __arm_vld1q_u16 (__ARM_mve_coerce1(p0, uint16_t *)), \
+ int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_vld1q_u32 (__ARM_mve_coerce1(p0, uint32_t *)), \
+ int (*)[__ARM_mve_type_float16_t_ptr]: __arm_vld1q_f16 (__ARM_mve_coerce1(p0, float16_t *)), \
+ int (*)[__ARM_mve_type_float32_t_ptr]: __arm_vld1q_f32 (__ARM_mve_coerce1(p0, float32_t *))))
#define __arm_vld1q_z(p0,p1) ( \
_Generic( (int (*)[__ARM_mve_typeid(p0)])0, \
- int (*)[__ARM_mve_type_int8_t_ptr]: __arm_vld1q_z_s8 (__ARM_mve_coerce(p0, int8_t const *), p1), \
- int (*)[__ARM_mve_type_int16_t_ptr]: __arm_vld1q_z_s16 (__ARM_mve_coerce(p0, int16_t const *), p1), \
- int (*)[__ARM_mve_type_int32_t_ptr]: __arm_vld1q_z_s32 (__ARM_mve_coerce(p0, int32_t const *), p1), \
- int (*)[__ARM_mve_type_uint8_t_ptr]: __arm_vld1q_z_u8 (__ARM_mve_coerce(p0, uint8_t const *), p1), \
- int (*)[__ARM_mve_type_uint16_t_ptr]: __arm_vld1q_z_u16 (__ARM_mve_coerce(p0, uint16_t const *), p1), \
- int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_vld1q_z_u32 (__ARM_mve_coerce(p0, uint32_t const *), p1), \
- int (*)[__ARM_mve_type_float16_t_ptr]: __arm_vld1q_z_f16 (__ARM_mve_coerce(p0, float16_t const *), p1), \
- int (*)[__ARM_mve_type_float32_t_ptr]: __arm_vld1q_z_f32 (__ARM_mve_coerce(p0, float32_t const *), p1)))
+ int (*)[__ARM_mve_type_int8_t_ptr]: __arm_vld1q_z_s8 (__ARM_mve_coerce1(p0, int8_t *), p1), \
+ int (*)[__ARM_mve_type_int16_t_ptr]: __arm_vld1q_z_s16 (__ARM_mve_coerce1(p0, int16_t *), p1), \
+ int (*)[__ARM_mve_type_int32_t_ptr]: __arm_vld1q_z_s32 (__ARM_mve_coerce1(p0, int32_t *), p1), \
+ int (*)[__ARM_mve_type_uint8_t_ptr]: __arm_vld1q_z_u8 (__ARM_mve_coerce1(p0, uint8_t *), p1), \
+ int (*)[__ARM_mve_type_uint16_t_ptr]: __arm_vld1q_z_u16 (__ARM_mve_coerce1(p0, uint16_t *), p1), \
+ int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_vld1q_z_u32 (__ARM_mve_coerce1(p0, uint32_t *), p1), \
+ int (*)[__ARM_mve_type_float16_t_ptr]: __arm_vld1q_z_f16 (__ARM_mve_coerce1(p0, float16_t *), p1), \
+ int (*)[__ARM_mve_type_float32_t_ptr]: __arm_vld1q_z_f32 (__ARM_mve_coerce1(p0, float32_t *), p1)))
#define __arm_vld2q(p0) ( \
_Generic( (int (*)[__ARM_mve_typeid(p0)])0, \
- int (*)[__ARM_mve_type_int8_t_ptr]: __arm_vld2q_s8 (__ARM_mve_coerce(p0, int8_t const *)), \
- int (*)[__ARM_mve_type_int16_t_ptr]: __arm_vld2q_s16 (__ARM_mve_coerce(p0, int16_t const *)), \
- int (*)[__ARM_mve_type_int32_t_ptr]: __arm_vld2q_s32 (__ARM_mve_coerce(p0, int32_t const *)), \
- int (*)[__ARM_mve_type_uint8_t_ptr]: __arm_vld2q_u8 (__ARM_mve_coerce(p0, uint8_t const *)), \
- int (*)[__ARM_mve_type_uint16_t_ptr]: __arm_vld2q_u16 (__ARM_mve_coerce(p0, uint16_t const *)), \
- int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_vld2q_u32 (__ARM_mve_coerce(p0, uint32_t const *)), \
- int (*)[__ARM_mve_type_float16_t_ptr]: __arm_vld2q_f16 (__ARM_mve_coerce(p0, float16_t const *)), \
- int (*)[__ARM_mve_type_float32_t_ptr]: __arm_vld2q_f32 (__ARM_mve_coerce(p0, float32_t const *))))
+ int (*)[__ARM_mve_type_int8_t_ptr]: __arm_vld2q_s8 (__ARM_mve_coerce1(p0, int8_t *)), \
+ int (*)[__ARM_mve_type_int16_t_ptr]: __arm_vld2q_s16 (__ARM_mve_coerce1(p0, int16_t *)), \
+ int (*)[__ARM_mve_type_int32_t_ptr]: __arm_vld2q_s32 (__ARM_mve_coerce1(p0, int32_t *)), \
+ int (*)[__ARM_mve_type_uint8_t_ptr]: __arm_vld2q_u8 (__ARM_mve_coerce1(p0, uint8_t *)), \
+ int (*)[__ARM_mve_type_uint16_t_ptr]: __arm_vld2q_u16 (__ARM_mve_coerce1(p0, uint16_t *)), \
+ int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_vld2q_u32 (__ARM_mve_coerce1(p0, uint32_t *)), \
+ int (*)[__ARM_mve_type_float16_t_ptr]: __arm_vld2q_f16 (__ARM_mve_coerce1(p0, float16_t *)), \
+ int (*)[__ARM_mve_type_float32_t_ptr]: __arm_vld2q_f32 (__ARM_mve_coerce1(p0, float32_t *))))
#define __arm_vld4q(p0) ( \
_Generic( (int (*)[__ARM_mve_typeid(p0)])0, \
- int (*)[__ARM_mve_type_int8_t_ptr]: __arm_vld4q_s8 (__ARM_mve_coerce(p0, int8_t const *)), \
- int (*)[__ARM_mve_type_int16_t_ptr]: __arm_vld4q_s16 (__ARM_mve_coerce(p0, int16_t const *)), \
- int (*)[__ARM_mve_type_int32_t_ptr]: __arm_vld4q_s32 (__ARM_mve_coerce(p0, int32_t const *)), \
- int (*)[__ARM_mve_type_uint8_t_ptr]: __arm_vld4q_u8 (__ARM_mve_coerce(p0, uint8_t const *)), \
- int (*)[__ARM_mve_type_uint16_t_ptr]: __arm_vld4q_u16 (__ARM_mve_coerce(p0, uint16_t const *)), \
- int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_vld4q_u32 (__ARM_mve_coerce(p0, uint32_t const *)), \
- int (*)[__ARM_mve_type_float16_t_ptr]: __arm_vld4q_f16 (__ARM_mve_coerce(p0, float16_t const *)), \
- int (*)[__ARM_mve_type_float32_t_ptr]: __arm_vld4q_f32 (__ARM_mve_coerce(p0, float32_t const *))))
+ int (*)[__ARM_mve_type_int8_t_ptr]: __arm_vld4q_s8 (__ARM_mve_coerce1(p0, int8_t *)), \
+ int (*)[__ARM_mve_type_int16_t_ptr]: __arm_vld4q_s16 (__ARM_mve_coerce1(p0, int16_t *)), \
+ int (*)[__ARM_mve_type_int32_t_ptr]: __arm_vld4q_s32 (__ARM_mve_coerce1(p0, int32_t *)), \
+ int (*)[__ARM_mve_type_uint8_t_ptr]: __arm_vld4q_u8 (__ARM_mve_coerce1(p0, uint8_t *)), \
+ int (*)[__ARM_mve_type_uint16_t_ptr]: __arm_vld4q_u16 (__ARM_mve_coerce1(p0, uint16_t *)), \
+ int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_vld4q_u32 (__ARM_mve_coerce1(p0, uint32_t *)), \
+ int (*)[__ARM_mve_type_float16_t_ptr]: __arm_vld4q_f16 (__ARM_mve_coerce1(p0, float16_t *)), \
+ int (*)[__ARM_mve_type_float32_t_ptr]: __arm_vld4q_f32 (__ARM_mve_coerce1(p0, float32_t *))))
#define __arm_vldrhq_gather_offset(p0,p1) ({ __typeof(p1) __p1 = (p1); \
_Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)])0, \
@@ -39631,25 +39631,26 @@ extern void *__ARM_undef;
#define __arm_vldrbq_gather_offset(p0,p1) ({ __typeof(p1) __p1 = (p1); \
_Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)])0, \
- int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint8x16_t]: __arm_vldrbq_gather_offset_s8 (__ARM_mve_coerce(p0, int8_t const *), __ARM_mve_coerce(__p1, uint8x16_t)), \
- int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint16x8_t]: __arm_vldrbq_gather_offset_s16 (__ARM_mve_coerce(p0, int8_t const *), __ARM_mve_coerce(__p1, uint16x8_t)), \
- int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vldrbq_gather_offset_s32 (__ARM_mve_coerce(p0, int8_t const *), __ARM_mve_coerce(__p1, uint32x4_t)), \
- int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint8x16_t]: __arm_vldrbq_gather_offset_u8 (__ARM_mve_coerce(p0, uint8_t const *), __ARM_mve_coerce(__p1, uint8x16_t)), \
- int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint16x8_t]: __arm_vldrbq_gather_offset_u16 (__ARM_mve_coerce(p0, uint8_t const *), __ARM_mve_coerce(__p1, uint16x8_t)), \
- int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vldrbq_gather_offset_u32 (__ARM_mve_coerce(p0, uint8_t const *), __ARM_mve_coerce(__p1, uint32x4_t)));})
+ int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint8x16_t]: __arm_vldrbq_gather_offset_s8 (__ARM_mve_coerce1(p0, int8_t *), __ARM_mve_coerce(__p1, uint8x16_t)), \
+ int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint16x8_t]: __arm_vldrbq_gather_offset_s16 (__ARM_mve_coerce1(p0, int8_t *), __ARM_mve_coerce(__p1, uint16x8_t)), \
+ int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vldrbq_gather_offset_s32 (__ARM_mve_coerce1(p0, int8_t *), __ARM_mve_coerce(__p1, uint32x4_t)), \
+ int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint8x16_t]: __arm_vldrbq_gather_offset_u8 (__ARM_mve_coerce1(p0, uint8_t *), __ARM_mve_coerce(__p1, uint8x16_t)), \
+ int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint16x8_t]: __arm_vldrbq_gather_offset_u16 (__ARM_mve_coerce1(p0, uint8_t *), __ARM_mve_coerce(__p1, uint16x8_t)), \
+ int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vldrbq_gather_offset_u32 (__ARM_mve_coerce1(p0, uint8_t *), __ARM_mve_coerce(__p1, uint32x4_t)));})
#define __arm_vstrwq_scatter_base_p(p0,p1,p2,p3) ({ __typeof(p2) __p2 = (p2); \
_Generic( (int (*)[__ARM_mve_typeid(__p2)])0, \
int (*)[__ARM_mve_type_int32x4_t]: __arm_vstrwq_scatter_base_p_s32 (p0, p1, __ARM_mve_coerce(__p2, int32x4_t), p3), \
int (*)[__ARM_mve_type_uint32x4_t]: __arm_vstrwq_scatter_base_p_u32 (p0, p1, __ARM_mve_coerce(__p2, uint32x4_t), p3));})
-#define __arm_vld1q(p0) (_Generic( (int (*)[__ARM_mve_typeid(p0)])0, \
- int (*)[__ARM_mve_type_int8_t_ptr]: __arm_vld1q_s8 (__ARM_mve_coerce(p0, int8_t const *)), \
- int (*)[__ARM_mve_type_int16_t_ptr]: __arm_vld1q_s16 (__ARM_mve_coerce(p0, int16_t const *)), \
- int (*)[__ARM_mve_type_int32_t_ptr]: __arm_vld1q_s32 (__ARM_mve_coerce(p0, int32_t const *)), \
- int (*)[__ARM_mve_type_uint8_t_ptr]: __arm_vld1q_u8 (__ARM_mve_coerce(p0, uint8_t const *)), \
- int (*)[__ARM_mve_type_uint16_t_ptr]: __arm_vld1q_u16 (__ARM_mve_coerce(p0, uint16_t const *)), \
- int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_vld1q_u32 (__ARM_mve_coerce(p0, uint32_t const *))))
+#define __arm_vld1q(p0) (\
+ _Generic( (int (*)[__ARM_mve_typeid(p0)])0, \
+ int (*)[__ARM_mve_type_int8_t_ptr]: __arm_vld1q_s8 (__ARM_mve_coerce1(p0, int8_t *)), \
+ int (*)[__ARM_mve_type_int16_t_ptr]: __arm_vld1q_s16 (__ARM_mve_coerce1(p0, int16_t *)), \
+ int (*)[__ARM_mve_type_int32_t_ptr]: __arm_vld1q_s32 (__ARM_mve_coerce1(p0, int32_t *)), \
+ int (*)[__ARM_mve_type_uint8_t_ptr]: __arm_vld1q_u8 (__ARM_mve_coerce1(p0, uint8_t *)), \
+ int (*)[__ARM_mve_type_uint16_t_ptr]: __arm_vld1q_u16 (__ARM_mve_coerce1(p0, uint16_t *)), \
+ int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_vld1q_u32 (__ARM_mve_coerce1(p0, uint32_t *))))
#define __arm_vldrhq_gather_offset(p0,p1) ({ __typeof(p1) __p1 = (p1); \
_Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)])0, \
@@ -40146,29 +40147,29 @@ extern void *__ARM_undef;
int (*)[__ARM_mve_type_uint32x4_t]: __arm_vbrsrq_x_n_u32 (__ARM_mve_coerce(__p1, uint32x4_t), p2, p3));})
#define __arm_vld1q_z(p0,p1) ( _Generic( (int (*)[__ARM_mve_typeid(p0)])0, \
- int (*)[__ARM_mve_type_int8_t_ptr]: __arm_vld1q_z_s8 (__ARM_mve_coerce(p0, int8_t const *), p1), \
- int (*)[__ARM_mve_type_int16_t_ptr]: __arm_vld1q_z_s16 (__ARM_mve_coerce(p0, int16_t const *), p1), \
- int (*)[__ARM_mve_type_int32_t_ptr]: __arm_vld1q_z_s32 (__ARM_mve_coerce(p0, int32_t const *), p1), \
- int (*)[__ARM_mve_type_uint8_t_ptr]: __arm_vld1q_z_u8 (__ARM_mve_coerce(p0, uint8_t const *), p1), \
- int (*)[__ARM_mve_type_uint16_t_ptr]: __arm_vld1q_z_u16 (__ARM_mve_coerce(p0, uint16_t const *), p1), \
- int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_vld1q_z_u32 (__ARM_mve_coerce(p0, uint32_t const *), p1)))
+ int (*)[__ARM_mve_type_int8_t_ptr]: __arm_vld1q_z_s8 (__ARM_mve_coerce1(p0, int8_t *), p1), \
+ int (*)[__ARM_mve_type_int16_t_ptr]: __arm_vld1q_z_s16 (__ARM_mve_coerce1(p0, int16_t *), p1), \
+ int (*)[__ARM_mve_type_int32_t_ptr]: __arm_vld1q_z_s32 (__ARM_mve_coerce1(p0, int32_t *), p1), \
+ int (*)[__ARM_mve_type_uint8_t_ptr]: __arm_vld1q_z_u8 (__ARM_mve_coerce1(p0, uint8_t *), p1), \
+ int (*)[__ARM_mve_type_uint16_t_ptr]: __arm_vld1q_z_u16 (__ARM_mve_coerce1(p0, uint16_t *), p1), \
+ int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_vld1q_z_u32 (__ARM_mve_coerce1(p0, uint32_t *), p1)))
#define __arm_vld2q(p0) ( _Generic( (int (*)[__ARM_mve_typeid(p0)])0, \
- int (*)[__ARM_mve_type_int8_t_ptr]: __arm_vld2q_s8 (__ARM_mve_coerce(p0, int8_t const *)), \
- int (*)[__ARM_mve_type_int16_t_ptr]: __arm_vld2q_s16 (__ARM_mve_coerce(p0, int16_t const *)), \
- int (*)[__ARM_mve_type_int32_t_ptr]: __arm_vld2q_s32 (__ARM_mve_coerce(p0, int32_t const *)), \
- int (*)[__ARM_mve_type_uint8_t_ptr]: __arm_vld2q_u8 (__ARM_mve_coerce(p0, uint8_t const *)), \
- int (*)[__ARM_mve_type_uint16_t_ptr]: __arm_vld2q_u16 (__ARM_mve_coerce(p0, uint16_t const *)), \
- int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_vld2q_u32 (__ARM_mve_coerce(p0, uint32_t const *))))
+ int (*)[__ARM_mve_type_int8_t_ptr]: __arm_vld2q_s8 (__ARM_mve_coerce1(p0, int8_t *)), \
+ int (*)[__ARM_mve_type_int16_t_ptr]: __arm_vld2q_s16 (__ARM_mve_coerce1(p0, int16_t *)), \
+ int (*)[__ARM_mve_type_int32_t_ptr]: __arm_vld2q_s32 (__ARM_mve_coerce1(p0, int32_t *)), \
+ int (*)[__ARM_mve_type_uint8_t_ptr]: __arm_vld2q_u8 (__ARM_mve_coerce1(p0, uint8_t *)), \
+ int (*)[__ARM_mve_type_uint16_t_ptr]: __arm_vld2q_u16 (__ARM_mve_coerce1(p0, uint16_t *)), \
+ int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_vld2q_u32 (__ARM_mve_coerce1(p0, uint32_t *))))
#define __arm_vld4q(p0) ( _Generic( (int (*)[__ARM_mve_typeid(p0)])0, \
- int (*)[__ARM_mve_type_int8_t_ptr]: __arm_vld4q_s8 (__ARM_mve_coerce(p0, int8_t const *)), \
- int (*)[__ARM_mve_type_int16_t_ptr]: __arm_vld4q_s16 (__ARM_mve_coerce(p0, int16_t const *)), \
- int (*)[__ARM_mve_type_int32_t_ptr]: __arm_vld4q_s32 (__ARM_mve_coerce(p0, int32_t const *)), \
- int (*)[__ARM_mve_type_uint8_t_ptr]: __arm_vld4q_u8 (__ARM_mve_coerce(p0, uint8_t const *)), \
- int (*)[__ARM_mve_type_uint16_t_ptr]: __arm_vld4q_u16 (__ARM_mve_coerce(p0, uint16_t const *)), \
- int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_vld4q_u32 (__ARM_mve_coerce(p0, uint32_t const *))))
+ int (*)[__ARM_mve_type_int8_t_ptr]: __arm_vld4q_s8 (__ARM_mve_coerce1(p0, int8_t *)), \
+ int (*)[__ARM_mve_type_int16_t_ptr]: __arm_vld4q_s16 (__ARM_mve_coerce1(p0, int16_t *)), \
+ int (*)[__ARM_mve_type_int32_t_ptr]: __arm_vld4q_s32 (__ARM_mve_coerce1(p0, int32_t *)), \
+ int (*)[__ARM_mve_type_uint8_t_ptr]: __arm_vld4q_u8 (__ARM_mve_coerce1(p0, uint8_t *)), \
+ int (*)[__ARM_mve_type_uint16_t_ptr]: __arm_vld4q_u16 (__ARM_mve_coerce1(p0, uint16_t *)), \
+ int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_vld4q_u32 (__ARM_mve_coerce1(p0, uint32_t *))))
#define __arm_vgetq_lane(p0,p1) ({ __typeof(p0) __p0 = (p0); \
_Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
@@ -40515,12 +40516,12 @@ extern void *__ARM_undef;
#define __arm_vldrbq_gather_offset_z(p0,p1,p2) ({ __typeof(p1) __p1 = (p1); \
_Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)])0, \
- int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint8x16_t]: __arm_vldrbq_gather_offset_z_s8 (__ARM_mve_coerce(p0, int8_t const *), __ARM_mve_coerce(__p1, uint8x16_t), p2), \
- int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint16x8_t]: __arm_vldrbq_gather_offset_z_s16 (__ARM_mve_coerce(p0, int8_t const *), __ARM_mve_coerce(__p1, uint16x8_t), p2), \
- int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vldrbq_gather_offset_z_s32 (__ARM_mve_coerce(p0, int8_t const *), __ARM_mve_coerce(__p1, uint32x4_t), p2), \
- int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint8x16_t]: __arm_vldrbq_gather_offset_z_u8 (__ARM_mve_coerce(p0, uint8_t const *), __ARM_mve_coerce(__p1, uint8x16_t), p2), \
- int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint16x8_t]: __arm_vldrbq_gather_offset_z_u16 (__ARM_mve_coerce(p0, uint8_t const *), __ARM_mve_coerce(__p1, uint16x8_t), p2), \
- int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vldrbq_gather_offset_z_u32 (__ARM_mve_coerce(p0, uint8_t const *), __ARM_mve_coerce(__p1, uint32x4_t), p2));})
+ int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint8x16_t]: __arm_vldrbq_gather_offset_z_s8 (__ARM_mve_coerce1(p0, int8_t *), __ARM_mve_coerce(__p1, uint8x16_t), p2), \
+ int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint16x8_t]: __arm_vldrbq_gather_offset_z_s16 (__ARM_mve_coerce1(p0, int8_t *), __ARM_mve_coerce(__p1, uint16x8_t), p2), \
+ int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vldrbq_gather_offset_z_s32 (__ARM_mve_coerce1(p0, int8_t *), __ARM_mve_coerce(__p1, uint32x4_t), p2), \
+ int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint8x16_t]: __arm_vldrbq_gather_offset_z_u8 (__ARM_mve_coerce1(p0, uint8_t *), __ARM_mve_coerce(__p1, uint8x16_t), p2), \
+ int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint16x8_t]: __arm_vldrbq_gather_offset_z_u16 (__ARM_mve_coerce1(p0, uint8_t *), __ARM_mve_coerce(__p1, uint16x8_t), p2), \
+ int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vldrbq_gather_offset_z_u32 (__ARM_mve_coerce1(p0, uint8_t *), __ARM_mve_coerce(__p1, uint32x4_t), p2));})
#define __arm_vqrdmlahq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
__typeof(p1) __p1 = (p1); \
@@ -41201,12 +41202,12 @@ extern void *__ARM_undef;
#define __arm_vldrbq_gather_offset(p0,p1) ({ __typeof(p1) __p1 = (p1); \
_Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)])0, \
- int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint8x16_t]: __arm_vldrbq_gather_offset_s8 (__ARM_mve_coerce(p0, int8_t const *), __ARM_mve_coerce(__p1, uint8x16_t)), \
- int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint16x8_t]: __arm_vldrbq_gather_offset_s16 (__ARM_mve_coerce(p0, int8_t const *), __ARM_mve_coerce(__p1, uint16x8_t)), \
- int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vldrbq_gather_offset_s32 (__ARM_mve_coerce(p0, int8_t const *), __ARM_mve_coerce(__p1, uint32x4_t)), \
- int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint8x16_t]: __arm_vldrbq_gather_offset_u8 (__ARM_mve_coerce(p0, uint8_t const *), __ARM_mve_coerce(__p1, uint8x16_t)), \
- int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint16x8_t]: __arm_vldrbq_gather_offset_u16 (__ARM_mve_coerce(p0, uint8_t const *), __ARM_mve_coerce(__p1, uint16x8_t)), \
- int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vldrbq_gather_offset_u32 (__ARM_mve_coerce(p0, uint8_t const *), __ARM_mve_coerce(__p1, uint32x4_t)));})
+ int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint8x16_t]: __arm_vldrbq_gather_offset_s8(__ARM_mve_coerce1(p0, int8_t *), __ARM_mve_coerce(__p1, uint8x16_t)), \
+ int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint16x8_t]: __arm_vldrbq_gather_offset_s16(__ARM_mve_coerce1(p0, int8_t *), __ARM_mve_coerce(__p1, uint16x8_t)), \
+ int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vldrbq_gather_offset_s32(__ARM_mve_coerce1(p0, int8_t *), __ARM_mve_coerce(__p1, uint32x4_t)), \
+ int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint8x16_t]: __arm_vldrbq_gather_offset_u8(__ARM_mve_coerce1(p0, uint8_t *), __ARM_mve_coerce(__p1, uint8x16_t)), \
+ int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint16x8_t]: __arm_vldrbq_gather_offset_u16(__ARM_mve_coerce1(p0, uint8_t *), __ARM_mve_coerce(__p1, uint16x8_t)), \
+ int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vldrbq_gather_offset_u32(__ARM_mve_coerce1(p0, uint8_t *), __ARM_mve_coerce(__p1, uint32x4_t)));})
#define __arm_vidupq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
__typeof(p1) __p1 = (p1); \
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/pr101016.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/pr101016.c
new file mode 100644
index 0000000000000000000000000000000000000000..b12786d04f558474ed9b3df9998663c7f9bc4d1a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/pr101016.c
@@ -0,0 +1,136 @@
+/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
+/* { dg-add-options arm_v8_1m_mve_fp } */
+
+#include "arm_mve.h"
+
+void
+foo (void)
+{
+ mve_pred16_t p;
+ int8x16_t a;
+ int8_t a1[10];
+ int16x8_t b;
+ int16_t b1[10];
+ int32x4_t c;
+ int32_t c1[10];
+ uint8x16_t ua;
+ uint8_t ua1[10];
+ uint16x8_t ub;
+ uint16_t ub1[10];
+ uint32x4_t uc;
+ uint32_t uc1[10];
+ float16x8_t fb;
+ float16_t fb1[10];
+ float32x4_t fc;
+ float32_t fc1[10];
+
+ fb = vld1q (fb1);
+ fc = vld1q (fc1);
+ b = vld1q (b1);
+ c = vld1q (c1);
+ a = vld1q (a1);
+ ub = vld1q (ub1);
+ uc = vld1q (uc1);
+ ua = vld1q (ua1);
+ fb = vld1q_z (fb1, p);
+ fc = vld1q_z (fc1, p);
+ b = vld1q_z (b1, p);
+ c = vld1q_z (c1, p);
+ a = vld1q_z (a1, p);
+ ub = vld1q_z (ub1, p);
+ uc = vld1q_z (uc1, p);
+ ua = vld1q_z (ua1, p);
+}
+
+void
+foo1 (void)
+{
+ mve_pred16_t p;
+ int8x16x2_t a;
+ int8_t a1[10];
+ int16x8x2_t b;
+ int16_t b1[10];
+ int32x4x2_t c;
+ int32_t c1[10];
+ uint8x16x2_t ua;
+ uint8_t ua1[10];
+ uint16x8x2_t ub;
+ uint16_t ub1[10];
+ uint32x4x2_t uc;
+ uint32_t uc1[10];
+ float16x8x2_t fb;
+ float16_t fb1[10];
+ float32x4x2_t fc;
+ float32_t fc1[10];
+
+ fb = vld2q (fb1);
+ fc = vld2q (fc1);
+ b = vld2q (b1);
+ c = vld2q (c1);
+ a = vld2q (a1);
+ ub = vld2q (ub1);
+ uc = vld2q (uc1);
+ ua = vld2q (ua1);
+}
+
+void
+foo2 (void)
+{
+ mve_pred16_t p;
+ int8x16x4_t a;
+ int8_t a1[10];
+ int16x8x4_t b;
+ int16_t b1[10];
+ int32x4x4_t c;
+ int32_t c1[10];
+ uint8x16x4_t ua;
+ uint8_t ua1[10];
+ uint16x8x4_t ub;
+ uint16_t ub1[10];
+ uint32x4x4_t uc;
+ uint32_t uc1[10];
+ float16x8x4_t fb;
+ float16_t fb1[10];
+ float32x4x4_t fc;
+ float32_t fc1[10];
+
+ fb = vld4q (fb1);
+ fc = vld4q (fc1);
+ b = vld4q (b1);
+ c = vld4q (c1);
+ a = vld4q (a1);
+ ub = vld4q (ub1);
+ uc = vld4q (uc1);
+ ua = vld4q (ua1);
+}
+
+void
+foo3 (void)
+{
+ mve_pred16_t p;
+ int16x8_t a;
+ uint16x8_t ua;
+ int8_t a1[10];
+ uint8_t ua1[10];
+ uint16x8_t offset_a;
+ int8x16_t b;
+ uint8x16_t ub;
+ uint8x16_t offset_b;
+ int32x4_t c;
+ uint32x4_t uc;
+ uint32x4_t offset_c;
+
+ a = vldrbq_gather_offset (a1, offset_a);
+ ua = vldrbq_gather_offset (ua1, offset_a);
+ b = vldrbq_gather_offset (a1, offset_b);
+ ub = vldrbq_gather_offset (ua1, offset_b);
+ c = vldrbq_gather_offset (a1, offset_c);
+ uc = vldrbq_gather_offset (ua1, offset_c);
+ a = vldrbq_gather_offset_z (a1, offset_a, p);
+ ua = vldrbq_gather_offset_z (ua1, offset_a, p);
+ b = vldrbq_gather_offset_z (a1, offset_b, p);
+ ub = vldrbq_gather_offset_z (ua1, offset_b, p);
+ c = vldrbq_gather_offset_z (a1, offset_c, p);
+ uc = vldrbq_gather_offset_z (ua1, offset_c, p);
+}
+/* { dg-final { scan-assembler-not "__ARM_undef" } } */
^ permalink raw reply [flat|nested] 2+ messages in thread
* RE: [GCC][PATCH] arm: Fix polymorphic variants failing with undefined reference to `__ARM_undef` error.
2021-06-10 16:14 [GCC][PATCH] arm: Fix polymorphic variants failing with undefined reference to `__ARM_undef` error Srinath Parvathaneni
@ 2021-06-11 16:38 ` Kyrylo Tkachov
0 siblings, 0 replies; 2+ messages in thread
From: Kyrylo Tkachov @ 2021-06-11 16:38 UTC (permalink / raw)
To: Srinath Parvathaneni, gcc-patches; +Cc: Richard Earnshaw
> -----Original Message-----
> From: Srinath Parvathaneni <Srinath.Parvathaneni@arm.com>
> Sent: 10 June 2021 17:14
> To: gcc-patches@gcc.gnu.org
> Cc: Kyrylo Tkachov <Kyrylo.Tkachov@arm.com>; Richard Earnshaw
> <Richard.Earnshaw@arm.com>
> Subject: [GCC][PATCH] arm: Fix polymorphic variants failing with undefined
> reference to `__ARM_undef` error.
>
> Hi,
>
> This patch fixes the issue mentioned in PR101016, which is mve polymorphic
> variants failing at linking with undefined reference to "__ARM_undef" error.
>
> Regression tested on arm-none-eabi and found no regressions.
>
> Ok for master?
Ok.
Thanks,
Kyrill
>
> Regards,
> Srinath.
>
> gcc/ChangeLog:
>
> 2021-06-10 Srinath Parvathaneni <srinath.parvathaneni@arm.com>
>
> PR target/101016
> * config/arm/arm_mve.h (__arm_vld1q): Change __ARM_mve_coerce(p0,
> int8_t const *) to __ARM_mve_coerce1(p0, int8_t *) in the argument for
> the polymorphic variants matching code.
> (__arm_vld1q_z): Likewise.
> (__arm_vld2q): Likewise.
> (__arm_vld4q): Likewise.
> (__arm_vldrbq_gather_offset): Likewise.
> (__arm_vldrbq_gather_offset_z): Likewise.
>
> gcc/testsuite/ChangeLog:
>
> 2021-06-10 Srinath Parvathaneni <srinath.parvathaneni@arm.com>
>
> PR target/101016
> * gcc.target/arm/mve/intrinsics/pr101016.c: New test.
>
>
>
> ############### Attachment also inlined for ease of reply ###############
>
>
> diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h
> index 1380f3acbfe64026bc882c308bb1c243e27ac4b3..83f10036990fc3df956fb2fa4818d1304138b485 100644
> --- a/gcc/config/arm/arm_mve.h
> +++ b/gcc/config/arm/arm_mve.h
> @@ -37565,47 +37565,47 @@ extern void *__ARM_undef;
>
> #define __arm_vld1q(p0) (\
> _Generic( (int (*)[__ARM_mve_typeid(p0)])0, \
> - int (*)[__ARM_mve_type_int8_t_ptr]: __arm_vld1q_s8
> (__ARM_mve_coerce(p0, int8_t const *)), \
> - int (*)[__ARM_mve_type_int16_t_ptr]: __arm_vld1q_s16
> (__ARM_mve_coerce(p0, int16_t const *)), \
> - int (*)[__ARM_mve_type_int32_t_ptr]: __arm_vld1q_s32
> (__ARM_mve_coerce(p0, int32_t const *)), \
> - int (*)[__ARM_mve_type_uint8_t_ptr]: __arm_vld1q_u8
> (__ARM_mve_coerce(p0, uint8_t const *)), \
> - int (*)[__ARM_mve_type_uint16_t_ptr]: __arm_vld1q_u16
> (__ARM_mve_coerce(p0, uint16_t const *)), \
> - int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_vld1q_u32
> (__ARM_mve_coerce(p0, uint32_t const *)), \
> - int (*)[__ARM_mve_type_float16_t_ptr]: __arm_vld1q_f16
> (__ARM_mve_coerce(p0, float16_t const *)), \
> - int (*)[__ARM_mve_type_float32_t_ptr]: __arm_vld1q_f32
> (__ARM_mve_coerce(p0, float32_t const *))))
> + int (*)[__ARM_mve_type_int8_t_ptr]: __arm_vld1q_s8
> (__ARM_mve_coerce1(p0, int8_t *)), \
> + int (*)[__ARM_mve_type_int16_t_ptr]: __arm_vld1q_s16
> (__ARM_mve_coerce1(p0, int16_t *)), \
> + int (*)[__ARM_mve_type_int32_t_ptr]: __arm_vld1q_s32
> (__ARM_mve_coerce1(p0, int32_t *)), \
> + int (*)[__ARM_mve_type_uint8_t_ptr]: __arm_vld1q_u8
> (__ARM_mve_coerce1(p0, uint8_t *)), \
> + int (*)[__ARM_mve_type_uint16_t_ptr]: __arm_vld1q_u16
> (__ARM_mve_coerce1(p0, uint16_t *)), \
> + int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_vld1q_u32
> (__ARM_mve_coerce1(p0, uint32_t *)), \
> + int (*)[__ARM_mve_type_float16_t_ptr]: __arm_vld1q_f16
> (__ARM_mve_coerce1(p0, float16_t *)), \
> + int (*)[__ARM_mve_type_float32_t_ptr]: __arm_vld1q_f32
> (__ARM_mve_coerce1(p0, float32_t *))))
>
> #define __arm_vld1q_z(p0,p1) ( \
> _Generic( (int (*)[__ARM_mve_typeid(p0)])0, \
> - int (*)[__ARM_mve_type_int8_t_ptr]: __arm_vld1q_z_s8
> (__ARM_mve_coerce(p0, int8_t const *), p1), \
> - int (*)[__ARM_mve_type_int16_t_ptr]: __arm_vld1q_z_s16
> (__ARM_mve_coerce(p0, int16_t const *), p1), \
> - int (*)[__ARM_mve_type_int32_t_ptr]: __arm_vld1q_z_s32
> (__ARM_mve_coerce(p0, int32_t const *), p1), \
> - int (*)[__ARM_mve_type_uint8_t_ptr]: __arm_vld1q_z_u8
> (__ARM_mve_coerce(p0, uint8_t const *), p1), \
> - int (*)[__ARM_mve_type_uint16_t_ptr]: __arm_vld1q_z_u16
> (__ARM_mve_coerce(p0, uint16_t const *), p1), \
> - int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_vld1q_z_u32
> (__ARM_mve_coerce(p0, uint32_t const *), p1), \
> - int (*)[__ARM_mve_type_float16_t_ptr]: __arm_vld1q_z_f16
> (__ARM_mve_coerce(p0, float16_t const *), p1), \
> - int (*)[__ARM_mve_type_float32_t_ptr]: __arm_vld1q_z_f32
> (__ARM_mve_coerce(p0, float32_t const *), p1)))
> + int (*)[__ARM_mve_type_int8_t_ptr]: __arm_vld1q_z_s8
> (__ARM_mve_coerce1(p0, int8_t *), p1), \
> + int (*)[__ARM_mve_type_int16_t_ptr]: __arm_vld1q_z_s16
> (__ARM_mve_coerce1(p0, int16_t *), p1), \
> + int (*)[__ARM_mve_type_int32_t_ptr]: __arm_vld1q_z_s32
> (__ARM_mve_coerce1(p0, int32_t *), p1), \
> + int (*)[__ARM_mve_type_uint8_t_ptr]: __arm_vld1q_z_u8
> (__ARM_mve_coerce1(p0, uint8_t *), p1), \
> + int (*)[__ARM_mve_type_uint16_t_ptr]: __arm_vld1q_z_u16
> (__ARM_mve_coerce1(p0, uint16_t *), p1), \
> + int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_vld1q_z_u32
> (__ARM_mve_coerce1(p0, uint32_t *), p1), \
> + int (*)[__ARM_mve_type_float16_t_ptr]: __arm_vld1q_z_f16
> (__ARM_mve_coerce1(p0, float16_t *), p1), \
> + int (*)[__ARM_mve_type_float32_t_ptr]: __arm_vld1q_z_f32
> (__ARM_mve_coerce1(p0, float32_t *), p1)))
>
> #define __arm_vld2q(p0) ( \
> _Generic( (int (*)[__ARM_mve_typeid(p0)])0, \
> - int (*)[__ARM_mve_type_int8_t_ptr]: __arm_vld2q_s8
> (__ARM_mve_coerce(p0, int8_t const *)), \
> - int (*)[__ARM_mve_type_int16_t_ptr]: __arm_vld2q_s16
> (__ARM_mve_coerce(p0, int16_t const *)), \
> - int (*)[__ARM_mve_type_int32_t_ptr]: __arm_vld2q_s32
> (__ARM_mve_coerce(p0, int32_t const *)), \
> - int (*)[__ARM_mve_type_uint8_t_ptr]: __arm_vld2q_u8
> (__ARM_mve_coerce(p0, uint8_t const *)), \
> - int (*)[__ARM_mve_type_uint16_t_ptr]: __arm_vld2q_u16
> (__ARM_mve_coerce(p0, uint16_t const *)), \
> - int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_vld2q_u32
> (__ARM_mve_coerce(p0, uint32_t const *)), \
> - int (*)[__ARM_mve_type_float16_t_ptr]: __arm_vld2q_f16
> (__ARM_mve_coerce(p0, float16_t const *)), \
> - int (*)[__ARM_mve_type_float32_t_ptr]: __arm_vld2q_f32
> (__ARM_mve_coerce(p0, float32_t const *))))
> + int (*)[__ARM_mve_type_int8_t_ptr]: __arm_vld2q_s8
> (__ARM_mve_coerce1(p0, int8_t *)), \
> + int (*)[__ARM_mve_type_int16_t_ptr]: __arm_vld2q_s16
> (__ARM_mve_coerce1(p0, int16_t *)), \
> + int (*)[__ARM_mve_type_int32_t_ptr]: __arm_vld2q_s32
> (__ARM_mve_coerce1(p0, int32_t *)), \
> + int (*)[__ARM_mve_type_uint8_t_ptr]: __arm_vld2q_u8
> (__ARM_mve_coerce1(p0, uint8_t *)), \
> + int (*)[__ARM_mve_type_uint16_t_ptr]: __arm_vld2q_u16
> (__ARM_mve_coerce1(p0, uint16_t *)), \
> + int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_vld2q_u32
> (__ARM_mve_coerce1(p0, uint32_t *)), \
> + int (*)[__ARM_mve_type_float16_t_ptr]: __arm_vld2q_f16
> (__ARM_mve_coerce1(p0, float16_t *)), \
> + int (*)[__ARM_mve_type_float32_t_ptr]: __arm_vld2q_f32
> (__ARM_mve_coerce1(p0, float32_t *))))
>
> #define __arm_vld4q(p0) ( \
> _Generic( (int (*)[__ARM_mve_typeid(p0)])0, \
> - int (*)[__ARM_mve_type_int8_t_ptr]: __arm_vld4q_s8
> (__ARM_mve_coerce(p0, int8_t const *)), \
> - int (*)[__ARM_mve_type_int16_t_ptr]: __arm_vld4q_s16
> (__ARM_mve_coerce(p0, int16_t const *)), \
> - int (*)[__ARM_mve_type_int32_t_ptr]: __arm_vld4q_s32
> (__ARM_mve_coerce(p0, int32_t const *)), \
> - int (*)[__ARM_mve_type_uint8_t_ptr]: __arm_vld4q_u8
> (__ARM_mve_coerce(p0, uint8_t const *)), \
> - int (*)[__ARM_mve_type_uint16_t_ptr]: __arm_vld4q_u16
> (__ARM_mve_coerce(p0, uint16_t const *)), \
> - int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_vld4q_u32
> (__ARM_mve_coerce(p0, uint32_t const *)), \
> - int (*)[__ARM_mve_type_float16_t_ptr]: __arm_vld4q_f16
> (__ARM_mve_coerce(p0, float16_t const *)), \
> - int (*)[__ARM_mve_type_float32_t_ptr]: __arm_vld4q_f32
> (__ARM_mve_coerce(p0, float32_t const *))))
> + int (*)[__ARM_mve_type_int8_t_ptr]: __arm_vld4q_s8
> (__ARM_mve_coerce1(p0, int8_t *)), \
> + int (*)[__ARM_mve_type_int16_t_ptr]: __arm_vld4q_s16
> (__ARM_mve_coerce1(p0, int16_t *)), \
> + int (*)[__ARM_mve_type_int32_t_ptr]: __arm_vld4q_s32
> (__ARM_mve_coerce1(p0, int32_t *)), \
> + int (*)[__ARM_mve_type_uint8_t_ptr]: __arm_vld4q_u8
> (__ARM_mve_coerce1(p0, uint8_t *)), \
> + int (*)[__ARM_mve_type_uint16_t_ptr]: __arm_vld4q_u16
> (__ARM_mve_coerce1(p0, uint16_t *)), \
> + int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_vld4q_u32
> (__ARM_mve_coerce1(p0, uint32_t *)), \
> + int (*)[__ARM_mve_type_float16_t_ptr]: __arm_vld4q_f16
> (__ARM_mve_coerce1(p0, float16_t *)), \
> + int (*)[__ARM_mve_type_float32_t_ptr]: __arm_vld4q_f32
> (__ARM_mve_coerce1(p0, float32_t *))))
>
> #define __arm_vldrhq_gather_offset(p0,p1) ({ __typeof(p1) __p1 = (p1); \
> _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)])0, \
> @@ -39631,25 +39631,26 @@ extern void *__ARM_undef;
>
> #define __arm_vldrbq_gather_offset(p0,p1) ({ __typeof(p1) __p1 = (p1); \
> _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)])0, \
> - int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint8x16_t]:
> __arm_vldrbq_gather_offset_s8 (__ARM_mve_coerce(p0, int8_t const *),
> __ARM_mve_coerce(__p1, uint8x16_t)), \
> - int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint16x8_t]:
> __arm_vldrbq_gather_offset_s16 (__ARM_mve_coerce(p0, int8_t const *),
> __ARM_mve_coerce(__p1, uint16x8_t)), \
> - int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint32x4_t]:
> __arm_vldrbq_gather_offset_s32 (__ARM_mve_coerce(p0, int8_t const *),
> __ARM_mve_coerce(__p1, uint32x4_t)), \
> - int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint8x16_t]:
> __arm_vldrbq_gather_offset_u8 (__ARM_mve_coerce(p0, uint8_t const *),
> __ARM_mve_coerce(__p1, uint8x16_t)), \
> - int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint16x8_t]:
> __arm_vldrbq_gather_offset_u16 (__ARM_mve_coerce(p0, uint8_t const *),
> __ARM_mve_coerce(__p1, uint16x8_t)), \
> - int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint32x4_t]:
> __arm_vldrbq_gather_offset_u32 (__ARM_mve_coerce(p0, uint8_t const *),
> __ARM_mve_coerce(__p1, uint32x4_t)));})
> + int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint8x16_t]:
> __arm_vldrbq_gather_offset_s8 (__ARM_mve_coerce1(p0, int8_t *),
> __ARM_mve_coerce(__p1, uint8x16_t)), \
> + int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint16x8_t]:
> __arm_vldrbq_gather_offset_s16 (__ARM_mve_coerce1(p0, int8_t *),
> __ARM_mve_coerce(__p1, uint16x8_t)), \
> + int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint32x4_t]:
> __arm_vldrbq_gather_offset_s32 (__ARM_mve_coerce1(p0, int8_t *),
> __ARM_mve_coerce(__p1, uint32x4_t)), \
> + int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint8x16_t]:
> __arm_vldrbq_gather_offset_u8 (__ARM_mve_coerce1(p0, uint8_t *),
> __ARM_mve_coerce(__p1, uint8x16_t)), \
> + int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint16x8_t]:
> __arm_vldrbq_gather_offset_u16 (__ARM_mve_coerce1(p0, uint8_t *),
> __ARM_mve_coerce(__p1, uint16x8_t)), \
> + int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint32x4_t]:
> __arm_vldrbq_gather_offset_u32 (__ARM_mve_coerce1(p0, uint8_t *),
> __ARM_mve_coerce(__p1, uint32x4_t)));})
>
> #define __arm_vstrwq_scatter_base_p(p0,p1,p2,p3) ({ __typeof(p2) __p2 =
> (p2); \
> _Generic( (int (*)[__ARM_mve_typeid(__p2)])0, \
> int (*)[__ARM_mve_type_int32x4_t]: __arm_vstrwq_scatter_base_p_s32
> (p0, p1, __ARM_mve_coerce(__p2, int32x4_t), p3), \
> int (*)[__ARM_mve_type_uint32x4_t]: __arm_vstrwq_scatter_base_p_u32
> (p0, p1, __ARM_mve_coerce(__p2, uint32x4_t), p3));})
>
> -#define __arm_vld1q(p0) (_Generic( (int (*)[__ARM_mve_typeid(p0)])0, \
> - int (*)[__ARM_mve_type_int8_t_ptr]: __arm_vld1q_s8
> (__ARM_mve_coerce(p0, int8_t const *)), \
> - int (*)[__ARM_mve_type_int16_t_ptr]: __arm_vld1q_s16
> (__ARM_mve_coerce(p0, int16_t const *)), \
> - int (*)[__ARM_mve_type_int32_t_ptr]: __arm_vld1q_s32
> (__ARM_mve_coerce(p0, int32_t const *)), \
> - int (*)[__ARM_mve_type_uint8_t_ptr]: __arm_vld1q_u8
> (__ARM_mve_coerce(p0, uint8_t const *)), \
> - int (*)[__ARM_mve_type_uint16_t_ptr]: __arm_vld1q_u16
> (__ARM_mve_coerce(p0, uint16_t const *)), \
> - int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_vld1q_u32
> (__ARM_mve_coerce(p0, uint32_t const *))))
> +#define __arm_vld1q(p0) (\
> + _Generic( (int (*)[__ARM_mve_typeid(p0)])0, \
> + int (*)[__ARM_mve_type_int8_t_ptr]: __arm_vld1q_s8
> (__ARM_mve_coerce1(p0, int8_t *)), \
> + int (*)[__ARM_mve_type_int16_t_ptr]: __arm_vld1q_s16
> (__ARM_mve_coerce1(p0, int16_t *)), \
> + int (*)[__ARM_mve_type_int32_t_ptr]: __arm_vld1q_s32
> (__ARM_mve_coerce1(p0, int32_t *)), \
> + int (*)[__ARM_mve_type_uint8_t_ptr]: __arm_vld1q_u8
> (__ARM_mve_coerce1(p0, uint8_t *)), \
> + int (*)[__ARM_mve_type_uint16_t_ptr]: __arm_vld1q_u16
> (__ARM_mve_coerce1(p0, uint16_t *)), \
> + int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_vld1q_u32
> (__ARM_mve_coerce1(p0, uint32_t *))))
>
> #define __arm_vldrhq_gather_offset(p0,p1) ({ __typeof(p1) __p1 = (p1); \
> _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)])0, \
> @@ -40146,29 +40147,29 @@ extern void *__ARM_undef;
> int (*)[__ARM_mve_type_uint32x4_t]: __arm_vbrsrq_x_n_u32
> (__ARM_mve_coerce(__p1, uint32x4_t), p2, p3));})
>
> #define __arm_vld1q_z(p0,p1) ( _Generic( (int (*)[__ARM_mve_typeid(p0)])0,
> \
> - int (*)[__ARM_mve_type_int8_t_ptr]: __arm_vld1q_z_s8
> (__ARM_mve_coerce(p0, int8_t const *), p1), \
> - int (*)[__ARM_mve_type_int16_t_ptr]: __arm_vld1q_z_s16
> (__ARM_mve_coerce(p0, int16_t const *), p1), \
> - int (*)[__ARM_mve_type_int32_t_ptr]: __arm_vld1q_z_s32
> (__ARM_mve_coerce(p0, int32_t const *), p1), \
> - int (*)[__ARM_mve_type_uint8_t_ptr]: __arm_vld1q_z_u8
> (__ARM_mve_coerce(p0, uint8_t const *), p1), \
> - int (*)[__ARM_mve_type_uint16_t_ptr]: __arm_vld1q_z_u16
> (__ARM_mve_coerce(p0, uint16_t const *), p1), \
> - int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_vld1q_z_u32
> (__ARM_mve_coerce(p0, uint32_t const *), p1)))
> + int (*)[__ARM_mve_type_int8_t_ptr]: __arm_vld1q_z_s8
> (__ARM_mve_coerce1(p0, int8_t *), p1), \
> + int (*)[__ARM_mve_type_int16_t_ptr]: __arm_vld1q_z_s16
> (__ARM_mve_coerce1(p0, int16_t *), p1), \
> + int (*)[__ARM_mve_type_int32_t_ptr]: __arm_vld1q_z_s32
> (__ARM_mve_coerce1(p0, int32_t *), p1), \
> + int (*)[__ARM_mve_type_uint8_t_ptr]: __arm_vld1q_z_u8
> (__ARM_mve_coerce1(p0, uint8_t *), p1), \
> + int (*)[__ARM_mve_type_uint16_t_ptr]: __arm_vld1q_z_u16
> (__ARM_mve_coerce1(p0, uint16_t *), p1), \
> + int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_vld1q_z_u32
> (__ARM_mve_coerce1(p0, uint32_t *), p1)))
>
> #define __arm_vld2q(p0) ( _Generic( (int (*)[__ARM_mve_typeid(p0)])0, \
> - int (*)[__ARM_mve_type_int8_t_ptr]: __arm_vld2q_s8
> (__ARM_mve_coerce(p0, int8_t const *)), \
> - int (*)[__ARM_mve_type_int16_t_ptr]: __arm_vld2q_s16
> (__ARM_mve_coerce(p0, int16_t const *)), \
> - int (*)[__ARM_mve_type_int32_t_ptr]: __arm_vld2q_s32
> (__ARM_mve_coerce(p0, int32_t const *)), \
> - int (*)[__ARM_mve_type_uint8_t_ptr]: __arm_vld2q_u8
> (__ARM_mve_coerce(p0, uint8_t const *)), \
> - int (*)[__ARM_mve_type_uint16_t_ptr]: __arm_vld2q_u16
> (__ARM_mve_coerce(p0, uint16_t const *)), \
> - int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_vld2q_u32
> (__ARM_mve_coerce(p0, uint32_t const *))))
> + int (*)[__ARM_mve_type_int8_t_ptr]: __arm_vld2q_s8
> (__ARM_mve_coerce1(p0, int8_t *)), \
> + int (*)[__ARM_mve_type_int16_t_ptr]: __arm_vld2q_s16
> (__ARM_mve_coerce1(p0, int16_t *)), \
> + int (*)[__ARM_mve_type_int32_t_ptr]: __arm_vld2q_s32
> (__ARM_mve_coerce1(p0, int32_t *)), \
> + int (*)[__ARM_mve_type_uint8_t_ptr]: __arm_vld2q_u8
> (__ARM_mve_coerce1(p0, uint8_t *)), \
> + int (*)[__ARM_mve_type_uint16_t_ptr]: __arm_vld2q_u16
> (__ARM_mve_coerce1(p0, uint16_t *)), \
> + int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_vld2q_u32
> (__ARM_mve_coerce1(p0, uint32_t *))))
>
>
> #define __arm_vld4q(p0) ( _Generic( (int (*)[__ARM_mve_typeid(p0)])0, \
> - int (*)[__ARM_mve_type_int8_t_ptr]: __arm_vld4q_s8
> (__ARM_mve_coerce(p0, int8_t const *)), \
> - int (*)[__ARM_mve_type_int16_t_ptr]: __arm_vld4q_s16
> (__ARM_mve_coerce(p0, int16_t const *)), \
> - int (*)[__ARM_mve_type_int32_t_ptr]: __arm_vld4q_s32
> (__ARM_mve_coerce(p0, int32_t const *)), \
> - int (*)[__ARM_mve_type_uint8_t_ptr]: __arm_vld4q_u8
> (__ARM_mve_coerce(p0, uint8_t const *)), \
> - int (*)[__ARM_mve_type_uint16_t_ptr]: __arm_vld4q_u16
> (__ARM_mve_coerce(p0, uint16_t const *)), \
> - int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_vld4q_u32
> (__ARM_mve_coerce(p0, uint32_t const *))))
> + int (*)[__ARM_mve_type_int8_t_ptr]: __arm_vld4q_s8
> (__ARM_mve_coerce1(p0, int8_t *)), \
> + int (*)[__ARM_mve_type_int16_t_ptr]: __arm_vld4q_s16
> (__ARM_mve_coerce1(p0, int16_t *)), \
> + int (*)[__ARM_mve_type_int32_t_ptr]: __arm_vld4q_s32
> (__ARM_mve_coerce1(p0, int32_t *)), \
> + int (*)[__ARM_mve_type_uint8_t_ptr]: __arm_vld4q_u8
> (__ARM_mve_coerce1(p0, uint8_t *)), \
> + int (*)[__ARM_mve_type_uint16_t_ptr]: __arm_vld4q_u16
> (__ARM_mve_coerce1(p0, uint16_t *)), \
> + int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_vld4q_u32
> (__ARM_mve_coerce1(p0, uint32_t *))))
>
> #define __arm_vgetq_lane(p0,p1) ({ __typeof(p0) __p0 = (p0); \
> _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
> @@ -40515,12 +40516,12 @@ extern void *__ARM_undef;
>
> #define __arm_vldrbq_gather_offset_z(p0,p1,p2) ({ __typeof(p1) __p1 = (p1); \
> _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)])0, \
> - int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint8x16_t]:
> __arm_vldrbq_gather_offset_z_s8 (__ARM_mve_coerce(p0, int8_t const *),
> __ARM_mve_coerce(__p1, uint8x16_t), p2), \
> - int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint16x8_t]:
> __arm_vldrbq_gather_offset_z_s16 (__ARM_mve_coerce(p0, int8_t const *),
> __ARM_mve_coerce(__p1, uint16x8_t), p2), \
> - int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint32x4_t]:
> __arm_vldrbq_gather_offset_z_s32 (__ARM_mve_coerce(p0, int8_t const *),
> __ARM_mve_coerce(__p1, uint32x4_t), p2), \
> - int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint8x16_t]:
> __arm_vldrbq_gather_offset_z_u8 (__ARM_mve_coerce(p0, uint8_t const *),
> __ARM_mve_coerce(__p1, uint8x16_t), p2), \
> - int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint16x8_t]:
> __arm_vldrbq_gather_offset_z_u16 (__ARM_mve_coerce(p0, uint8_t const
> *), __ARM_mve_coerce(__p1, uint16x8_t), p2), \
> - int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint32x4_t]:
> __arm_vldrbq_gather_offset_z_u32 (__ARM_mve_coerce(p0, uint8_t const
> *), __ARM_mve_coerce(__p1, uint32x4_t), p2));})
> + int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint8x16_t]:
> __arm_vldrbq_gather_offset_z_s8 (__ARM_mve_coerce1(p0, int8_t *),
> __ARM_mve_coerce(__p1, uint8x16_t), p2), \
> + int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint16x8_t]:
> __arm_vldrbq_gather_offset_z_s16 (__ARM_mve_coerce1(p0, int8_t *),
> __ARM_mve_coerce(__p1, uint16x8_t), p2), \
> + int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint32x4_t]:
> __arm_vldrbq_gather_offset_z_s32 (__ARM_mve_coerce1(p0, int8_t *),
> __ARM_mve_coerce(__p1, uint32x4_t), p2), \
> + int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint8x16_t]:
> __arm_vldrbq_gather_offset_z_u8 (__ARM_mve_coerce1(p0, uint8_t *),
> __ARM_mve_coerce(__p1, uint8x16_t), p2), \
> + int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint16x8_t]:
> __arm_vldrbq_gather_offset_z_u16 (__ARM_mve_coerce1(p0, uint8_t *),
> __ARM_mve_coerce(__p1, uint16x8_t), p2), \
> + int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint32x4_t]:
> __arm_vldrbq_gather_offset_z_u32 (__ARM_mve_coerce1(p0, uint8_t *),
> __ARM_mve_coerce(__p1, uint32x4_t), p2));})
>
> #define __arm_vqrdmlahq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
> __typeof(p1) __p1 = (p1); \
> @@ -41201,12 +41202,12 @@ extern void *__ARM_undef;
>
> #define __arm_vldrbq_gather_offset(p0,p1) ({ __typeof(p1) __p1 = (p1); \
> _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)])0, \
> - int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint8x16_t]:
> __arm_vldrbq_gather_offset_s8 (__ARM_mve_coerce(p0, int8_t const *),
> __ARM_mve_coerce(__p1, uint8x16_t)), \
> - int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint16x8_t]:
> __arm_vldrbq_gather_offset_s16 (__ARM_mve_coerce(p0, int8_t const *),
> __ARM_mve_coerce(__p1, uint16x8_t)), \
> - int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint32x4_t]:
> __arm_vldrbq_gather_offset_s32 (__ARM_mve_coerce(p0, int8_t const *),
> __ARM_mve_coerce(__p1, uint32x4_t)), \
> - int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint8x16_t]:
> __arm_vldrbq_gather_offset_u8 (__ARM_mve_coerce(p0, uint8_t const *),
> __ARM_mve_coerce(__p1, uint8x16_t)), \
> - int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint16x8_t]:
> __arm_vldrbq_gather_offset_u16 (__ARM_mve_coerce(p0, uint8_t const *),
> __ARM_mve_coerce(__p1, uint16x8_t)), \
> - int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint32x4_t]:
> __arm_vldrbq_gather_offset_u32 (__ARM_mve_coerce(p0, uint8_t const *),
> __ARM_mve_coerce(__p1, uint32x4_t)));})
> + int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint8x16_t]:
> __arm_vldrbq_gather_offset_s8(__ARM_mve_coerce1(p0, int8_t *),
> __ARM_mve_coerce(__p1, uint8x16_t)), \
> + int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint16x8_t]:
> __arm_vldrbq_gather_offset_s16(__ARM_mve_coerce1(p0, int8_t *),
> __ARM_mve_coerce(__p1, uint16x8_t)), \
> + int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint32x4_t]:
> __arm_vldrbq_gather_offset_s32(__ARM_mve_coerce1(p0, int8_t *),
> __ARM_mve_coerce(__p1, uint32x4_t)), \
> + int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint8x16_t]:
> __arm_vldrbq_gather_offset_u8(__ARM_mve_coerce1(p0, uint8_t *),
> __ARM_mve_coerce(__p1, uint8x16_t)), \
> + int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint16x8_t]:
> __arm_vldrbq_gather_offset_u16(__ARM_mve_coerce1(p0, uint8_t *),
> __ARM_mve_coerce(__p1, uint16x8_t)), \
> + int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint32x4_t]:
> __arm_vldrbq_gather_offset_u32(__ARM_mve_coerce1(p0, uint8_t *),
> __ARM_mve_coerce(__p1, uint32x4_t)));})
>
> #define __arm_vidupq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
> __typeof(p1) __p1 = (p1); \
> diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/pr101016.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/pr101016.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..b12786d04f558474ed9b3df9998663c7f9bc4d1a
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/pr101016.c
> @@ -0,0 +1,136 @@
> +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
> +/* { dg-add-options arm_v8_1m_mve_fp } */
> +
> +#include "arm_mve.h"
> +
> +void
> +foo (void)
> +{
> + mve_pred16_t p;
> + int8x16_t a;
> + int8_t a1[10];
> + int16x8_t b;
> + int16_t b1[10];
> + int32x4_t c;
> + int32_t c1[10];
> + uint8x16_t ua;
> + uint8_t ua1[10];
> + uint16x8_t ub;
> + uint16_t ub1[10];
> + uint32x4_t uc;
> + uint32_t uc1[10];
> + float16x8_t fb;
> + float16_t fb1[10];
> + float32x4_t fc;
> + float32_t fc1[10];
> +
> + fb = vld1q (fb1);
> + fc = vld1q (fc1);
> + b = vld1q (b1);
> + c = vld1q (c1);
> + a = vld1q (a1);
> + ub = vld1q (ub1);
> + uc = vld1q (uc1);
> + ua = vld1q (ua1);
> + fb = vld1q_z (fb1, p);
> + fc = vld1q_z (fc1, p);
> + b = vld1q_z (b1, p);
> + c = vld1q_z (c1, p);
> + a = vld1q_z (a1, p);
> + ub = vld1q_z (ub1, p);
> + uc = vld1q_z (uc1, p);
> + ua = vld1q_z (ua1, p);
> +}
> +
> +void
> +foo1 (void)
> +{
> + mve_pred16_t p;
> + int8x16x2_t a;
> + int8_t a1[10];
> + int16x8x2_t b;
> + int16_t b1[10];
> + int32x4x2_t c;
> + int32_t c1[10];
> + uint8x16x2_t ua;
> + uint8_t ua1[10];
> + uint16x8x2_t ub;
> + uint16_t ub1[10];
> + uint32x4x2_t uc;
> + uint32_t uc1[10];
> + float16x8x2_t fb;
> + float16_t fb1[10];
> + float32x4x2_t fc;
> + float32_t fc1[10];
> +
> + fb = vld2q (fb1);
> + fc = vld2q (fc1);
> + b = vld2q (b1);
> + c = vld2q (c1);
> + a = vld2q (a1);
> + ub = vld2q (ub1);
> + uc = vld2q (uc1);
> + ua = vld2q (ua1);
> +}
> +
> +void
> +foo2 (void)
> +{
> + mve_pred16_t p;
> + int8x16x4_t a;
> + int8_t a1[10];
> + int16x8x4_t b;
> + int16_t b1[10];
> + int32x4x4_t c;
> + int32_t c1[10];
> + uint8x16x4_t ua;
> + uint8_t ua1[10];
> + uint16x8x4_t ub;
> + uint16_t ub1[10];
> + uint32x4x4_t uc;
> + uint32_t uc1[10];
> + float16x8x4_t fb;
> + float16_t fb1[10];
> + float32x4x4_t fc;
> + float32_t fc1[10];
> +
> + fb = vld4q (fb1);
> + fc = vld4q (fc1);
> + b = vld4q (b1);
> + c = vld4q (c1);
> + a = vld4q (a1);
> + ub = vld4q (ub1);
> + uc = vld4q (uc1);
> + ua = vld4q (ua1);
> +}
> +
> +void
> +foo3 (void)
> +{
> + mve_pred16_t p;
> + int16x8_t a;
> + uint16x8_t ua;
> + int8_t a1[10];
> + uint8_t ua1[10];
> + uint16x8_t offset_a;
> + int8x16_t b;
> + uint8x16_t ub;
> + uint8x16_t offset_b;
> + int32x4_t c;
> + uint32x4_t uc;
> + uint32x4_t offset_c;
> +
> + a = vldrbq_gather_offset (a1, offset_a);
> + ua = vldrbq_gather_offset (ua1, offset_a);
> + b = vldrbq_gather_offset (a1, offset_b);
> + ub = vldrbq_gather_offset (ua1, offset_b);
> + c = vldrbq_gather_offset (a1, offset_c);
> + uc = vldrbq_gather_offset (ua1, offset_c);
> + a = vldrbq_gather_offset_z (a1, offset_a, p);
> + ua = vldrbq_gather_offset_z (ua1, offset_a, p);
> + b = vldrbq_gather_offset_z (a1, offset_b, p);
> + ub = vldrbq_gather_offset_z (ua1, offset_b, p);
> + c = vldrbq_gather_offset_z (a1, offset_c, p);
> + uc = vldrbq_gather_offset_z (ua1, offset_c, p);
> +}
> +/* { dg-final { scan-assembler-not "__ARM_undef" } } */
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2021-06-11 16:39 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-06-10 16:14 [GCC][PATCH] arm: Fix polymorphic variants failing with undefined reference to `__ARM_undef` error Srinath Parvathaneni
2021-06-11 16:38 ` Kyrylo Tkachov
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).