* [GCC][PATCH] arm: Remove duplicate definitions from arm_mve.h (pr100419).
@ 2021-05-05 9:56 Srinath Parvathaneni
2021-05-05 10:15 ` Richard Earnshaw
0 siblings, 1 reply; 4+ messages in thread
From: Srinath Parvathaneni @ 2021-05-05 9:56 UTC (permalink / raw)
To: gcc-patches; +Cc: Kyrylo.Tkachov, Richard.Earnshaw
[-- Attachment #1: Type: text/plain, Size: 68288 bytes --]
Hi All,
This patch removes several duplicated intrinsic definitions from
arm_mve.h, as reported in PR100419, and also fixes the wrong arguments
in a few of the intrinsics' polymorphic variants.
Regression tested and found no issues.
Ok for master? Is it also OK to backport to the GCC-11 and GCC-10 branches?
gcc/ChangeLog:
2021-05-04 Srinath Parvathaneni <srinath.parvathaneni@arm.com>
PR target/100419
* config/arm/arm_mve.h (__arm_vstrwq_scatter_offset): Fix wrong arguments.
(__arm_vcmpneq): Remove duplicate definition.
(__arm_vstrwq_scatter_offset_p): Likewise.
(__arm_vmaxq_x): Likewise.
(__arm_vmlsdavaq): Likewise.
(__arm_vmlsdavaxq): Likewise.
(__arm_vmlsdavq_p): Likewise.
(__arm_vmlsdavxq_p): Likewise.
(__arm_vrmlaldavhaq): Likewise.
(__arm_vstrbq): Likewise.
(__arm_vstrbq_p): Likewise.
(__arm_vstrbq_scatter_offset): Likewise.
(__arm_vstrbq_scatter_offset_p): Likewise.
(__arm_vstrdq_scatter_offset): Likewise.
(__arm_vstrdq_scatter_offset_p): Likewise.
(__arm_vstrdq_scatter_shifted_offset): Likewise.
(__arm_vstrdq_scatter_shifted_offset_p): Likewise.
Co-authored-by: Joe Ramsay <joe.ramsay@arm.com>
############### Attachment also inlined for ease of reply ###############
diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h
index 3a40c6e68161b64319b071f57a5b0d8393303cfd..dc1d874a6366eb5fe755a70c72ed371c915bd04b 100644
--- a/gcc/config/arm/arm_mve.h
+++ b/gcc/config/arm/arm_mve.h
@@ -37808,33 +37808,19 @@ extern void *__ARM_undef;
int (*)[__ARM_mve_type_uint32x4_t]: __arm_vstrwq_scatter_base_p_u32(p0, p1, __ARM_mve_coerce(__p2, uint32x4_t), p3), \
int (*)[__ARM_mve_type_float32x4_t]: __arm_vstrwq_scatter_base_p_f32(p0, p1, __ARM_mve_coerce(__p2, float32x4_t), p3));})
-#define __arm_vstrwq_scatter_offset(p0,p1,p2) ({ __typeof(p1) __p1 = (p1); \
+#define __arm_vstrwq_scatter_offset(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
__typeof(p2) __p2 = (p2); \
- _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p2)])0, \
- int (*)[__ARM_mve_type_int32_t_ptr][__ARM_mve_type_int32x4_t]: __arm_vstrwq_scatter_offset_s32 (__ARM_mve_coerce(p0, int32_t *), __p1, __ARM_mve_coerce(__p2, int32x4_t)), \
- int (*)[__ARM_mve_type_uint32_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vstrwq_scatter_offset_u32 (__ARM_mve_coerce(p0, uint32_t *), __p1, __ARM_mve_coerce(__p2, uint32x4_t)), \
- int (*)[__ARM_mve_type_float32_t_ptr][__ARM_mve_type_float32x4_t]: __arm_vstrwq_scatter_offset_f32 (__ARM_mve_coerce(p0, float32_t *), __p1, __ARM_mve_coerce(__p2, float32x4_t)));})
+ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p2)])0, \
+ int (*)[__ARM_mve_type_int32_t_ptr][__ARM_mve_type_int32x4_t]: __arm_vstrwq_scatter_offset_s32 (__ARM_mve_coerce(__p0, int32_t *), p1, __ARM_mve_coerce(__p2, int32x4_t)), \
+ int (*)[__ARM_mve_type_uint32_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vstrwq_scatter_offset_u32 (__ARM_mve_coerce(__p0, uint32_t *), p1, __ARM_mve_coerce(__p2, uint32x4_t)), \
+ int (*)[__ARM_mve_type_float32_t_ptr][__ARM_mve_type_float32x4_t]: __arm_vstrwq_scatter_offset_f32 (__ARM_mve_coerce(__p0, float32_t *), p1, __ARM_mve_coerce(__p2, float32x4_t)));})
-#define __arm_vstrwq_scatter_offset_p(p0,p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \
+#define __arm_vstrwq_scatter_offset_p(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
__typeof(p2) __p2 = (p2); \
- _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p2)])0, \
- int (*)[__ARM_mve_type_int32_t_ptr][__ARM_mve_type_int32x4_t]: __arm_vstrwq_scatter_offset_p_s32 (__ARM_mve_coerce(p0, int32_t *), __p1, __ARM_mve_coerce(__p2, int32x4_t), p3), \
- int (*)[__ARM_mve_type_uint32_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vstrwq_scatter_offset_p_u32 (__ARM_mve_coerce(p0, uint32_t *), __p1, __ARM_mve_coerce(__p2, uint32x4_t), p3), \
- int (*)[__ARM_mve_type_float32_t_ptr][__ARM_mve_type_float32x4_t]: __arm_vstrwq_scatter_offset_p_f32 (__ARM_mve_coerce(p0, float32_t *), __p1, __ARM_mve_coerce(__p2, float32x4_t), p3));})
-
-#define __arm_vstrwq_scatter_offset_p(p0,p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \
- __typeof(p2) __p2 = (p2); \
- _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p2)])0, \
- int (*)[__ARM_mve_type_int32_t_ptr][__ARM_mve_type_int32x4_t]: __arm_vstrwq_scatter_offset_p_s32 (__ARM_mve_coerce(p0, int32_t *), __p1, __ARM_mve_coerce(__p2, int32x4_t), p3), \
- int (*)[__ARM_mve_type_uint32_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vstrwq_scatter_offset_p_u32 (__ARM_mve_coerce(p0, uint32_t *), __p1, __ARM_mve_coerce(__p2, uint32x4_t), p3), \
- int (*)[__ARM_mve_type_float32_t_ptr][__ARM_mve_type_float32x4_t]: __arm_vstrwq_scatter_offset_p_f32 (__ARM_mve_coerce(p0, float32_t *), __p1, __ARM_mve_coerce(__p2, float32x4_t), p3));})
-
-#define __arm_vstrwq_scatter_offset(p0,p1,p2) ({ __typeof(p1) __p1 = (p1); \
- __typeof(p2) __p2 = (p2); \
- _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p2)])0, \
- int (*)[__ARM_mve_type_int32_t_ptr][__ARM_mve_type_int32x4_t]: __arm_vstrwq_scatter_offset_s32 (__ARM_mve_coerce(p0, int32_t *), __p1, __ARM_mve_coerce(__p2, int32x4_t)), \
- int (*)[__ARM_mve_type_uint32_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vstrwq_scatter_offset_u32 (__ARM_mve_coerce(p0, uint32_t *), __p1, __ARM_mve_coerce(__p2, uint32x4_t)), \
- int (*)[__ARM_mve_type_float32_t_ptr][__ARM_mve_type_float32x4_t]: __arm_vstrwq_scatter_offset_f32 (__ARM_mve_coerce(p0, float32_t *), __p1, __ARM_mve_coerce(__p2, float32x4_t)));})
+ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p2)])0, \
+ int (*)[__ARM_mve_type_int32_t_ptr][__ARM_mve_type_int32x4_t]: __arm_vstrwq_scatter_offset_p_s32 (__ARM_mve_coerce(__p0, int32_t *), p1, __ARM_mve_coerce(__p2, int32x4_t), p3), \
+ int (*)[__ARM_mve_type_uint32_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vstrwq_scatter_offset_p_u32 (__ARM_mve_coerce(__p0, uint32_t *), p1, __ARM_mve_coerce(__p2, uint32x4_t), p3), \
+ int (*)[__ARM_mve_type_float32_t_ptr][__ARM_mve_type_float32x4_t]: __arm_vstrwq_scatter_offset_p_f32 (__ARM_mve_coerce(__p0, float32_t *), p1, __ARM_mve_coerce(__p2, float32x4_t), p3));})
#define __arm_vstrwq_scatter_shifted_offset(p0,p1,p2) ({ __typeof(p1) __p1 = (p1); \
__typeof(p2) __p2 = (p2); \
@@ -38422,6 +38408,12 @@ extern void *__ARM_undef;
#define __arm_vcmpneq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
__typeof(p1) __p1 = (p1); \
_Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
+ int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vcmpneq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8_t)), \
+ int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vcmpneq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16_t)), \
+ int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vcmpneq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32_t)), \
+ int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int_n]: __arm_vcmpneq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8_t)), \
+ int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]: __arm_vcmpneq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16_t)), \
+ int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]: __arm_vcmpneq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32_t)), \
int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vcmpneq_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \
int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vcmpneq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \
int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vcmpneq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)), \
@@ -38871,23 +38863,6 @@ extern void *__ARM_undef;
int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]: __arm_vcmpeqq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16_t)), \
int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]: __arm_vcmpeqq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32_t)));})
-#define __arm_vcmpneq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
- __typeof(p1) __p1 = (p1); \
- _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
- int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vcmpneq_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \
- int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vcmpneq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \
- int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vcmpneq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)), \
- int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vcmpneq_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t)), \
- int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vcmpneq_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t)), \
- int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vcmpneq_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t)), \
- int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vcmpneq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8_t)), \
- int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vcmpneq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16_t)), \
- int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vcmpneq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32_t)), \
- int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int_n]: __arm_vcmpneq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8_t)), \
- int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]: __arm_vcmpneq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16_t)), \
- int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]: __arm_vcmpneq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32_t)));})
-
-
#define __arm_vqmovntq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
__typeof(p1) __p1 = (p1); \
_Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
@@ -39036,22 +39011,6 @@ extern void *__ARM_undef;
int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vcmpneq_m_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), p2), \
int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vcmpneq_m_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), p2));})
-#define __arm_vcmpneq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
- __typeof(p1) __p1 = (p1); \
- _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
- int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vcmpneq_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \
- int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vcmpneq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \
- int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vcmpneq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)), \
- int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vcmpneq_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t)), \
- int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vcmpneq_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t)), \
- int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vcmpneq_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t)), \
- int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vcmpneq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8_t)), \
- int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vcmpneq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16_t)), \
- int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vcmpneq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32_t)), \
- int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int_n]: __arm_vcmpneq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8_t)), \
- int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]: __arm_vcmpneq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16_t)), \
- int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]: __arm_vcmpneq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32_t)));})
-
#define __arm_vshlcq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
_Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
int (*)[__ARM_mve_type_int8x16_t]: __arm_vshlcq_s8 (__ARM_mve_coerce(__p0, int8x16_t), p1, p2), \
@@ -39367,52 +39326,6 @@ extern void *__ARM_undef;
int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int16x8_t]: __arm_vminaq_m_s16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, int16x8_t), p2), \
int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int32x4_t]: __arm_vminaq_m_s32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, int32x4_t), p2));})
-#define __arm_vrmlaldavhaq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
- __typeof(p1) __p1 = (p1); \
- _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
- int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vcmpltq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), p2), \
- int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vcmpltq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), p2), \
- int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vcmpltq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), p2), \
- int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vcmpltq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8_t), p2), \
- int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vcmpltq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16_t), p2), \
- int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vcmpltq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32_t), p2));})
-
-#define __arm_vmlsdavxq_p(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
- __typeof(p1) __p1 = (p1); \
- _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
- int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vcmpleq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), p2), \
- int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vcmpleq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), p2), \
- int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vcmpleq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), p2), \
- int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vcmpleq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8_t), p2), \
- int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vcmpleq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16_t), p2), \
- int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vcmpleq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32_t), p2));})
-
-#define __arm_vmlsdavq_p(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
- __typeof(p1) __p1 = (p1); \
- _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
- int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vcmpgtq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), p2), \
- int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vcmpgtq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), p2), \
- int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vcmpgtq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), p2), \
- int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vcmpgtq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8_t), p2), \
- int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vcmpgtq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16_t), p2), \
- int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vcmpgtq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32_t), p2));})
-
-#define __arm_vmlsdavaxq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
- __typeof(p1) __p1 = (p1); \
- _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
- int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int16x8_t]: __arm_vshrntq_n_s16 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int16x8_t), p2), \
- int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int32x4_t]: __arm_vshrntq_n_s32 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int32x4_t), p2), \
- int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint16x8_t]: __arm_vshrntq_n_u16 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint16x8_t), p2), \
- int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint32x4_t]: __arm_vshrntq_n_u32 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint32x4_t), p2));})
-
-#define __arm_vmlsdavaq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
- __typeof(p1) __p1 = (p1); \
- _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
- int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int16x8_t]: __arm_vrshrntq_n_s16 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int16x8_t), p2), \
- int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int32x4_t]: __arm_vrshrntq_n_s32 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int32x4_t), p2), \
- int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint16x8_t]: __arm_vrshrntq_n_u16 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint16x8_t), p2), \
- int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint32x4_t]: __arm_vrshrntq_n_u32 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint32x4_t), p2));})
-
#define __arm_vmovlbq_m(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
__typeof(p1) __p1 = (p1); \
_Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
@@ -39711,26 +39624,6 @@ extern void *__ARM_undef;
int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vmulq_m_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \
int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vmulq_m_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3));})
-#define __arm_vstrbq(p0,p1) ({ __typeof(p1) __p1 = (p1); \
- _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)])0, \
- int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_int8x16_t]: __arm_vstrbq_s8 (__ARM_mve_coerce(p0, int8_t *), __ARM_mve_coerce(__p1, int8x16_t)), \
- int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_int16x8_t]: __arm_vstrbq_s16 (__ARM_mve_coerce(p0, int8_t *), __ARM_mve_coerce(__p1, int16x8_t)), \
- int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_int32x4_t]: __arm_vstrbq_s32 (__ARM_mve_coerce(p0, int8_t *), __ARM_mve_coerce(__p1, int32x4_t)), \
- int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint8x16_t]: __arm_vstrbq_u8 (__ARM_mve_coerce(p0, uint8_t *), __ARM_mve_coerce(__p1, uint8x16_t)), \
- int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint16x8_t]: __arm_vstrbq_u16 (__ARM_mve_coerce(p0, uint8_t *), __ARM_mve_coerce(__p1, uint16x8_t)), \
- int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vstrbq_u32 (__ARM_mve_coerce(p0, uint8_t *), __ARM_mve_coerce(__p1, uint32x4_t)));})
-
-#define __arm_vstrbq_scatter_offset(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
- __typeof(p1) __p1 = (p1); \
- __typeof(p2) __p2 = (p2); \
- _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
- int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint8x16_t][__ARM_mve_type_int8x16_t]: __arm_vstrbq_scatter_offset_s8 (__ARM_mve_coerce(__p0, int8_t *), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, int8x16_t)), \
- int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint16x8_t][__ARM_mve_type_int16x8_t]: __arm_vstrbq_scatter_offset_s16 (__ARM_mve_coerce(__p0, int8_t *), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, int16x8_t)), \
- int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint32x4_t][__ARM_mve_type_int32x4_t]: __arm_vstrbq_scatter_offset_s32 (__ARM_mve_coerce(__p0, int8_t *), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, int32x4_t)), \
- int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vstrbq_scatter_offset_u8 (__ARM_mve_coerce(__p0, uint8_t *), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t)), \
- int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vstrbq_scatter_offset_u16 (__ARM_mve_coerce(__p0, uint8_t *), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t)), \
- int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vstrbq_scatter_offset_u32 (__ARM_mve_coerce(__p0, uint8_t *), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t)));})
-
#define __arm_vstrwq_scatter_base(p0,p1,p2) ({ __typeof(p2) __p2 = (p2); \
_Generic( (int (*)[__ARM_mve_typeid(__p2)])0, \
int (*)[__ARM_mve_type_int32x4_t]: __arm_vstrwq_scatter_base_s32(p0, p1, __ARM_mve_coerce(__p2, int32x4_t)), \
@@ -39745,27 +39638,6 @@ extern void *__ARM_undef;
int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint16x8_t]: __arm_vldrbq_gather_offset_u16 (__ARM_mve_coerce(p0, uint8_t const *), __ARM_mve_coerce(__p1, uint16x8_t)), \
int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vldrbq_gather_offset_u32 (__ARM_mve_coerce(p0, uint8_t const *), __ARM_mve_coerce(__p1, uint32x4_t)));})
-#define __arm_vstrbq_p(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
- __typeof(p1) __p1 = (p1); \
- _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
- int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_int8x16_t]: __arm_vstrbq_p_s8 (__ARM_mve_coerce(__p0, int8_t *), __ARM_mve_coerce(__p1, int8x16_t), p2), \
- int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_int16x8_t]: __arm_vstrbq_p_s16 (__ARM_mve_coerce(__p0, int8_t *), __ARM_mve_coerce(__p1, int16x8_t), p2), \
- int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_int32x4_t]: __arm_vstrbq_p_s32 (__ARM_mve_coerce(__p0, int8_t *), __ARM_mve_coerce(__p1, int32x4_t), p2), \
- int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint8x16_t]: __arm_vstrbq_p_u8 (__ARM_mve_coerce(__p0, uint8_t *), __ARM_mve_coerce(__p1, uint8x16_t), p2), \
- int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint16x8_t]: __arm_vstrbq_p_u16 (__ARM_mve_coerce(__p0, uint8_t *), __ARM_mve_coerce(__p1, uint16x8_t), p2), \
- int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vstrbq_p_u32 (__ARM_mve_coerce(__p0, uint8_t *), __ARM_mve_coerce(__p1, uint32x4_t), p2));})
-
-#define __arm_vstrbq_scatter_offset_p(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
- __typeof(p1) __p1 = (p1); \
- __typeof(p2) __p2 = (p2); \
- _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
- int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint8x16_t][__ARM_mve_type_int8x16_t]: __arm_vstrbq_scatter_offset_p_s8 (__ARM_mve_coerce(__p0, int8_t *), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
- int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint16x8_t][__ARM_mve_type_int16x8_t]: __arm_vstrbq_scatter_offset_p_s16 (__ARM_mve_coerce(__p0, int8_t *), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
- int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint32x4_t][__ARM_mve_type_int32x4_t]: __arm_vstrbq_scatter_offset_p_s32 (__ARM_mve_coerce(__p0, int8_t *), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \
- int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vstrbq_scatter_offset_p_u8 (__ARM_mve_coerce(__p0, uint8_t *), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \
- int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vstrbq_scatter_offset_p_u16 (__ARM_mve_coerce(__p0, uint8_t *), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \
- int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vstrbq_scatter_offset_p_u32 (__ARM_mve_coerce(__p0, uint8_t *), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3));})
-
#define __arm_vstrwq_scatter_base_p(p0,p1,p2,p3) ({ __typeof(p2) __p2 = (p2); \
_Generic( (int (*)[__ARM_mve_typeid(__p2)])0, \
int (*)[__ARM_mve_type_int32x4_t]: __arm_vstrwq_scatter_base_p_s32 (p0, p1, __ARM_mve_coerce(__p2, int32x4_t), p3), \
@@ -39921,34 +39793,6 @@ extern void *__ARM_undef;
int (*)[__ARM_mve_type_int64x2_t]: __arm_vstrdq_scatter_base_s64 (p0, p1, __ARM_mve_coerce(__p2, int64x2_t)), \
int (*)[__ARM_mve_type_uint64x2_t]: __arm_vstrdq_scatter_base_u64 (p0, p1, __ARM_mve_coerce(__p2, uint64x2_t)));})
-#define __arm_vstrdq_scatter_offset_p(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
- __typeof(p1) __p1 = (p1); \
- __typeof(p2) __p2 = (p2); \
- _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p2)])0, \
- int (*)[__ARM_mve_type_int64_t_ptr][__ARM_mve_type_int64x2_t]: __arm_vstrdq_scatter_offset_p_s64 (__ARM_mve_coerce(__p0, int64_t *), __p1, __ARM_mve_coerce(__p2, int64x2_t), p3), \
- int (*)[__ARM_mve_type_uint64_t_ptr][__ARM_mve_type_uint64x2_t]: __arm_vstrdq_scatter_offset_p_u64 (__ARM_mve_coerce(__p0, uint64_t *), __p1, __ARM_mve_coerce(__p2, uint64x2_t), p3));})
-
-#define __arm_vstrdq_scatter_offset(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
- __typeof(p1) __p1 = (p1); \
- __typeof(p2) __p2 = (p2); \
- _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p2)])0, \
- int (*)[__ARM_mve_type_int64_t_ptr][__ARM_mve_type_int64x2_t]: __arm_vstrdq_scatter_offset_s64 (__ARM_mve_coerce(__p0, int64_t *), __p1, __ARM_mve_coerce(__p2, int64x2_t)), \
- int (*)[__ARM_mve_type_uint64_t_ptr][__ARM_mve_type_uint64x2_t]: __arm_vstrdq_scatter_offset_u64 (__ARM_mve_coerce(__p0, uint64_t *), __p1, __ARM_mve_coerce(__p2, uint64x2_t)));})
-
-#define __arm_vstrdq_scatter_shifted_offset_p(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
- __typeof(p1) __p1 = (p1); \
- __typeof(p2) __p2 = (p2); \
- _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p2)])0, \
- int (*)[__ARM_mve_type_int64_t_ptr][__ARM_mve_type_int64x2_t]: __arm_vstrdq_scatter_shifted_offset_p_s64 (__ARM_mve_coerce(__p0, int64_t *), __p1, __ARM_mve_coerce(__p2, int64x2_t), p3), \
- int (*)[__ARM_mve_type_uint64_t_ptr][__ARM_mve_type_uint64x2_t]: __arm_vstrdq_scatter_shifted_offset_p_u64 (__ARM_mve_coerce(__p0, uint64_t *), __p1, __ARM_mve_coerce(__p2, uint64x2_t), p3));})
-
-#define __arm_vstrdq_scatter_shifted_offset(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
- __typeof(p1) __p1 = (p1); \
- __typeof(p2) __p2 = (p2); \
- _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p2)])0, \
- int (*)[__ARM_mve_type_int64_t_ptr][__ARM_mve_type_int64x2_t]: __arm_vstrdq_scatter_shifted_offset_s64 (__ARM_mve_coerce(__p0, int64_t *), __p1, __ARM_mve_coerce(__p2, int64x2_t)), \
- int (*)[__ARM_mve_type_uint64_t_ptr][__ARM_mve_type_uint64x2_t]: __arm_vstrdq_scatter_shifted_offset_u64 (__ARM_mve_coerce(__p0, uint64_t *), __p1, __ARM_mve_coerce(__p2, uint64x2_t)));})
-
#define __arm_vstrhq_scatter_offset(p0,p1,p2) ({ __typeof(p1) __p1 = (p1); \
__typeof(p2) __p2 = (p2); \
_Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
@@ -39981,29 +39825,17 @@ extern void *__ARM_undef;
int (*)[__ARM_mve_type_uint16_t_ptr][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vstrhq_scatter_shifted_offset_p_u16 (__ARM_mve_coerce(p0, uint16_t *), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \
int (*)[__ARM_mve_type_uint16_t_ptr][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vstrhq_scatter_shifted_offset_p_u32 (__ARM_mve_coerce(p0, uint16_t *), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3));})
-#define __arm_vstrwq_scatter_offset(p0,p1,p2) ({ __typeof(p1) __p1 = (p1); \
+#define __arm_vstrwq_scatter_offset(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
__typeof(p2) __p2 = (p2); \
- _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p2)])0, \
- int (*)[__ARM_mve_type_int32_t_ptr][__ARM_mve_type_int32x4_t]: __arm_vstrwq_scatter_offset_s32 (__ARM_mve_coerce(p0, int32_t *), __p1, __ARM_mve_coerce(__p2, int32x4_t)), \
- int (*)[__ARM_mve_type_uint32_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vstrwq_scatter_offset_u32 (__ARM_mve_coerce(p0, uint32_t *), __p1, __ARM_mve_coerce(__p2, uint32x4_t)));})
+ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p2)])0, \
+ int (*)[__ARM_mve_type_int32_t_ptr][__ARM_mve_type_int32x4_t]: __arm_vstrwq_scatter_offset_s32 (__ARM_mve_coerce(__p0, int32_t *), p1, __ARM_mve_coerce(__p2, int32x4_t)), \
+ int (*)[__ARM_mve_type_uint32_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vstrwq_scatter_offset_u32 (__ARM_mve_coerce(__p0, uint32_t *), p1, __ARM_mve_coerce(__p2, uint32x4_t)));})
-#define __arm_vstrwq_scatter_offset_p(p0,p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \
+#define __arm_vstrwq_scatter_offset_p(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
__typeof(p2) __p2 = (p2); \
- _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p2)])0, \
- int (*)[__ARM_mve_type_int32_t_ptr][__ARM_mve_type_int32x4_t]: __arm_vstrwq_scatter_offset_p_s32 (__ARM_mve_coerce(p0, int32_t *), __p1, __ARM_mve_coerce(__p2, int32x4_t), p3), \
- int (*)[__ARM_mve_type_uint32_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vstrwq_scatter_offset_p_u32 (__ARM_mve_coerce(p0, uint32_t *), __p1, __ARM_mve_coerce(__p2, uint32x4_t)));})
-
-#define __arm_vstrwq_scatter_offset_p(p0,p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \
- __typeof(p2) __p2 = (p2); \
- _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p2)])0, \
- int (*)[__ARM_mve_type_int32_t_ptr][__ARM_mve_type_int32x4_t]: __arm_vstrwq_scatter_offset_p_s32 (__ARM_mve_coerce(p0, int32_t *), __p1, __ARM_mve_coerce(__p2, int32x4_t), p3), \
- int (*)[__ARM_mve_type_uint32_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vstrwq_scatter_offset_p_u32 (__ARM_mve_coerce(p0, uint32_t *), __p1, __ARM_mve_coerce(__p2, uint32x4_t), p3));})
-
-#define __arm_vstrwq_scatter_offset(p0,p1,p2) ({ __typeof(p1) __p1 = (p1); \
- __typeof(p2) __p2 = (p2); \
- _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p2)])0, \
- int (*)[__ARM_mve_type_int32_t_ptr][__ARM_mve_type_int32x4_t]: __arm_vstrwq_scatter_offset_s32 (__ARM_mve_coerce(p0, int32_t *), __p1, __ARM_mve_coerce(__p2, int32x4_t)), \
- int (*)[__ARM_mve_type_uint32_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vstrwq_scatter_offset_u32 (__ARM_mve_coerce(p0, uint32_t *), __p1, __ARM_mve_coerce(__p2, uint32x4_t)));})
+ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p2)])0, \
+ int (*)[__ARM_mve_type_int32_t_ptr][__ARM_mve_type_int32x4_t]: __arm_vstrwq_scatter_offset_p_s32 (__ARM_mve_coerce(__p0, int32_t *), p1, __ARM_mve_coerce(__p2, int32x4_t), p3), \
+ int (*)[__ARM_mve_type_uint32_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vstrwq_scatter_offset_p_u32 (__ARM_mve_coerce(__p0, uint32_t *), p1, __ARM_mve_coerce(__p2, uint32x4_t), p3));})
#define __arm_vstrwq_scatter_shifted_offset(p0,p1,p2) ({ __typeof(p1) __p1 = (p1); \
__typeof(p2) __p2 = (p2); \
@@ -40160,32 +39992,6 @@ extern void *__ARM_undef;
int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_veorq_x_u16 (__ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \
int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_veorq_x_u32 (__ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3));})
-#define __arm_vmaxq_x(p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \
- __typeof(p2) __p2 = (p2); \
- _Generic( (int (*)[__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
- int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vmulq_x_s8 (__ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
- int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmulq_x_s16 (__ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
- int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmulq_x_s32 (__ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \
- int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vmulq_x_n_s8 (__ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8_t), p3), \
- int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vmulq_x_n_s16 (__ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16_t), p3), \
- int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vmulq_x_n_s32 (__ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32_t), p3), \
- int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vmulq_x_u8 (__ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \
- int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vmulq_x_u16 (__ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \
- int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vmulq_x_u32 (__ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3), \
- int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int_n]: __arm_vmulq_x_n_u8 (__ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8_t), p3), \
- int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]: __arm_vmulq_x_n_u16 (__ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16_t), p3), \
- int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]: __arm_vmulq_x_n_u32 (__ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32_t), p3));})
-
-#define __arm_vminq_x(p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \
- __typeof(p2) __p2 = (p2); \
- _Generic( (int (*)[__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
- int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vminq_x_s8 (__ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
- int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vminq_x_s16 (__ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
- int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vminq_x_s32 (__ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \
- int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vminq_x_u8 (__ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \
- int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vminq_x_u16 (__ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \
- int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vminq_x_u32 (__ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3));})
-
#define __arm_vmovlbq_x(p1,p2) ({ __typeof(p1) __p1 = (p1); \
_Generic( (int (*)[__ARM_mve_typeid(__p1)])0, \
int (*)[__ARM_mve_type_int8x16_t]: __arm_vmovlbq_x_s8 (__ARM_mve_coerce(__p1, int8x16_t), p2), \
@@ -41013,13 +40819,6 @@ extern void *__ARM_undef;
int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmlsldavaxq_p_s16(__p0, __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmlsldavaxq_p_s32(__p0, __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3));})
-#define __arm_vrmlaldavhaq_p(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
- __typeof(p1) __p1 = (p1); \
- __typeof(p2) __p2 = (p2); \
- _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
- int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vrmlaldavhaq_p_s32 (__ARM_mve_coerce(__p0, int64_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \
- int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vrmlaldavhaq_p_u32 (__ARM_mve_coerce(__p0, uint64_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3));})
-
#define __arm_vrmlaldavhaxq_p(p0,p1,p2,p3) __arm_vrmlaldavhaxq_p_s32(p0,p1,p2,p3)
#define __arm_vrmlsldavhaq_p(p0,p1,p2,p3) __arm_vrmlsldavhaq_p_s32(p0,p1,p2,p3)
@@ -41343,21 +41142,47 @@ extern void *__ARM_undef;
int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vqrdmladhxq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqrdmladhxq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3));})
-#define __arm_vmlsdavaxq_p(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
- __typeof(p1) __p1 = (p1); \
+#define __arm_vmlsdavaxq_p(p0,p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \
__typeof(p2) __p2 = (p2); \
_Generic( (int (*)[__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
- int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vmlsdavaxq_p_s8 (__p0, __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
- int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmlsdavaxq_p_s16 (__p0, __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
- int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmlsdavaxq_p_s32 (__p0, __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3));})
+ int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vmlsdavaxq_p_s8 (p0, __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
+ int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmlsdavaxq_p_s16 (p0, __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
+ int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmlsdavaxq_p_s32 (p0, __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3));})
+
+#define __arm_vmlsdavaq(p0,p1,p2) ({ __typeof(p1) __p1 = (p1); \
+ __typeof(p2) __p2 = (p2); \
+ _Generic( (int (*)[__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
+ int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vmlsdavaq_s8(p0, __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t)), \
+ int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmlsdavaq_s16(p0, __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t)), \
+ int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmlsdavaq_s32(p0, __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t)));})
+
+#define __arm_vmlsdavaxq(p0,p1,p2) ({ __typeof(p2) __p2 = (p2); \
+ __typeof(p1) __p1 = (p1); \
+ _Generic( (int (*)[__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
+ int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vmlsdavaxq_s8(p0, __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t)), \
+ int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmlsdavaxq_s16(p0, __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t)), \
+ int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmlsdavaxq_s32(p0, __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t)));})
-#define __arm_vmlsdavaq_p(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
+#define __arm_vmlsdavq_p(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
__typeof(p1) __p1 = (p1); \
+ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
+ int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vmlsdavq_p_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), p2), \
+ int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmlsdavq_p_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), p2), \
+ int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmlsdavq_p_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), p2));})
+
+#define __arm_vmlsdavxq_p(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
+ __typeof(p1) __p1 = (p1); \
+ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
+ int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vmlsdavxq_p_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), p2), \
+ int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmlsdavxq_p_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), p2), \
+ int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmlsdavxq_p_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), p2));})
+
+#define __arm_vmlsdavaq_p(p0,p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \
__typeof(p2) __p2 = (p2); \
_Generic( (int (*)[__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
- int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vmlsdavaq_p_s8(__p0, __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
- int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmlsdavaq_p_s16(__p0, __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
- int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmlsdavaq_p_s32(__p0, __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3));})
+ int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vmlsdavaq_p_s8(p0, __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
+ int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmlsdavaq_p_s16(p0, __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
+ int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmlsdavaq_p_s32(p0, __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3));})
#define __arm_vmladavaxq_p(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
__typeof(p1) __p1 = (p1); \
@@ -41445,8 +41270,8 @@ extern void *__ARM_undef;
#define __arm_viwdupq_u16(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
_Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
- int (*)[__ARM_mve_type_int_n]: __arm_viwdupq_n_u16 (__ARM_mve_coerce(__p0, uint32_t), p1, p2), \
- int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_viwdupq_wb_u16 (__ARM_mve_coerce(__p0, uint32_t *), p1, p2));})
+ int (*)[__ARM_mve_type_int_n]: __arm_viwdupq_n_u16 (__ARM_mve_coerce(__p0, uint32_t), p1, (const int) p2), \
+ int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_viwdupq_wb_u16 (__ARM_mve_coerce(__p0, uint32_t *), p1, (const int) p2));})
#define __arm_viwdupq_u32(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
_Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
@@ -41628,16 +41453,6 @@ extern void *__ARM_undef;
int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int16x8_t]: __arm_vmaxavq_p_s16 (__p0, __ARM_mve_coerce(__p1, int16x8_t), p2), \
int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int32x4_t]: __arm_vmaxavq_p_s32 (__p0, __ARM_mve_coerce(__p1, int32x4_t), p2));})
-#define __arm_vmaxq_x(p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \
- __typeof(p2) __p2 = (p2); \
- _Generic( (int (*)[__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
- int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vmaxq_x_s8 (__ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
- int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmaxq_x_s16 (__ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
- int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmaxq_x_s32 (__ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \
- int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vmaxq_x_u8( __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \
- int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vmaxq_x_u16( __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \
- int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vmaxq_x_u32( __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3));})
-
#define __arm_vmaxvq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
__typeof(p1) __p1 = (p1); \
_Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
@@ -41672,6 +41487,16 @@ extern void *__ARM_undef;
int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int16x8_t]: __arm_vminavq_p_s16 (__p0, __ARM_mve_coerce(__p1, int16x8_t), p2), \
int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int32x4_t]: __arm_vminavq_p_s32 (__p0, __ARM_mve_coerce(__p1, int32x4_t), p2));})
+#define __arm_vmaxq_x(p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \
+ __typeof(p2) __p2 = (p2); \
+ _Generic( (int (*)[__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
+ int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vmaxq_x_s8 (__ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
+ int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmaxq_x_s16 (__ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
+ int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmaxq_x_s32 (__ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \
+ int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vmaxq_x_u8 (__ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \
+ int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vmaxq_x_u16 (__ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \
+ int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vmaxq_x_u32 (__ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3));})
+
#define __arm_vminq_x(p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \
__typeof(p2) __p2 = (p2); \
_Generic( (int (*)[__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
@@ -41810,22 +41635,6 @@ extern void *__ARM_undef;
int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmlaldavxq_p_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), p2), \
int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmlaldavxq_p_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), p2));})
-#define __arm_vmlsdavaq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
- __typeof(p1) __p1 = (p1); \
- __typeof(p2) __p2 = (p2); \
- _Generic( (int (*)[__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
- int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vmlsdavaq_s8(__p0, __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t)), \
- int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmlsdavaq_s16(__p0, __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t)), \
- int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmlsdavaq_s32(__p0, __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t)));})
-
-#define __arm_vmlsdavaxq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
- __typeof(p1) __p1 = (p1); \
- __typeof(p2) __p2 = (p2); \
- _Generic( (int (*)[__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
- int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vmlsdavaxq_s8(__p0, __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t)), \
- int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmlsdavaxq_s16(__p0, __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t)), \
- int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmlsdavaxq_s32(__p0, __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t)));})
-
#define __arm_vmlsdavq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
__typeof(p1) __p1 = (p1); \
_Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
@@ -41833,13 +41642,6 @@ extern void *__ARM_undef;
int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmlsdavq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \
int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmlsdavq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)));})
-#define __arm_vmlsdavq_p(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
- __typeof(p1) __p1 = (p1); \
- _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
- int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vmlsdavq_p_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), p2), \
- int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmlsdavq_p_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), p2), \
- int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmlsdavq_p_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), p2));})
-
#define __arm_vmlsdavxq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
__typeof(p1) __p1 = (p1); \
_Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
@@ -41847,13 +41649,6 @@ extern void *__ARM_undef;
int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmlsdavxq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \
int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmlsdavxq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)));})
-#define __arm_vmlsdavxq_p(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
- __typeof(p1) __p1 = (p1); \
- _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
- int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vmlsdavxq_p_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), p2), \
- int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmlsdavxq_p_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), p2), \
- int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmlsdavxq_p_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), p2));})
-
#define __arm_vmlsldavaq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
__typeof(p1) __p1 = (p1); \
__typeof(p2) __p2 = (p2); \
@@ -41948,13 +41743,6 @@ extern void *__ARM_undef;
int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vmulltq_poly_x_p8 (__ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \
int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vmulltq_poly_x_p16 (__ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3));})
-#define __arm_vrmlaldavhaq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
- __typeof(p1) __p1 = (p1); \
- __typeof(p2) __p2 = (p2); \
- _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
- int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vrmlaldavhaq_s32 (__ARM_mve_coerce(__p0, int64_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t)), \
- int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vrmlaldavhaq_u32 (__ARM_mve_coerce(__p0, uint64_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t)));})
-
#define __arm_vrmlaldavhaxq(p0,p1,p2) __arm_vrmlaldavhaxq_s32(p0,p1,p2)
#define __arm_vrmlaldavhq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
@@ -41994,35 +41782,15 @@ extern void *__ARM_undef;
int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint16x8_t]: __arm_vstrbq_u16 (__ARM_mve_coerce(p0, uint8_t *), __ARM_mve_coerce(__p1, uint16x8_t)), \
int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vstrbq_u32 (__ARM_mve_coerce(p0, uint8_t *), __ARM_mve_coerce(__p1, uint32x4_t)));})
-#define __arm_vstrbq_p(p0,p1,p2) ({ __typeof(p1) __p1 = (p1); \
- _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)])0, \
- int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_int8x16_t]: __arm_vstrbq_p_s8 (__ARM_mve_coerce(p0, int8_t *), __ARM_mve_coerce(__p1, int8x16_t), p2), \
- int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_int16x8_t]: __arm_vstrbq_p_s16 (__ARM_mve_coerce(p0, int8_t *), __ARM_mve_coerce(__p1, int16x8_t), p2), \
- int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_int32x4_t]: __arm_vstrbq_p_s32 (__ARM_mve_coerce(p0, int8_t *), __ARM_mve_coerce(__p1, int32x4_t), p2), \
- int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint8x16_t]: __arm_vstrbq_p_u8 (__ARM_mve_coerce(p0, uint8_t *), __ARM_mve_coerce(__p1, uint8x16_t), p2), \
- int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint16x8_t]: __arm_vstrbq_p_u16 (__ARM_mve_coerce(p0, uint8_t *), __ARM_mve_coerce(__p1, uint16x8_t), p2), \
- int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vstrbq_p_u32 (__ARM_mve_coerce(p0, uint8_t *), __ARM_mve_coerce(__p1, uint32x4_t), p2));})
-
-#define __arm_vstrbq_scatter_offset(p0,p1,p2) ({ __typeof(p1) __p1 = (p1); \
- __typeof(p2) __p2 = (p2); \
- _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
- int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint8x16_t][__ARM_mve_type_int8x16_t]: __arm_vstrbq_scatter_offset_s8 (__ARM_mve_coerce(p0, int8_t *), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, int8x16_t)), \
- int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint16x8_t][__ARM_mve_type_int16x8_t]: __arm_vstrbq_scatter_offset_s16 (__ARM_mve_coerce(p0, int8_t *), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, int16x8_t)), \
- int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint32x4_t][__ARM_mve_type_int32x4_t]: __arm_vstrbq_scatter_offset_s32 (__ARM_mve_coerce(p0, int8_t *), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, int32x4_t)), \
- int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vstrbq_scatter_offset_u8 (__ARM_mve_coerce(p0, uint8_t *), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t)), \
- int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vstrbq_scatter_offset_u16 (__ARM_mve_coerce(p0, uint8_t *), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t)), \
- int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vstrbq_scatter_offset_u32 (__ARM_mve_coerce(p0, uint8_t *), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t)));})
-
-
-#define __arm_vstrbq_scatter_offset_p(p0,p1,p2,p3) ({__typeof(p1) __p1 = (p1); \
- __typeof(p2) __p2 = (p2); \
- _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
- int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint8x16_t][__ARM_mve_type_int8x16_t]: __arm_vstrbq_scatter_offset_p_s8 (__ARM_mve_coerce(p0, int8_t *), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
- int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint16x8_t][__ARM_mve_type_int16x8_t]: __arm_vstrbq_scatter_offset_p_s16 (__ARM_mve_coerce(p0, int8_t *), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
- int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint32x4_t][__ARM_mve_type_int32x4_t]: __arm_vstrbq_scatter_offset_p_s32 (__ARM_mve_coerce(p0, int8_t *), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \
- int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vstrbq_scatter_offset_p_u8 (__ARM_mve_coerce(p0, uint8_t *), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \
- int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vstrbq_scatter_offset_p_u16 (__ARM_mve_coerce(p0, uint8_t *), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \
- int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vstrbq_scatter_offset_p_u32 (__ARM_mve_coerce(p0, uint8_t *), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3));})
+#define __arm_vstrbq_p(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
+ __typeof(p1) __p1 = (p1); \
+ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
+ int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_int8x16_t]: __arm_vstrbq_p_s8 (__ARM_mve_coerce(__p0, int8_t *), __ARM_mve_coerce(__p1, int8x16_t), p2), \
+ int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_int16x8_t]: __arm_vstrbq_p_s16 (__ARM_mve_coerce(__p0, int8_t *), __ARM_mve_coerce(__p1, int16x8_t), p2), \
+ int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_int32x4_t]: __arm_vstrbq_p_s32 (__ARM_mve_coerce(__p0, int8_t *), __ARM_mve_coerce(__p1, int32x4_t), p2), \
+ int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint8x16_t]: __arm_vstrbq_p_u8 (__ARM_mve_coerce(__p0, uint8_t *), __ARM_mve_coerce(__p1, uint8x16_t), p2), \
+ int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint16x8_t]: __arm_vstrbq_p_u16 (__ARM_mve_coerce(__p0, uint8_t *), __ARM_mve_coerce(__p1, uint16x8_t), p2), \
+ int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vstrbq_p_u32 (__ARM_mve_coerce(__p0, uint8_t *), __ARM_mve_coerce(__p1, uint32x4_t), p2));})
#define __arm_vstrdq_scatter_base(p0,p1,p2) ({ __typeof(p2) __p2 = (p2); \
_Generic( (int (*)[__ARM_mve_typeid(__p2)])0, \
@@ -42034,29 +41802,65 @@ extern void *__ARM_undef;
int (*)[__ARM_mve_type_int64x2_t]: __arm_vstrdq_scatter_base_p_s64 (p0, p1, __ARM_mve_coerce(__p2, int64x2_t), p3), \
int (*)[__ARM_mve_type_uint64x2_t]: __arm_vstrdq_scatter_base_p_u64 (p0, p1, __ARM_mve_coerce(__p2, uint64x2_t), p3));})
-#define __arm_vstrdq_scatter_offset(p0,p1,p2) ({ __typeof(p1) __p1 = (p1); \
+#define __arm_vrmlaldavhaq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
+ __typeof(p1) __p1 = (p1); \
__typeof(p2) __p2 = (p2); \
- _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p2)])0, \
- int (*)[__ARM_mve_type_int64_t_ptr][__ARM_mve_type_int64x2_t]: __arm_vstrdq_scatter_offset_s64 (__ARM_mve_coerce(p0, int64_t *), __p1, __ARM_mve_coerce(__p2, int64x2_t)), \
- int (*)[__ARM_mve_type_uint64_t_ptr][__ARM_mve_type_uint64x2_t]: __arm_vstrdq_scatter_offset_u64 (__ARM_mve_coerce(p0, uint64_t *), __p1, __ARM_mve_coerce(__p2, uint64x2_t)));})
+ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
+ int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vrmlaldavhaq_s32 (__ARM_mve_coerce(__p0, int64_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t)), \
+ int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vrmlaldavhaq_u32 (__ARM_mve_coerce(__p0, uint64_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t)));})
-#define __arm_vstrdq_scatter_offset_p(p0,p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \
+#define __arm_vrmlaldavhaq_p(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
+ __typeof(p1) __p1 = (p1); \
__typeof(p2) __p2 = (p2); \
- _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p2)])0, \
- int (*)[__ARM_mve_type_int64_t_ptr][__ARM_mve_type_int64x2_t]: __arm_vstrdq_scatter_offset_p_s64 (__ARM_mve_coerce(p0, int64_t *), __p1, __ARM_mve_coerce(__p2, int64x2_t), p3), \
- int (*)[__ARM_mve_type_uint64_t_ptr][__ARM_mve_type_uint64x2_t]: __arm_vstrdq_scatter_offset_p_u64 (__ARM_mve_coerce(p0, uint64_t *), __p1, __ARM_mve_coerce(__p2, uint64x2_t), p3));})
+ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
+ int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vrmlaldavhaq_p_s32 (__ARM_mve_coerce(__p0, int64_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \
+ int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vrmlaldavhaq_p_u32 (__ARM_mve_coerce(__p0, uint64_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3));})
-#define __arm_vstrdq_scatter_shifted_offset(p0,p1,p2) ({ __typeof(p1) __p1 = (p1); \
+#define __arm_vstrbq_scatter_offset(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
+ __typeof(p1) __p1 = (p1); \
__typeof(p2) __p2 = (p2); \
- _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p2)])0, \
- int (*)[__ARM_mve_type_int64_t_ptr][__ARM_mve_type_int64x2_t]: __arm_vstrdq_scatter_shifted_offset_s64 (__ARM_mve_coerce(p0, int64_t *), __p1, __ARM_mve_coerce(__p2, int64x2_t)), \
- int (*)[__ARM_mve_type_uint64_t_ptr][__ARM_mve_type_uint64x2_t]: __arm_vstrdq_scatter_shifted_offset_u64 (__ARM_mve_coerce(p0, uint64_t *), __p1, __ARM_mve_coerce(__p2, uint64x2_t)));})
+ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
+ int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint8x16_t][__ARM_mve_type_int8x16_t]: __arm_vstrbq_scatter_offset_s8 (__ARM_mve_coerce(__p0, int8_t *), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, int8x16_t)), \
+ int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint16x8_t][__ARM_mve_type_int16x8_t]: __arm_vstrbq_scatter_offset_s16 (__ARM_mve_coerce(__p0, int8_t *), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, int16x8_t)), \
+ int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint32x4_t][__ARM_mve_type_int32x4_t]: __arm_vstrbq_scatter_offset_s32 (__ARM_mve_coerce(__p0, int8_t *), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, int32x4_t)), \
+ int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vstrbq_scatter_offset_u8 (__ARM_mve_coerce(__p0, uint8_t *), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t)), \
+ int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vstrbq_scatter_offset_u16 (__ARM_mve_coerce(__p0, uint8_t *), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t)), \
+ int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vstrbq_scatter_offset_u32 (__ARM_mve_coerce(__p0, uint8_t *), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t)));})
-#define __arm_vstrdq_scatter_shifted_offset_p(p0,p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \
+#define __arm_vstrbq_scatter_offset_p(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
+ __typeof(p1) __p1 = (p1); \
__typeof(p2) __p2 = (p2); \
- _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p2)])0, \
- int (*)[__ARM_mve_type_int64_t_ptr][__ARM_mve_type_int64x2_t]: __arm_vstrdq_scatter_shifted_offset_p_s64 (__ARM_mve_coerce(p0, int64_t *), __p1, __ARM_mve_coerce(__p2, int64x2_t), p3), \
- int (*)[__ARM_mve_type_uint64_t_ptr][__ARM_mve_type_uint64x2_t]: __arm_vstrdq_scatter_shifted_offset_p_u64 (__ARM_mve_coerce(p0, uint64_t *), __p1, __ARM_mve_coerce(__p2, uint64x2_t), p3));})
+ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
+ int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint8x16_t][__ARM_mve_type_int8x16_t]: __arm_vstrbq_scatter_offset_p_s8 (__ARM_mve_coerce(__p0, int8_t *), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
+ int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint16x8_t][__ARM_mve_type_int16x8_t]: __arm_vstrbq_scatter_offset_p_s16 (__ARM_mve_coerce(__p0, int8_t *), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
+ int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint32x4_t][__ARM_mve_type_int32x4_t]: __arm_vstrbq_scatter_offset_p_s32 (__ARM_mve_coerce(__p0, int8_t *), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \
+ int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vstrbq_scatter_offset_p_u8 (__ARM_mve_coerce(__p0, uint8_t *), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \
+ int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vstrbq_scatter_offset_p_u16 (__ARM_mve_coerce(__p0, uint8_t *), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \
+ int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vstrbq_scatter_offset_p_u32 (__ARM_mve_coerce(__p0, uint8_t *), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3));})
+
+#define __arm_vstrdq_scatter_offset_p(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
+ __typeof(p2) __p2 = (p2); \
+ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p2)])0, \
+ int (*)[__ARM_mve_type_int64_t_ptr][__ARM_mve_type_int64x2_t]: __arm_vstrdq_scatter_offset_p_s64 (__ARM_mve_coerce(__p0, int64_t *), p1, __ARM_mve_coerce(__p2, int64x2_t), p3), \
+ int (*)[__ARM_mve_type_uint64_t_ptr][__ARM_mve_type_uint64x2_t]: __arm_vstrdq_scatter_offset_p_u64 (__ARM_mve_coerce(__p0, uint64_t *), p1, __ARM_mve_coerce(__p2, uint64x2_t), p3));})
+
+#define __arm_vstrdq_scatter_offset(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
+ __typeof(p2) __p2 = (p2); \
+ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p2)])0, \
+ int (*)[__ARM_mve_type_int64_t_ptr][__ARM_mve_type_int64x2_t]: __arm_vstrdq_scatter_offset_s64 (__ARM_mve_coerce(__p0, int64_t *), p1, __ARM_mve_coerce(__p2, int64x2_t)), \
+ int (*)[__ARM_mve_type_uint64_t_ptr][__ARM_mve_type_uint64x2_t]: __arm_vstrdq_scatter_offset_u64 (__ARM_mve_coerce(__p0, uint64_t *), p1, __ARM_mve_coerce(__p2, uint64x2_t)));})
+
+#define __arm_vstrdq_scatter_shifted_offset_p(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
+ __typeof(p2) __p2 = (p2); \
+ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p2)])0, \
+ int (*)[__ARM_mve_type_int64_t_ptr][__ARM_mve_type_int64x2_t]: __arm_vstrdq_scatter_shifted_offset_p_s64 (__ARM_mve_coerce(__p0, int64_t *), p1, __ARM_mve_coerce(__p2, int64x2_t), p3), \
+ int (*)[__ARM_mve_type_uint64_t_ptr][__ARM_mve_type_uint64x2_t]: __arm_vstrdq_scatter_shifted_offset_p_u64 (__ARM_mve_coerce(__p0, uint64_t *), p1, __ARM_mve_coerce(__p2, uint64x2_t), p3));})
+
+#define __arm_vstrdq_scatter_shifted_offset(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
+ __typeof(p2) __p2 = (p2); \
+ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p2)])0, \
+ int (*)[__ARM_mve_type_int64_t_ptr][__ARM_mve_type_int64x2_t]: __arm_vstrdq_scatter_shifted_offset_s64 (__ARM_mve_coerce(__p0, int64_t *), p1, __ARM_mve_coerce(__p2, int64x2_t)), \
+ int (*)[__ARM_mve_type_uint64_t_ptr][__ARM_mve_type_uint64x2_t]: __arm_vstrdq_scatter_shifted_offset_u64 (__ARM_mve_coerce(__p0, uint64_t *), p1, __ARM_mve_coerce(__p2, uint64x2_t)));})
#endif /* __cplusplus */
#endif /* __ARM_FEATURE_MVE */
[-- Attachment #2: rb14446.patch.gz --]
[-- Type: application/gzip, Size: 3735 bytes --]
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [GCC][PATCH] arm: Remove duplicate definitions from arm_mve.h (pr100419).
2021-05-05 9:56 [GCC][PATCH] arm: Remove duplicate definitions from arm_mve.h (pr100419) Srinath Parvathaneni
@ 2021-05-05 10:15 ` Richard Earnshaw
2021-05-05 12:39 ` Srinath Parvathaneni
0 siblings, 1 reply; 4+ messages in thread
From: Richard Earnshaw @ 2021-05-05 10:15 UTC (permalink / raw)
To: Srinath Parvathaneni, gcc-patches; +Cc: Richard.Earnshaw
On 05/05/2021 10:56, Srinath Parvathaneni via Gcc-patches wrote:
> Hi All,
>
> This patch removes several duplicated intrinsic definitions from
> arm_mve.h mentioned in PR100419 and also fixes the wrong arguments
> in few of intrinsics polymorphic variants.
>
> Regression tested and found no issues.
>
> Ok for master ? GCC-11 and GCC-10 branch backports?
> gcc/ChangeLog:
>
> 2021-05-04 Srinath Parvathaneni <srinath.parvathaneni@arm.com>
>
> PR target/100419
> * config/arm/arm_mve.h (__arm_vstrwq_scatter_offset): Fix wrong arguments.
> (__arm_vcmpneq): Remove duplicate definition.
> (__arm_vstrwq_scatter_offset_p): Likewise.
> (__arm_vmaxq_x): Likewise.
> (__arm_vmlsdavaq): Likewise.
> (__arm_vmlsdavaxq): Likewise.
> (__arm_vmlsdavq_p): Likewise.
> (__arm_vmlsdavxq_p): Likewise.
> (__arm_vrmlaldavhaq): Likewise.
> (__arm_vstrbq_p): Likewise.
> (__arm_vstrbq_scatter_offset): Likewise.
> (__arm_vstrbq_scatter_offset_p): Likewise.
> (__arm_vstrdq_scatter_offset): Likewise.
> (__arm_vstrdq_scatter_offset_p): Likewise.
> (__arm_vstrdq_scatter_shifted_offset): Likewise.
> (__arm_vstrdq_scatter_shifted_offset_p): Likewise.
>
> Co-authored-by: Joe Ramsay <joe.ramsay@arm.com>
Let's take this example:
-#define __arm_vstrwq_scatter_offset(p0,p1,p2) ({ __typeof(p1) __p1 = (p1); \
+#define __arm_vstrwq_scatter_offset(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
__typeof(p2) __p2 = (p2); \
- _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p2)])0, \
- int (*)[__ARM_mve_type_int32_t_ptr][__ARM_mve_type_int32x4_t]: __arm_vstrwq_scatter_offset_s32 (__ARM_mve_coerce(p0, int32_t *), __p1, __ARM_mve_coerce(__p2, int32x4_t)), \
- int (*)[__ARM_mve_type_uint32_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vstrwq_scatter_offset_u32 (__ARM_mve_coerce(p0, uint32_t *), __p1, __ARM_mve_coerce(__p2, uint32x4_t)));})
+ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p2)])0, \
+ int (*)[__ARM_mve_type_int32_t_ptr][__ARM_mve_type_int32x4_t]: __arm_vstrwq_scatter_offset_s32 (__ARM_mve_coerce(__p0, int32_t *), p1, __ARM_mve_coerce(__p2, int32x4_t)), \
+ int (*)[__ARM_mve_type_uint32_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vstrwq_scatter_offset_u32 (__ARM_mve_coerce(__p0, uint32_t *), p1, __ARM_mve_coerce(__p2, uint32x4_t)));})
It removes the safe shadow copy of p1 but adds a safe shadow copy of p0.
Why? Isn't it better (and safer) to just create shadow copies of all
the arguments and let the compiler worry about when it's safe to
eliminate them?
R.
>
>
> ############### Attachment also inlined for ease of reply ###############
>
>
> diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h
> index 3a40c6e68161b64319b071f57a5b0d8393303cfd..dc1d874a6366eb5fe755a70c72ed371c915bd04b 100644
> --- a/gcc/config/arm/arm_mve.h
> +++ b/gcc/config/arm/arm_mve.h
> @@ -37808,33 +37808,19 @@ extern void *__ARM_undef;
> int (*)[__ARM_mve_type_uint32x4_t]: __arm_vstrwq_scatter_base_p_u32(p0, p1, __ARM_mve_coerce(__p2, uint32x4_t), p3), \
> int (*)[__ARM_mve_type_float32x4_t]: __arm_vstrwq_scatter_base_p_f32(p0, p1, __ARM_mve_coerce(__p2, float32x4_t), p3));})
>
> -#define __arm_vstrwq_scatter_offset(p0,p1,p2) ({ __typeof(p1) __p1 = (p1); \
> +#define __arm_vstrwq_scatter_offset(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
> __typeof(p2) __p2 = (p2); \
> - _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p2)])0, \
> - int (*)[__ARM_mve_type_int32_t_ptr][__ARM_mve_type_int32x4_t]: __arm_vstrwq_scatter_offset_s32 (__ARM_mve_coerce(p0, int32_t *), __p1, __ARM_mve_coerce(__p2, int32x4_t)), \
> - int (*)[__ARM_mve_type_uint32_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vstrwq_scatter_offset_u32 (__ARM_mve_coerce(p0, uint32_t *), __p1, __ARM_mve_coerce(__p2, uint32x4_t)), \
> - int (*)[__ARM_mve_type_float32_t_ptr][__ARM_mve_type_float32x4_t]: __arm_vstrwq_scatter_offset_f32 (__ARM_mve_coerce(p0, float32_t *), __p1, __ARM_mve_coerce(__p2, float32x4_t)));})
> + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p2)])0, \
> + int (*)[__ARM_mve_type_int32_t_ptr][__ARM_mve_type_int32x4_t]: __arm_vstrwq_scatter_offset_s32 (__ARM_mve_coerce(__p0, int32_t *), p1, __ARM_mve_coerce(__p2, int32x4_t)), \
> + int (*)[__ARM_mve_type_uint32_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vstrwq_scatter_offset_u32 (__ARM_mve_coerce(__p0, uint32_t *), p1, __ARM_mve_coerce(__p2, uint32x4_t)), \
> + int (*)[__ARM_mve_type_float32_t_ptr][__ARM_mve_type_float32x4_t]: __arm_vstrwq_scatter_offset_f32 (__ARM_mve_coerce(__p0, float32_t *), p1, __ARM_mve_coerce(__p2, float32x4_t)));})
>
> -#define __arm_vstrwq_scatter_offset_p(p0,p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \
> +#define __arm_vstrwq_scatter_offset_p(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
> __typeof(p2) __p2 = (p2); \
> - _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p2)])0, \
> - int (*)[__ARM_mve_type_int32_t_ptr][__ARM_mve_type_int32x4_t]: __arm_vstrwq_scatter_offset_p_s32 (__ARM_mve_coerce(p0, int32_t *), __p1, __ARM_mve_coerce(__p2, int32x4_t), p3), \
> - int (*)[__ARM_mve_type_uint32_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vstrwq_scatter_offset_p_u32 (__ARM_mve_coerce(p0, uint32_t *), __p1, __ARM_mve_coerce(__p2, uint32x4_t), p3), \
> - int (*)[__ARM_mve_type_float32_t_ptr][__ARM_mve_type_float32x4_t]: __arm_vstrwq_scatter_offset_p_f32 (__ARM_mve_coerce(p0, float32_t *), __p1, __ARM_mve_coerce(__p2, float32x4_t), p3));})
> -
> -#define __arm_vstrwq_scatter_offset_p(p0,p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \
> - __typeof(p2) __p2 = (p2); \
> - _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p2)])0, \
> - int (*)[__ARM_mve_type_int32_t_ptr][__ARM_mve_type_int32x4_t]: __arm_vstrwq_scatter_offset_p_s32 (__ARM_mve_coerce(p0, int32_t *), __p1, __ARM_mve_coerce(__p2, int32x4_t), p3), \
> - int (*)[__ARM_mve_type_uint32_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vstrwq_scatter_offset_p_u32 (__ARM_mve_coerce(p0, uint32_t *), __p1, __ARM_mve_coerce(__p2, uint32x4_t), p3), \
> - int (*)[__ARM_mve_type_float32_t_ptr][__ARM_mve_type_float32x4_t]: __arm_vstrwq_scatter_offset_p_f32 (__ARM_mve_coerce(p0, float32_t *), __p1, __ARM_mve_coerce(__p2, float32x4_t), p3));})
> -
> -#define __arm_vstrwq_scatter_offset(p0,p1,p2) ({ __typeof(p1) __p1 = (p1); \
> - __typeof(p2) __p2 = (p2); \
> - _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p2)])0, \
> - int (*)[__ARM_mve_type_int32_t_ptr][__ARM_mve_type_int32x4_t]: __arm_vstrwq_scatter_offset_s32 (__ARM_mve_coerce(p0, int32_t *), __p1, __ARM_mve_coerce(__p2, int32x4_t)), \
> - int (*)[__ARM_mve_type_uint32_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vstrwq_scatter_offset_u32 (__ARM_mve_coerce(p0, uint32_t *), __p1, __ARM_mve_coerce(__p2, uint32x4_t)), \
> - int (*)[__ARM_mve_type_float32_t_ptr][__ARM_mve_type_float32x4_t]: __arm_vstrwq_scatter_offset_f32 (__ARM_mve_coerce(p0, float32_t *), __p1, __ARM_mve_coerce(__p2, float32x4_t)));})
> + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p2)])0, \
> + int (*)[__ARM_mve_type_int32_t_ptr][__ARM_mve_type_int32x4_t]: __arm_vstrwq_scatter_offset_p_s32 (__ARM_mve_coerce(__p0, int32_t *), p1, __ARM_mve_coerce(__p2, int32x4_t), p3), \
> + int (*)[__ARM_mve_type_uint32_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vstrwq_scatter_offset_p_u32 (__ARM_mve_coerce(__p0, uint32_t *), p1, __ARM_mve_coerce(__p2, uint32x4_t), p3), \
> + int (*)[__ARM_mve_type_float32_t_ptr][__ARM_mve_type_float32x4_t]: __arm_vstrwq_scatter_offset_p_f32 (__ARM_mve_coerce(__p0, float32_t *), p1, __ARM_mve_coerce(__p2, float32x4_t), p3));})
>
> #define __arm_vstrwq_scatter_shifted_offset(p0,p1,p2) ({ __typeof(p1) __p1 = (p1); \
> __typeof(p2) __p2 = (p2); \
> @@ -38422,6 +38408,12 @@ extern void *__ARM_undef;
> #define __arm_vcmpneq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
> __typeof(p1) __p1 = (p1); \
> _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
> + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vcmpneq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8_t)), \
> + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vcmpneq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16_t)), \
> + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vcmpneq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32_t)), \
> + int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int_n]: __arm_vcmpneq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8_t)), \
> + int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]: __arm_vcmpneq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16_t)), \
> + int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]: __arm_vcmpneq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32_t)), \
> int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vcmpneq_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \
> int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vcmpneq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \
> int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vcmpneq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)), \
> @@ -38871,23 +38863,6 @@ extern void *__ARM_undef;
> int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]: __arm_vcmpeqq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16_t)), \
> int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]: __arm_vcmpeqq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32_t)));})
>
> -#define __arm_vcmpneq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
> - __typeof(p1) __p1 = (p1); \
> - _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
> - int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vcmpneq_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \
> - int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vcmpneq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \
> - int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vcmpneq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)), \
> - int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vcmpneq_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t)), \
> - int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vcmpneq_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t)), \
> - int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vcmpneq_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t)), \
> - int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vcmpneq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8_t)), \
> - int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vcmpneq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16_t)), \
> - int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vcmpneq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32_t)), \
> - int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int_n]: __arm_vcmpneq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8_t)), \
> - int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]: __arm_vcmpneq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16_t)), \
> - int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]: __arm_vcmpneq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32_t)));})
> -
> -
> #define __arm_vqmovntq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
> __typeof(p1) __p1 = (p1); \
> _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
> @@ -39036,22 +39011,6 @@ extern void *__ARM_undef;
> int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vcmpneq_m_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), p2), \
> int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vcmpneq_m_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), p2));})
>
> -#define __arm_vcmpneq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
> - __typeof(p1) __p1 = (p1); \
> - _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
> - int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vcmpneq_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \
> - int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vcmpneq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \
> - int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vcmpneq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)), \
> - int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vcmpneq_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t)), \
> - int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vcmpneq_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t)), \
> - int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vcmpneq_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t)), \
> - int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vcmpneq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8_t)), \
> - int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vcmpneq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16_t)), \
> - int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vcmpneq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32_t)), \
> - int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int_n]: __arm_vcmpneq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8_t)), \
> - int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]: __arm_vcmpneq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16_t)), \
> - int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]: __arm_vcmpneq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32_t)));})
> -
> #define __arm_vshlcq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
> _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
> int (*)[__ARM_mve_type_int8x16_t]: __arm_vshlcq_s8 (__ARM_mve_coerce(__p0, int8x16_t), p1, p2), \
> @@ -39367,52 +39326,6 @@ extern void *__ARM_undef;
> int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int16x8_t]: __arm_vminaq_m_s16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, int16x8_t), p2), \
> int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int32x4_t]: __arm_vminaq_m_s32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, int32x4_t), p2));})
>
> -#define __arm_vrmlaldavhaq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
> - __typeof(p1) __p1 = (p1); \
> - _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
> - int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vcmpltq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), p2), \
> - int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vcmpltq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), p2), \
> - int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vcmpltq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), p2), \
> - int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vcmpltq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8_t), p2), \
> - int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vcmpltq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16_t), p2), \
> - int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vcmpltq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32_t), p2));})
> -
> -#define __arm_vmlsdavxq_p(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
> - __typeof(p1) __p1 = (p1); \
> - _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
> - int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vcmpleq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), p2), \
> - int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vcmpleq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), p2), \
> - int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vcmpleq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), p2), \
> - int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vcmpleq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8_t), p2), \
> - int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vcmpleq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16_t), p2), \
> - int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vcmpleq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32_t), p2));})
> -
> -#define __arm_vmlsdavq_p(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
> - __typeof(p1) __p1 = (p1); \
> - _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
> - int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vcmpgtq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), p2), \
> - int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vcmpgtq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), p2), \
> - int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vcmpgtq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), p2), \
> - int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vcmpgtq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8_t), p2), \
> - int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vcmpgtq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16_t), p2), \
> - int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vcmpgtq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32_t), p2));})
> -
> -#define __arm_vmlsdavaxq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
> - __typeof(p1) __p1 = (p1); \
> - _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
> - int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int16x8_t]: __arm_vshrntq_n_s16 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int16x8_t), p2), \
> - int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int32x4_t]: __arm_vshrntq_n_s32 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int32x4_t), p2), \
> - int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint16x8_t]: __arm_vshrntq_n_u16 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint16x8_t), p2), \
> - int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint32x4_t]: __arm_vshrntq_n_u32 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint32x4_t), p2));})
> -
> -#define __arm_vmlsdavaq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
> - __typeof(p1) __p1 = (p1); \
> - _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
> - int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int16x8_t]: __arm_vrshrntq_n_s16 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int16x8_t), p2), \
> - int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int32x4_t]: __arm_vrshrntq_n_s32 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int32x4_t), p2), \
> - int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint16x8_t]: __arm_vrshrntq_n_u16 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint16x8_t), p2), \
> - int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint32x4_t]: __arm_vrshrntq_n_u32 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint32x4_t), p2));})
> -
> #define __arm_vmovlbq_m(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
> __typeof(p1) __p1 = (p1); \
> _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
> @@ -39711,26 +39624,6 @@ extern void *__ARM_undef;
> int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vmulq_m_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \
> int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vmulq_m_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3));})
>
> -#define __arm_vstrbq(p0,p1) ({ __typeof(p1) __p1 = (p1); \
> - _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)])0, \
> - int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_int8x16_t]: __arm_vstrbq_s8 (__ARM_mve_coerce(p0, int8_t *), __ARM_mve_coerce(__p1, int8x16_t)), \
> - int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_int16x8_t]: __arm_vstrbq_s16 (__ARM_mve_coerce(p0, int8_t *), __ARM_mve_coerce(__p1, int16x8_t)), \
> - int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_int32x4_t]: __arm_vstrbq_s32 (__ARM_mve_coerce(p0, int8_t *), __ARM_mve_coerce(__p1, int32x4_t)), \
> - int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint8x16_t]: __arm_vstrbq_u8 (__ARM_mve_coerce(p0, uint8_t *), __ARM_mve_coerce(__p1, uint8x16_t)), \
> - int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint16x8_t]: __arm_vstrbq_u16 (__ARM_mve_coerce(p0, uint8_t *), __ARM_mve_coerce(__p1, uint16x8_t)), \
> - int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vstrbq_u32 (__ARM_mve_coerce(p0, uint8_t *), __ARM_mve_coerce(__p1, uint32x4_t)));})
> -
> -#define __arm_vstrbq_scatter_offset(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
> - __typeof(p1) __p1 = (p1); \
> - __typeof(p2) __p2 = (p2); \
> - _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
> - int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint8x16_t][__ARM_mve_type_int8x16_t]: __arm_vstrbq_scatter_offset_s8 (__ARM_mve_coerce(__p0, int8_t *), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, int8x16_t)), \
> - int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint16x8_t][__ARM_mve_type_int16x8_t]: __arm_vstrbq_scatter_offset_s16 (__ARM_mve_coerce(__p0, int8_t *), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, int16x8_t)), \
> - int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint32x4_t][__ARM_mve_type_int32x4_t]: __arm_vstrbq_scatter_offset_s32 (__ARM_mve_coerce(__p0, int8_t *), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, int32x4_t)), \
> - int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vstrbq_scatter_offset_u8 (__ARM_mve_coerce(__p0, uint8_t *), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t)), \
> - int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vstrbq_scatter_offset_u16 (__ARM_mve_coerce(__p0, uint8_t *), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t)), \
> - int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vstrbq_scatter_offset_u32 (__ARM_mve_coerce(__p0, uint8_t *), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t)));})
> -
> #define __arm_vstrwq_scatter_base(p0,p1,p2) ({ __typeof(p2) __p2 = (p2); \
> _Generic( (int (*)[__ARM_mve_typeid(__p2)])0, \
> int (*)[__ARM_mve_type_int32x4_t]: __arm_vstrwq_scatter_base_s32(p0, p1, __ARM_mve_coerce(__p2, int32x4_t)), \
> @@ -39745,27 +39638,6 @@ extern void *__ARM_undef;
> int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint16x8_t]: __arm_vldrbq_gather_offset_u16 (__ARM_mve_coerce(p0, uint8_t const *), __ARM_mve_coerce(__p1, uint16x8_t)), \
> int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vldrbq_gather_offset_u32 (__ARM_mve_coerce(p0, uint8_t const *), __ARM_mve_coerce(__p1, uint32x4_t)));})
>
> -#define __arm_vstrbq_p(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
> - __typeof(p1) __p1 = (p1); \
> - _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
> - int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_int8x16_t]: __arm_vstrbq_p_s8 (__ARM_mve_coerce(__p0, int8_t *), __ARM_mve_coerce(__p1, int8x16_t), p2), \
> - int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_int16x8_t]: __arm_vstrbq_p_s16 (__ARM_mve_coerce(__p0, int8_t *), __ARM_mve_coerce(__p1, int16x8_t), p2), \
> - int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_int32x4_t]: __arm_vstrbq_p_s32 (__ARM_mve_coerce(__p0, int8_t *), __ARM_mve_coerce(__p1, int32x4_t), p2), \
> - int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint8x16_t]: __arm_vstrbq_p_u8 (__ARM_mve_coerce(__p0, uint8_t *), __ARM_mve_coerce(__p1, uint8x16_t), p2), \
> - int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint16x8_t]: __arm_vstrbq_p_u16 (__ARM_mve_coerce(__p0, uint8_t *), __ARM_mve_coerce(__p1, uint16x8_t), p2), \
> - int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vstrbq_p_u32 (__ARM_mve_coerce(__p0, uint8_t *), __ARM_mve_coerce(__p1, uint32x4_t), p2));})
> -
> -#define __arm_vstrbq_scatter_offset_p(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
> - __typeof(p1) __p1 = (p1); \
> - __typeof(p2) __p2 = (p2); \
> - _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
> - int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint8x16_t][__ARM_mve_type_int8x16_t]: __arm_vstrbq_scatter_offset_p_s8 (__ARM_mve_coerce(__p0, int8_t *), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
> - int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint16x8_t][__ARM_mve_type_int16x8_t]: __arm_vstrbq_scatter_offset_p_s16 (__ARM_mve_coerce(__p0, int8_t *), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
> - int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint32x4_t][__ARM_mve_type_int32x4_t]: __arm_vstrbq_scatter_offset_p_s32 (__ARM_mve_coerce(__p0, int8_t *), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \
> - int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vstrbq_scatter_offset_p_u8 (__ARM_mve_coerce(__p0, uint8_t *), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \
> - int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vstrbq_scatter_offset_p_u16 (__ARM_mve_coerce(__p0, uint8_t *), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \
> - int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vstrbq_scatter_offset_p_u32 (__ARM_mve_coerce(__p0, uint8_t *), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3));})
> -
> #define __arm_vstrwq_scatter_base_p(p0,p1,p2,p3) ({ __typeof(p2) __p2 = (p2); \
> _Generic( (int (*)[__ARM_mve_typeid(__p2)])0, \
> int (*)[__ARM_mve_type_int32x4_t]: __arm_vstrwq_scatter_base_p_s32 (p0, p1, __ARM_mve_coerce(__p2, int32x4_t), p3), \
> @@ -39921,34 +39793,6 @@ extern void *__ARM_undef;
> int (*)[__ARM_mve_type_int64x2_t]: __arm_vstrdq_scatter_base_s64 (p0, p1, __ARM_mve_coerce(__p2, int64x2_t)), \
> int (*)[__ARM_mve_type_uint64x2_t]: __arm_vstrdq_scatter_base_u64 (p0, p1, __ARM_mve_coerce(__p2, uint64x2_t)));})
>
> -#define __arm_vstrdq_scatter_offset_p(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
> - __typeof(p1) __p1 = (p1); \
> - __typeof(p2) __p2 = (p2); \
> - _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p2)])0, \
> - int (*)[__ARM_mve_type_int64_t_ptr][__ARM_mve_type_int64x2_t]: __arm_vstrdq_scatter_offset_p_s64 (__ARM_mve_coerce(__p0, int64_t *), __p1, __ARM_mve_coerce(__p2, int64x2_t), p3), \
> - int (*)[__ARM_mve_type_uint64_t_ptr][__ARM_mve_type_uint64x2_t]: __arm_vstrdq_scatter_offset_p_u64 (__ARM_mve_coerce(__p0, uint64_t *), __p1, __ARM_mve_coerce(__p2, uint64x2_t), p3));})
> -
> -#define __arm_vstrdq_scatter_offset(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
> - __typeof(p1) __p1 = (p1); \
> - __typeof(p2) __p2 = (p2); \
> - _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p2)])0, \
> - int (*)[__ARM_mve_type_int64_t_ptr][__ARM_mve_type_int64x2_t]: __arm_vstrdq_scatter_offset_s64 (__ARM_mve_coerce(__p0, int64_t *), __p1, __ARM_mve_coerce(__p2, int64x2_t)), \
> - int (*)[__ARM_mve_type_uint64_t_ptr][__ARM_mve_type_uint64x2_t]: __arm_vstrdq_scatter_offset_u64 (__ARM_mve_coerce(__p0, uint64_t *), __p1, __ARM_mve_coerce(__p2, uint64x2_t)));})
> -
> -#define __arm_vstrdq_scatter_shifted_offset_p(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
> - __typeof(p1) __p1 = (p1); \
> - __typeof(p2) __p2 = (p2); \
> - _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p2)])0, \
> - int (*)[__ARM_mve_type_int64_t_ptr][__ARM_mve_type_int64x2_t]: __arm_vstrdq_scatter_shifted_offset_p_s64 (__ARM_mve_coerce(__p0, int64_t *), __p1, __ARM_mve_coerce(__p2, int64x2_t), p3), \
> - int (*)[__ARM_mve_type_uint64_t_ptr][__ARM_mve_type_uint64x2_t]: __arm_vstrdq_scatter_shifted_offset_p_u64 (__ARM_mve_coerce(__p0, uint64_t *), __p1, __ARM_mve_coerce(__p2, uint64x2_t), p3));})
> -
> -#define __arm_vstrdq_scatter_shifted_offset(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
> - __typeof(p1) __p1 = (p1); \
> - __typeof(p2) __p2 = (p2); \
> - _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p2)])0, \
> - int (*)[__ARM_mve_type_int64_t_ptr][__ARM_mve_type_int64x2_t]: __arm_vstrdq_scatter_shifted_offset_s64 (__ARM_mve_coerce(__p0, int64_t *), __p1, __ARM_mve_coerce(__p2, int64x2_t)), \
> - int (*)[__ARM_mve_type_uint64_t_ptr][__ARM_mve_type_uint64x2_t]: __arm_vstrdq_scatter_shifted_offset_u64 (__ARM_mve_coerce(__p0, uint64_t *), __p1, __ARM_mve_coerce(__p2, uint64x2_t)));})
> -
> #define __arm_vstrhq_scatter_offset(p0,p1,p2) ({ __typeof(p1) __p1 = (p1); \
> __typeof(p2) __p2 = (p2); \
> _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
> @@ -39981,29 +39825,17 @@ extern void *__ARM_undef;
> int (*)[__ARM_mve_type_uint16_t_ptr][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vstrhq_scatter_shifted_offset_p_u16 (__ARM_mve_coerce(p0, uint16_t *), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \
> int (*)[__ARM_mve_type_uint16_t_ptr][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vstrhq_scatter_shifted_offset_p_u32 (__ARM_mve_coerce(p0, uint16_t *), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3));})
>
> -#define __arm_vstrwq_scatter_offset(p0,p1,p2) ({ __typeof(p1) __p1 = (p1); \
> +#define __arm_vstrwq_scatter_offset(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
> __typeof(p2) __p2 = (p2); \
> - _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p2)])0, \
> - int (*)[__ARM_mve_type_int32_t_ptr][__ARM_mve_type_int32x4_t]: __arm_vstrwq_scatter_offset_s32 (__ARM_mve_coerce(p0, int32_t *), __p1, __ARM_mve_coerce(__p2, int32x4_t)), \
> - int (*)[__ARM_mve_type_uint32_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vstrwq_scatter_offset_u32 (__ARM_mve_coerce(p0, uint32_t *), __p1, __ARM_mve_coerce(__p2, uint32x4_t)));})
> + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p2)])0, \
> + int (*)[__ARM_mve_type_int32_t_ptr][__ARM_mve_type_int32x4_t]: __arm_vstrwq_scatter_offset_s32 (__ARM_mve_coerce(__p0, int32_t *), p1, __ARM_mve_coerce(__p2, int32x4_t)), \
> + int (*)[__ARM_mve_type_uint32_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vstrwq_scatter_offset_u32 (__ARM_mve_coerce(__p0, uint32_t *), p1, __ARM_mve_coerce(__p2, uint32x4_t)));})
>
> -#define __arm_vstrwq_scatter_offset_p(p0,p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \
> +#define __arm_vstrwq_scatter_offset_p(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
> __typeof(p2) __p2 = (p2); \
> - _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p2)])0, \
> - int (*)[__ARM_mve_type_int32_t_ptr][__ARM_mve_type_int32x4_t]: __arm_vstrwq_scatter_offset_p_s32 (__ARM_mve_coerce(p0, int32_t *), __p1, __ARM_mve_coerce(__p2, int32x4_t), p3), \
> - int (*)[__ARM_mve_type_uint32_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vstrwq_scatter_offset_p_u32 (__ARM_mve_coerce(p0, uint32_t *), __p1, __ARM_mve_coerce(__p2, uint32x4_t)));})
> -
> -#define __arm_vstrwq_scatter_offset_p(p0,p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \
> - __typeof(p2) __p2 = (p2); \
> - _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p2)])0, \
> - int (*)[__ARM_mve_type_int32_t_ptr][__ARM_mve_type_int32x4_t]: __arm_vstrwq_scatter_offset_p_s32 (__ARM_mve_coerce(p0, int32_t *), __p1, __ARM_mve_coerce(__p2, int32x4_t), p3), \
> - int (*)[__ARM_mve_type_uint32_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vstrwq_scatter_offset_p_u32 (__ARM_mve_coerce(p0, uint32_t *), __p1, __ARM_mve_coerce(__p2, uint32x4_t), p3));})
> -
> -#define __arm_vstrwq_scatter_offset(p0,p1,p2) ({ __typeof(p1) __p1 = (p1); \
> - __typeof(p2) __p2 = (p2); \
> - _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p2)])0, \
> - int (*)[__ARM_mve_type_int32_t_ptr][__ARM_mve_type_int32x4_t]: __arm_vstrwq_scatter_offset_s32 (__ARM_mve_coerce(p0, int32_t *), __p1, __ARM_mve_coerce(__p2, int32x4_t)), \
> - int (*)[__ARM_mve_type_uint32_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vstrwq_scatter_offset_u32 (__ARM_mve_coerce(p0, uint32_t *), __p1, __ARM_mve_coerce(__p2, uint32x4_t)));})
> + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p2)])0, \
> + int (*)[__ARM_mve_type_int32_t_ptr][__ARM_mve_type_int32x4_t]: __arm_vstrwq_scatter_offset_p_s32 (__ARM_mve_coerce(__p0, int32_t *), p1, __ARM_mve_coerce(__p2, int32x4_t), p3), \
> + int (*)[__ARM_mve_type_uint32_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vstrwq_scatter_offset_p_u32 (__ARM_mve_coerce(__p0, uint32_t *), p1, __ARM_mve_coerce(__p2, uint32x4_t), p3));})
>
> #define __arm_vstrwq_scatter_shifted_offset(p0,p1,p2) ({ __typeof(p1) __p1 = (p1); \
> __typeof(p2) __p2 = (p2); \
> @@ -40160,32 +39992,6 @@ extern void *__ARM_undef;
> int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_veorq_x_u16 (__ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \
> int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_veorq_x_u32 (__ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3));})
>
> -#define __arm_vmaxq_x(p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \
> - __typeof(p2) __p2 = (p2); \
> - _Generic( (int (*)[__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
> - int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vmulq_x_s8 (__ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
> - int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmulq_x_s16 (__ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
> - int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmulq_x_s32 (__ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \
> - int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vmulq_x_n_s8 (__ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8_t), p3), \
> - int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vmulq_x_n_s16 (__ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16_t), p3), \
> - int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vmulq_x_n_s32 (__ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32_t), p3), \
> - int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vmulq_x_u8 (__ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \
> - int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vmulq_x_u16 (__ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \
> - int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vmulq_x_u32 (__ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3), \
> - int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int_n]: __arm_vmulq_x_n_u8 (__ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8_t), p3), \
> - int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]: __arm_vmulq_x_n_u16 (__ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16_t), p3), \
> - int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]: __arm_vmulq_x_n_u32 (__ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32_t), p3));})
> -
> -#define __arm_vminq_x(p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \
> - __typeof(p2) __p2 = (p2); \
> - _Generic( (int (*)[__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
> - int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vminq_x_s8 (__ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
> - int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vminq_x_s16 (__ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
> - int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vminq_x_s32 (__ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \
> - int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vminq_x_u8 (__ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \
> - int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vminq_x_u16 (__ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \
> - int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vminq_x_u32 (__ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3));})
> -
> #define __arm_vmovlbq_x(p1,p2) ({ __typeof(p1) __p1 = (p1); \
> _Generic( (int (*)[__ARM_mve_typeid(__p1)])0, \
> int (*)[__ARM_mve_type_int8x16_t]: __arm_vmovlbq_x_s8 (__ARM_mve_coerce(__p1, int8x16_t), p2), \
> @@ -41013,13 +40819,6 @@ extern void *__ARM_undef;
> int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmlsldavaxq_p_s16(__p0, __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
> int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmlsldavaxq_p_s32(__p0, __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3));})
>
> -#define __arm_vrmlaldavhaq_p(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
> - __typeof(p1) __p1 = (p1); \
> - __typeof(p2) __p2 = (p2); \
> - _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
> - int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vrmlaldavhaq_p_s32 (__ARM_mve_coerce(__p0, int64_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \
> - int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vrmlaldavhaq_p_u32 (__ARM_mve_coerce(__p0, uint64_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3));})
> -
> #define __arm_vrmlaldavhaxq_p(p0,p1,p2,p3) __arm_vrmlaldavhaxq_p_s32(p0,p1,p2,p3)
>
> #define __arm_vrmlsldavhaq_p(p0,p1,p2,p3) __arm_vrmlsldavhaq_p_s32(p0,p1,p2,p3)
> @@ -41343,21 +41142,47 @@ extern void *__ARM_undef;
> int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vqrdmladhxq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
> int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqrdmladhxq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3));})
>
> -#define __arm_vmlsdavaxq_p(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
> - __typeof(p1) __p1 = (p1); \
> +#define __arm_vmlsdavaxq_p(p0,p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \
> __typeof(p2) __p2 = (p2); \
> _Generic( (int (*)[__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
> - int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vmlsdavaxq_p_s8 (__p0, __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
> - int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmlsdavaxq_p_s16 (__p0, __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
> - int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmlsdavaxq_p_s32 (__p0, __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3));})
> + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vmlsdavaxq_p_s8 (p0, __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
> + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmlsdavaxq_p_s16 (p0, __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
> + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmlsdavaxq_p_s32 (p0, __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3));})
> +
> +#define __arm_vmlsdavaq(p0,p1,p2) ({ __typeof(p1) __p1 = (p1); \
> + __typeof(p2) __p2 = (p2); \
> + _Generic( (int (*)[__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
> + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vmlsdavaq_s8(p0, __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t)), \
> + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmlsdavaq_s16(p0, __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t)), \
> + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmlsdavaq_s32(p0, __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t)));})
> +
> +#define __arm_vmlsdavaxq(p0,p1,p2) ({ __typeof(p2) __p2 = (p2); \
> + __typeof(p1) __p1 = (p1); \
> + _Generic( (int (*)[__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
> + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vmlsdavaxq_s8(p0, __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t)), \
> + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmlsdavaxq_s16(p0, __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t)), \
> + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmlsdavaxq_s32(p0, __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t)));})
>
> -#define __arm_vmlsdavaq_p(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
> +#define __arm_vmlsdavq_p(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
> __typeof(p1) __p1 = (p1); \
> + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
> + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vmlsdavq_p_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), p2), \
> + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmlsdavq_p_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), p2), \
> + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmlsdavq_p_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), p2));})
> +
> +#define __arm_vmlsdavxq_p(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
> + __typeof(p1) __p1 = (p1); \
> + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
> + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vmlsdavxq_p_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), p2), \
> + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmlsdavxq_p_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), p2), \
> + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmlsdavxq_p_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), p2));})
> +
> +#define __arm_vmlsdavaq_p(p0,p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \
> __typeof(p2) __p2 = (p2); \
> _Generic( (int (*)[__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
> - int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vmlsdavaq_p_s8(__p0, __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
> - int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmlsdavaq_p_s16(__p0, __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
> - int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmlsdavaq_p_s32(__p0, __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3));})
> + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vmlsdavaq_p_s8(p0, __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
> + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmlsdavaq_p_s16(p0, __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
> + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmlsdavaq_p_s32(p0, __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3));})
>
> #define __arm_vmladavaxq_p(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
> __typeof(p1) __p1 = (p1); \
> @@ -41445,8 +41270,8 @@ extern void *__ARM_undef;
>
> #define __arm_viwdupq_u16(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
> _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
> - int (*)[__ARM_mve_type_int_n]: __arm_viwdupq_n_u16 (__ARM_mve_coerce(__p0, uint32_t), p1, p2), \
> - int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_viwdupq_wb_u16 (__ARM_mve_coerce(__p0, uint32_t *), p1, p2));})
> + int (*)[__ARM_mve_type_int_n]: __arm_viwdupq_n_u16 (__ARM_mve_coerce(__p0, uint32_t), p1, (const int) p2), \
> + int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_viwdupq_wb_u16 (__ARM_mve_coerce(__p0, uint32_t *), p1, (const int) p2));})
>
> #define __arm_viwdupq_u32(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
> _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
> @@ -41628,16 +41453,6 @@ extern void *__ARM_undef;
> int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int16x8_t]: __arm_vmaxavq_p_s16 (__p0, __ARM_mve_coerce(__p1, int16x8_t), p2), \
> int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int32x4_t]: __arm_vmaxavq_p_s32 (__p0, __ARM_mve_coerce(__p1, int32x4_t), p2));})
>
> -#define __arm_vmaxq_x(p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \
> - __typeof(p2) __p2 = (p2); \
> - _Generic( (int (*)[__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
> - int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vmaxq_x_s8 (__ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
> - int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmaxq_x_s16 (__ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
> - int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmaxq_x_s32 (__ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \
> - int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vmaxq_x_u8( __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \
> - int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vmaxq_x_u16( __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \
> - int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vmaxq_x_u32( __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3));})
> -
> #define __arm_vmaxvq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
> __typeof(p1) __p1 = (p1); \
> _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
> @@ -41672,6 +41487,16 @@ extern void *__ARM_undef;
> int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int16x8_t]: __arm_vminavq_p_s16 (__p0, __ARM_mve_coerce(__p1, int16x8_t), p2), \
> int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int32x4_t]: __arm_vminavq_p_s32 (__p0, __ARM_mve_coerce(__p1, int32x4_t), p2));})
>
> +#define __arm_vmaxq_x(p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \
> + __typeof(p2) __p2 = (p2); \
> + _Generic( (int (*)[__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
> + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vmaxq_x_s8 (__ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
> + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmaxq_x_s16 (__ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
> + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmaxq_x_s32 (__ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \
> + int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vmaxq_x_u8 (__ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \
> + int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vmaxq_x_u16 (__ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \
> + int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vmaxq_x_u32 (__ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3));})
> +
> #define __arm_vminq_x(p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \
> __typeof(p2) __p2 = (p2); \
> _Generic( (int (*)[__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
> @@ -41810,22 +41635,6 @@ extern void *__ARM_undef;
> int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmlaldavxq_p_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), p2), \
> int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmlaldavxq_p_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), p2));})
>
> -#define __arm_vmlsdavaq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
> - __typeof(p1) __p1 = (p1); \
> - __typeof(p2) __p2 = (p2); \
> - _Generic( (int (*)[__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
> - int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vmlsdavaq_s8(__p0, __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t)), \
> - int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmlsdavaq_s16(__p0, __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t)), \
> - int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmlsdavaq_s32(__p0, __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t)));})
> -
> -#define __arm_vmlsdavaxq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
> - __typeof(p1) __p1 = (p1); \
> - __typeof(p2) __p2 = (p2); \
> - _Generic( (int (*)[__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
> - int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vmlsdavaxq_s8(__p0, __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t)), \
> - int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmlsdavaxq_s16(__p0, __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t)), \
> - int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmlsdavaxq_s32(__p0, __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t)));})
> -
> #define __arm_vmlsdavq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
> __typeof(p1) __p1 = (p1); \
> _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
> @@ -41833,13 +41642,6 @@ extern void *__ARM_undef;
> int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmlsdavq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \
> int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmlsdavq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)));})
>
> -#define __arm_vmlsdavq_p(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
> - __typeof(p1) __p1 = (p1); \
> - _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
> - int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vmlsdavq_p_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), p2), \
> - int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmlsdavq_p_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), p2), \
> - int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmlsdavq_p_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), p2));})
> -
> #define __arm_vmlsdavxq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
> __typeof(p1) __p1 = (p1); \
> _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
> @@ -41847,13 +41649,6 @@ extern void *__ARM_undef;
> int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmlsdavxq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \
> int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmlsdavxq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)));})
>
> -#define __arm_vmlsdavxq_p(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
> - __typeof(p1) __p1 = (p1); \
> - _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
> - int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vmlsdavxq_p_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), p2), \
> - int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmlsdavxq_p_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), p2), \
> - int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmlsdavxq_p_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), p2));})
> -
> #define __arm_vmlsldavaq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
> __typeof(p1) __p1 = (p1); \
> __typeof(p2) __p2 = (p2); \
> @@ -41948,13 +41743,6 @@ extern void *__ARM_undef;
> int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vmulltq_poly_x_p8 (__ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \
> int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vmulltq_poly_x_p16 (__ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3));})
>
> -#define __arm_vrmlaldavhaq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
> - __typeof(p1) __p1 = (p1); \
> - __typeof(p2) __p2 = (p2); \
> - _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
> - int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vrmlaldavhaq_s32 (__ARM_mve_coerce(__p0, int64_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t)), \
> - int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vrmlaldavhaq_u32 (__ARM_mve_coerce(__p0, uint64_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t)));})
> -
> #define __arm_vrmlaldavhaxq(p0,p1,p2) __arm_vrmlaldavhaxq_s32(p0,p1,p2)
>
> #define __arm_vrmlaldavhq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
> @@ -41994,35 +41782,15 @@ extern void *__ARM_undef;
> int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint16x8_t]: __arm_vstrbq_u16 (__ARM_mve_coerce(p0, uint8_t *), __ARM_mve_coerce(__p1, uint16x8_t)), \
> int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vstrbq_u32 (__ARM_mve_coerce(p0, uint8_t *), __ARM_mve_coerce(__p1, uint32x4_t)));})
>
> -#define __arm_vstrbq_p(p0,p1,p2) ({ __typeof(p1) __p1 = (p1); \
> - _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)])0, \
> - int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_int8x16_t]: __arm_vstrbq_p_s8 (__ARM_mve_coerce(p0, int8_t *), __ARM_mve_coerce(__p1, int8x16_t), p2), \
> - int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_int16x8_t]: __arm_vstrbq_p_s16 (__ARM_mve_coerce(p0, int8_t *), __ARM_mve_coerce(__p1, int16x8_t), p2), \
> - int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_int32x4_t]: __arm_vstrbq_p_s32 (__ARM_mve_coerce(p0, int8_t *), __ARM_mve_coerce(__p1, int32x4_t), p2), \
> - int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint8x16_t]: __arm_vstrbq_p_u8 (__ARM_mve_coerce(p0, uint8_t *), __ARM_mve_coerce(__p1, uint8x16_t), p2), \
> - int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint16x8_t]: __arm_vstrbq_p_u16 (__ARM_mve_coerce(p0, uint8_t *), __ARM_mve_coerce(__p1, uint16x8_t), p2), \
> - int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vstrbq_p_u32 (__ARM_mve_coerce(p0, uint8_t *), __ARM_mve_coerce(__p1, uint32x4_t), p2));})
> -
> -#define __arm_vstrbq_scatter_offset(p0,p1,p2) ({ __typeof(p1) __p1 = (p1); \
> - __typeof(p2) __p2 = (p2); \
> - _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
> - int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint8x16_t][__ARM_mve_type_int8x16_t]: __arm_vstrbq_scatter_offset_s8 (__ARM_mve_coerce(p0, int8_t *), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, int8x16_t)), \
> - int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint16x8_t][__ARM_mve_type_int16x8_t]: __arm_vstrbq_scatter_offset_s16 (__ARM_mve_coerce(p0, int8_t *), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, int16x8_t)), \
> - int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint32x4_t][__ARM_mve_type_int32x4_t]: __arm_vstrbq_scatter_offset_s32 (__ARM_mve_coerce(p0, int8_t *), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, int32x4_t)), \
> - int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vstrbq_scatter_offset_u8 (__ARM_mve_coerce(p0, uint8_t *), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t)), \
> - int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vstrbq_scatter_offset_u16 (__ARM_mve_coerce(p0, uint8_t *), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t)), \
> - int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vstrbq_scatter_offset_u32 (__ARM_mve_coerce(p0, uint8_t *), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t)));})
> -
> -
> -#define __arm_vstrbq_scatter_offset_p(p0,p1,p2,p3) ({__typeof(p1) __p1 = (p1); \
> - __typeof(p2) __p2 = (p2); \
> - _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
> - int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint8x16_t][__ARM_mve_type_int8x16_t]: __arm_vstrbq_scatter_offset_p_s8 (__ARM_mve_coerce(p0, int8_t *), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
> - int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint16x8_t][__ARM_mve_type_int16x8_t]: __arm_vstrbq_scatter_offset_p_s16 (__ARM_mve_coerce(p0, int8_t *), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
> - int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint32x4_t][__ARM_mve_type_int32x4_t]: __arm_vstrbq_scatter_offset_p_s32 (__ARM_mve_coerce(p0, int8_t *), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \
> - int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vstrbq_scatter_offset_p_u8 (__ARM_mve_coerce(p0, uint8_t *), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \
> - int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vstrbq_scatter_offset_p_u16 (__ARM_mve_coerce(p0, uint8_t *), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \
> - int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vstrbq_scatter_offset_p_u32 (__ARM_mve_coerce(p0, uint8_t *), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3));})
> +#define __arm_vstrbq_p(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
> + __typeof(p1) __p1 = (p1); \
> + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
> + int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_int8x16_t]: __arm_vstrbq_p_s8 (__ARM_mve_coerce(__p0, int8_t *), __ARM_mve_coerce(__p1, int8x16_t), p2), \
> + int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_int16x8_t]: __arm_vstrbq_p_s16 (__ARM_mve_coerce(__p0, int8_t *), __ARM_mve_coerce(__p1, int16x8_t), p2), \
> + int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_int32x4_t]: __arm_vstrbq_p_s32 (__ARM_mve_coerce(__p0, int8_t *), __ARM_mve_coerce(__p1, int32x4_t), p2), \
> + int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint8x16_t]: __arm_vstrbq_p_u8 (__ARM_mve_coerce(__p0, uint8_t *), __ARM_mve_coerce(__p1, uint8x16_t), p2), \
> + int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint16x8_t]: __arm_vstrbq_p_u16 (__ARM_mve_coerce(__p0, uint8_t *), __ARM_mve_coerce(__p1, uint16x8_t), p2), \
> + int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vstrbq_p_u32 (__ARM_mve_coerce(__p0, uint8_t *), __ARM_mve_coerce(__p1, uint32x4_t), p2));})
>
> #define __arm_vstrdq_scatter_base(p0,p1,p2) ({ __typeof(p2) __p2 = (p2); \
> _Generic( (int (*)[__ARM_mve_typeid(__p2)])0, \
> @@ -42034,29 +41802,65 @@ extern void *__ARM_undef;
> int (*)[__ARM_mve_type_int64x2_t]: __arm_vstrdq_scatter_base_p_s64 (p0, p1, __ARM_mve_coerce(__p2, int64x2_t), p3), \
> int (*)[__ARM_mve_type_uint64x2_t]: __arm_vstrdq_scatter_base_p_u64 (p0, p1, __ARM_mve_coerce(__p2, uint64x2_t), p3));})
>
> -#define __arm_vstrdq_scatter_offset(p0,p1,p2) ({ __typeof(p1) __p1 = (p1); \
> +#define __arm_vrmlaldavhaq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
> + __typeof(p1) __p1 = (p1); \
> __typeof(p2) __p2 = (p2); \
> - _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p2)])0, \
> - int (*)[__ARM_mve_type_int64_t_ptr][__ARM_mve_type_int64x2_t]: __arm_vstrdq_scatter_offset_s64 (__ARM_mve_coerce(p0, int64_t *), __p1, __ARM_mve_coerce(__p2, int64x2_t)), \
> - int (*)[__ARM_mve_type_uint64_t_ptr][__ARM_mve_type_uint64x2_t]: __arm_vstrdq_scatter_offset_u64 (__ARM_mve_coerce(p0, uint64_t *), __p1, __ARM_mve_coerce(__p2, uint64x2_t)));})
> + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
> + int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vrmlaldavhaq_s32 (__ARM_mve_coerce(__p0, int64_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t)), \
> + int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vrmlaldavhaq_u32 (__ARM_mve_coerce(__p0, uint64_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t)));})
>
> -#define __arm_vstrdq_scatter_offset_p(p0,p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \
> +#define __arm_vrmlaldavhaq_p(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
> + __typeof(p1) __p1 = (p1); \
> __typeof(p2) __p2 = (p2); \
> - _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p2)])0, \
> - int (*)[__ARM_mve_type_int64_t_ptr][__ARM_mve_type_int64x2_t]: __arm_vstrdq_scatter_offset_p_s64 (__ARM_mve_coerce(p0, int64_t *), __p1, __ARM_mve_coerce(__p2, int64x2_t), p3), \
> - int (*)[__ARM_mve_type_uint64_t_ptr][__ARM_mve_type_uint64x2_t]: __arm_vstrdq_scatter_offset_p_u64 (__ARM_mve_coerce(p0, uint64_t *), __p1, __ARM_mve_coerce(__p2, uint64x2_t), p3));})
> + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
> + int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vrmlaldavhaq_p_s32 (__ARM_mve_coerce(__p0, int64_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \
> + int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vrmlaldavhaq_p_u32 (__ARM_mve_coerce(__p0, uint64_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3));})
>
> -#define __arm_vstrdq_scatter_shifted_offset(p0,p1,p2) ({ __typeof(p1) __p1 = (p1); \
> +#define __arm_vstrbq_scatter_offset(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
> + __typeof(p1) __p1 = (p1); \
> __typeof(p2) __p2 = (p2); \
> - _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p2)])0, \
> - int (*)[__ARM_mve_type_int64_t_ptr][__ARM_mve_type_int64x2_t]: __arm_vstrdq_scatter_shifted_offset_s64 (__ARM_mve_coerce(p0, int64_t *), __p1, __ARM_mve_coerce(__p2, int64x2_t)), \
> - int (*)[__ARM_mve_type_uint64_t_ptr][__ARM_mve_type_uint64x2_t]: __arm_vstrdq_scatter_shifted_offset_u64 (__ARM_mve_coerce(p0, uint64_t *), __p1, __ARM_mve_coerce(__p2, uint64x2_t)));})
> + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
> + int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint8x16_t][__ARM_mve_type_int8x16_t]: __arm_vstrbq_scatter_offset_s8 (__ARM_mve_coerce(__p0, int8_t *), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, int8x16_t)), \
> + int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint16x8_t][__ARM_mve_type_int16x8_t]: __arm_vstrbq_scatter_offset_s16 (__ARM_mve_coerce(__p0, int8_t *), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, int16x8_t)), \
> + int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint32x4_t][__ARM_mve_type_int32x4_t]: __arm_vstrbq_scatter_offset_s32 (__ARM_mve_coerce(__p0, int8_t *), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, int32x4_t)), \
> + int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vstrbq_scatter_offset_u8 (__ARM_mve_coerce(__p0, uint8_t *), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t)), \
> + int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vstrbq_scatter_offset_u16 (__ARM_mve_coerce(__p0, uint8_t *), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t)), \
> + int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vstrbq_scatter_offset_u32 (__ARM_mve_coerce(__p0, uint8_t *), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t)));})
>
> -#define __arm_vstrdq_scatter_shifted_offset_p(p0,p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \
> +#define __arm_vstrbq_scatter_offset_p(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
> + __typeof(p1) __p1 = (p1); \
> __typeof(p2) __p2 = (p2); \
> - _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p2)])0, \
> - int (*)[__ARM_mve_type_int64_t_ptr][__ARM_mve_type_int64x2_t]: __arm_vstrdq_scatter_shifted_offset_p_s64 (__ARM_mve_coerce(p0, int64_t *), __p1, __ARM_mve_coerce(__p2, int64x2_t), p3), \
> - int (*)[__ARM_mve_type_uint64_t_ptr][__ARM_mve_type_uint64x2_t]: __arm_vstrdq_scatter_shifted_offset_p_u64 (__ARM_mve_coerce(p0, uint64_t *), __p1, __ARM_mve_coerce(__p2, uint64x2_t), p3));})
> + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
> + int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint8x16_t][__ARM_mve_type_int8x16_t]: __arm_vstrbq_scatter_offset_p_s8 (__ARM_mve_coerce(__p0, int8_t *), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
> + int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint16x8_t][__ARM_mve_type_int16x8_t]: __arm_vstrbq_scatter_offset_p_s16 (__ARM_mve_coerce(__p0, int8_t *), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
> + int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint32x4_t][__ARM_mve_type_int32x4_t]: __arm_vstrbq_scatter_offset_p_s32 (__ARM_mve_coerce(__p0, int8_t *), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \
> + int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vstrbq_scatter_offset_p_u8 (__ARM_mve_coerce(__p0, uint8_t *), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \
> + int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vstrbq_scatter_offset_p_u16 (__ARM_mve_coerce(__p0, uint8_t *), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \
> + int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vstrbq_scatter_offset_p_u32 (__ARM_mve_coerce(__p0, uint8_t *), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3));})
> +
> +#define __arm_vstrdq_scatter_offset_p(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
> + __typeof(p2) __p2 = (p2); \
> + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p2)])0, \
> + int (*)[__ARM_mve_type_int64_t_ptr][__ARM_mve_type_int64x2_t]: __arm_vstrdq_scatter_offset_p_s64 (__ARM_mve_coerce(__p0, int64_t *), p1, __ARM_mve_coerce(__p2, int64x2_t), p3), \
> + int (*)[__ARM_mve_type_uint64_t_ptr][__ARM_mve_type_uint64x2_t]: __arm_vstrdq_scatter_offset_p_u64 (__ARM_mve_coerce(__p0, uint64_t *), p1, __ARM_mve_coerce(__p2, uint64x2_t), p3));})
> +
> +#define __arm_vstrdq_scatter_offset(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
> + __typeof(p2) __p2 = (p2); \
> + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p2)])0, \
> + int (*)[__ARM_mve_type_int64_t_ptr][__ARM_mve_type_int64x2_t]: __arm_vstrdq_scatter_offset_s64 (__ARM_mve_coerce(__p0, int64_t *), p1, __ARM_mve_coerce(__p2, int64x2_t)), \
> + int (*)[__ARM_mve_type_uint64_t_ptr][__ARM_mve_type_uint64x2_t]: __arm_vstrdq_scatter_offset_u64 (__ARM_mve_coerce(__p0, uint64_t *), p1, __ARM_mve_coerce(__p2, uint64x2_t)));})
> +
> +#define __arm_vstrdq_scatter_shifted_offset_p(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
> + __typeof(p2) __p2 = (p2); \
> + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p2)])0, \
> + int (*)[__ARM_mve_type_int64_t_ptr][__ARM_mve_type_int64x2_t]: __arm_vstrdq_scatter_shifted_offset_p_s64 (__ARM_mve_coerce(__p0, int64_t *), p1, __ARM_mve_coerce(__p2, int64x2_t), p3), \
> + int (*)[__ARM_mve_type_uint64_t_ptr][__ARM_mve_type_uint64x2_t]: __arm_vstrdq_scatter_shifted_offset_p_u64 (__ARM_mve_coerce(__p0, uint64_t *), p1, __ARM_mve_coerce(__p2, uint64x2_t), p3));})
> +
> +#define __arm_vstrdq_scatter_shifted_offset(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
> + __typeof(p2) __p2 = (p2); \
> + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p2)])0, \
> + int (*)[__ARM_mve_type_int64_t_ptr][__ARM_mve_type_int64x2_t]: __arm_vstrdq_scatter_shifted_offset_s64 (__ARM_mve_coerce(__p0, int64_t *), p1, __ARM_mve_coerce(__p2, int64x2_t)), \
> + int (*)[__ARM_mve_type_uint64_t_ptr][__ARM_mve_type_uint64x2_t]: __arm_vstrdq_scatter_shifted_offset_u64 (__ARM_mve_coerce(__p0, uint64_t *), p1, __ARM_mve_coerce(__p2, uint64x2_t)));})
>
> #endif /* __cplusplus */
> #endif /* __ARM_FEATURE_MVE */
>
^ permalink raw reply [flat|nested] 4+ messages in thread
* RE: [GCC][PATCH] arm: Remove duplicate definitions from arm_mve.h (pr100419).
2021-05-05 10:15 ` Richard Earnshaw
@ 2021-05-05 12:39 ` Srinath Parvathaneni
2021-05-10 14:18 ` Richard Earnshaw
0 siblings, 1 reply; 4+ messages in thread
From: Srinath Parvathaneni @ 2021-05-05 12:39 UTC (permalink / raw)
To: Richard Earnshaw, gcc-patches
Cc: Srinath Parvathaneni, Kyrylo Tkachov, Richard Earnshaw
Hi Richard,
> -----Original Message-----
> From: Richard Earnshaw <Richard.Earnshaw@foss.arm.com>
> Sent: 05 May 2021 11:15
> To: Srinath Parvathaneni <Srinath.Parvathaneni@arm.com>; gcc-
> patches@gcc.gnu.org
> Cc: Richard Earnshaw <Richard.Earnshaw@arm.com>
> Subject: Re: [GCC][PATCH] arm: Remove duplicate definitions from
> arm_mve.h (pr100419).
>
>
>
> On 05/05/2021 10:56, Srinath Parvathaneni via Gcc-patches wrote:
> > Hi All,
> >
> > This patch removes several duplicated intrinsic definitions from
> > arm_mve.h mentioned in PR100419 and also fixes the wrong arguments
> > in few of intrinsics polymorphic variants.
> >
> > Regression tested and found no issues.
> >
> > Ok for master ? GCC-11 and GCC-10 branch backports?
> > gcc/ChangeLog:
> >
> > 2021-05-04 Srinath Parvathaneni <srinath.parvathaneni@arm.com>
> >
> > PR target/100419
> > * config/arm/arm_mve.h (__arm_vstrwq_scatter_offset): Fix wrong
> arguments.
> > (__arm_vcmpneq): Remove duplicate definition.
> > (__arm_vstrwq_scatter_offset_p): Likewise.
> > (__arm_vmaxq_x): Likewise.
> > (__arm_vmlsdavaq): Likewise.
> > (__arm_vmlsdavaxq): Likewise.
> > (__arm_vmlsdavq_p): Likewise.
> > (__arm_vmlsdavxq_p): Likewise.
> > (__arm_vrmlaldavhaq): Likewise.
> > (__arm_vstrbq_p): Likewise.
> > (__arm_vstrbq_scatter_offset): Likewise.
> > (__arm_vstrbq_scatter_offset_p): Likewise.
> > (__arm_vstrdq_scatter_offset): Likewise.
> > (__arm_vstrdq_scatter_offset_p): Likewise.
> > (__arm_vstrdq_scatter_shifted_offset): Likewise.
> > (__arm_vstrdq_scatter_shifted_offset_p): Likewise.
> >
> > Co-authored-by: Joe Ramsay <joe.ramsay@arm.com>
>
> Let's take this example:
>
> -#define __arm_vstrwq_scatter_offset(p0,p1,p2) ({ __typeof(p1) __p1 =
> (p1); \
> +#define __arm_vstrwq_scatter_offset(p0,p1,p2) ({ __typeof(p0) __p0 =
> (p0); \
> __typeof(p2) __p2 = (p2); \
> - _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p2)])0, \
> - int (*)[__ARM_mve_type_int32_t_ptr][__ARM_mve_type_int32x4_t]:
> __arm_vstrwq_scatter_offset_s32 (__ARM_mve_coerce(p0, int32_t *), __p1,
> __ARM_mve_coerce(__p2, int32x4_t)), \
> - int (*)[__ARM_mve_type_uint32_t_ptr][__ARM_mve_type_uint32x4_t]:
> __arm_vstrwq_scatter_offset_u32 (__ARM_mve_coerce(p0, uint32_t *),
> __p1,
> __ARM_mve_coerce(__p2, uint32x4_t)));})
> + _Generic( (int
> (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p2)])0, \
> + int (*)[__ARM_mve_type_int32_t_ptr][__ARM_mve_type_int32x4_t]:
> __arm_vstrwq_scatter_offset_s32 (__ARM_mve_coerce(__p0, int32_t *), p1,
> __ARM_mve_coerce(__p2, int32x4_t)), \
> + int (*)[__ARM_mve_type_uint32_t_ptr][__ARM_mve_type_uint32x4_t]:
> __arm_vstrwq_scatter_offset_u32 (__ARM_mve_coerce(__p0, uint32_t *),
> p1,
> __ARM_mve_coerce(__p2, uint32x4_t)));})
>
> It removes the safe shadow copy of p1 but adds a safe shadow copy of p0.
> Why? Isn't it better (and safer) to just create shadow copies of all
> the arguments and let the compiler worry about when it's safe to
> eliminate them?
As you already know, polymorphic variants are used to select the intrinsic to call based on the types of their arguments.
Consider the following code from arm_mve.h:
/* Signed 32-bit variant of the vstrwq_scatter_offset intrinsic.
   Takes an int32_t base pointer, a uint32x4_t offset vector and an
   int32x4_t value vector, and forwards all three to
   __builtin_mve_vstrwq_scatter_offset_sv4si, casting the base pointer to
   __builtin_neon_si *.  Returns nothing.
   NOTE(review): the builtin's semantics are not visible here; presumably it
   performs the word scatter store -- confirm against the MVE builtins.  */
__extension__ extern __inline void
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vstrwq_scatter_offset_s32 (int32_t * __base, uint32x4_t __offset, int32x4_t __value)
{
__builtin_mve_vstrwq_scatter_offset_sv4si ((__builtin_neon_si *) __base, __offset, __value);
}
/* Unsigned 32-bit variant of the vstrwq_scatter_offset intrinsic.
   Takes a uint32_t base pointer, a uint32x4_t offset vector and a
   uint32x4_t value vector, and forwards all three to
   __builtin_mve_vstrwq_scatter_offset_uv4si, casting the base pointer to
   __builtin_neon_si *.  Returns nothing.
   NOTE(review): the builtin's semantics are not visible here; presumably it
   performs the word scatter store -- confirm against the MVE builtins.  */
__extension__ extern __inline void
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vstrwq_scatter_offset_u32 (uint32_t * __base, uint32x4_t __offset, uint32x4_t __value)
{
__builtin_mve_vstrwq_scatter_offset_uv4si ((__builtin_neon_si *) __base, __offset, __value);
}
/* Single-precision float variant of the vstrwq_scatter_offset intrinsic.
   Takes a float32_t base pointer, a uint32x4_t offset vector and a
   float32x4_t value vector, and forwards all three to
   __builtin_mve_vstrwq_scatter_offset_fv4sf.  Note that the base pointer is
   cast to __builtin_neon_si * here as well, the same cast the integer
   variants use.  Returns nothing.
   NOTE(review): the builtin's semantics are not visible here; presumably it
   performs the word scatter store -- confirm against the MVE builtins.  */
__extension__ extern __inline void
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vstrwq_scatter_offset_f32 (float32_t * __base, uint32x4_t __offset, float32x4_t __value)
{
__builtin_mve_vstrwq_scatter_offset_fv4sf ((__builtin_neon_si *) __base, __offset, __value);
}
Which of the above three functions is called from the following polymorphic variant is
decided based on the types of the arguments p0, p1 and p2.
#define __arm_vstrwq_scatter_offset(p0,p1,p2)
For the three function definitions mentioned above, only the types of the first (p0) and third (p2)
arguments vary, whereas the type of the second argument (p1) is always the same (uint32x4_t).
This is the reason we need shadow copies of only p0 and p2 to determine the actual function to be called;
the type of p1 is irrelevant. Previously p1 was wrongly used to determine the function instead of p0.
That was a bug, and it is fixed in this patch.
Since the type of p1 is irrelevant in deciding the function to be called, I believe adding a shadow copy
for p1 (__typeof(p1) __p1 = (p1)) in this macro expansion is of no use. Considering we have more than
250 polymorphic variants defined in the arm_mve.h header, this would result in more than 250 lines of extra code.
Regards,
Srinath.
> R.
>
> >
> >
> > ############### Attachment also inlined for ease of reply
> ###############
> >
> >
> > diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h
> > index
> 3a40c6e68161b64319b071f57a5b0d8393303cfd..dc1d874a6366eb5fe755a70c
> 72ed371c915bd04b 100644
> > --- a/gcc/config/arm/arm_mve.h
> > +++ b/gcc/config/arm/arm_mve.h
> > @@ -37808,33 +37808,19 @@ extern void *__ARM_undef;
> > int (*)[__ARM_mve_type_uint32x4_t]:
> __arm_vstrwq_scatter_base_p_u32(p0, p1, __ARM_mve_coerce(__p2,
> uint32x4_t), p3), \
> > int (*)[__ARM_mve_type_float32x4_t]:
> __arm_vstrwq_scatter_base_p_f32(p0, p1, __ARM_mve_coerce(__p2,
> float32x4_t), p3));})
> >
> > -#define __arm_vstrwq_scatter_offset(p0,p1,p2) ({ __typeof(p1) __p1 =
> (p1); \
> > +#define __arm_vstrwq_scatter_offset(p0,p1,p2) ({ __typeof(p0) __p0 =
> (p0); \
> > __typeof(p2) __p2 = (p2); \
> > - _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p2)])0,
> \
> > - int (*)[__ARM_mve_type_int32_t_ptr][__ARM_mve_type_int32x4_t]:
> __arm_vstrwq_scatter_offset_s32 (__ARM_mve_coerce(p0, int32_t *), __p1,
> __ARM_mve_coerce(__p2, int32x4_t)), \
> > - int (*)[__ARM_mve_type_uint32_t_ptr][__ARM_mve_type_uint32x4_t]:
> __arm_vstrwq_scatter_offset_u32 (__ARM_mve_coerce(p0, uint32_t *),
> __p1, __ARM_mve_coerce(__p2, uint32x4_t)), \
> > - int (*)[__ARM_mve_type_float32_t_ptr][__ARM_mve_type_float32x4_t]:
> __arm_vstrwq_scatter_offset_f32 (__ARM_mve_coerce(p0, float32_t *),
> __p1, __ARM_mve_coerce(__p2, float32x4_t)));})
> > + _Generic( (int
> (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p2)])0, \
> > + int (*)[__ARM_mve_type_int32_t_ptr][__ARM_mve_type_int32x4_t]:
> __arm_vstrwq_scatter_offset_s32 (__ARM_mve_coerce(__p0, int32_t *), p1,
> __ARM_mve_coerce(__p2, int32x4_t)), \
> > + int (*)[__ARM_mve_type_uint32_t_ptr][__ARM_mve_type_uint32x4_t]:
> __arm_vstrwq_scatter_offset_u32 (__ARM_mve_coerce(__p0, uint32_t *),
> p1, __ARM_mve_coerce(__p2, uint32x4_t)), \
> > + int (*)[__ARM_mve_type_float32_t_ptr][__ARM_mve_type_float32x4_t]:
> __arm_vstrwq_scatter_offset_f32 (__ARM_mve_coerce(__p0, float32_t *),
> p1, __ARM_mve_coerce(__p2, float32x4_t)));})
> >
> > -#define __arm_vstrwq_scatter_offset_p(p0,p1,p2,p3) ({ __typeof(p1) __p1
> = (p1); \
> > +#define __arm_vstrwq_scatter_offset_p(p0,p1,p2,p3) ({ __typeof(p0)
> __p0 = (p0); \
> > __typeof(p2) __p2 = (p2); \
> > - _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p2)])0,
> \
> > - int (*)[__ARM_mve_type_int32_t_ptr][__ARM_mve_type_int32x4_t]:
> __arm_vstrwq_scatter_offset_p_s32 (__ARM_mve_coerce(p0, int32_t *),
> __p1, __ARM_mve_coerce(__p2, int32x4_t), p3), \
> > - int (*)[__ARM_mve_type_uint32_t_ptr][__ARM_mve_type_uint32x4_t]:
> __arm_vstrwq_scatter_offset_p_u32 (__ARM_mve_coerce(p0, uint32_t *),
> __p1, __ARM_mve_coerce(__p2, uint32x4_t), p3), \
> > - int (*)[__ARM_mve_type_float32_t_ptr][__ARM_mve_type_float32x4_t]:
> __arm_vstrwq_scatter_offset_p_f32 (__ARM_mve_coerce(p0, float32_t *),
> __p1, __ARM_mve_coerce(__p2, float32x4_t), p3));})
> > -
> > -#define __arm_vstrwq_scatter_offset_p(p0,p1,p2,p3) ({ __typeof(p1) __p1
> = (p1); \
> > - __typeof(p2) __p2 = (p2); \
> > - _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p2)])0,
> \
> > - int (*)[__ARM_mve_type_int32_t_ptr][__ARM_mve_type_int32x4_t]:
> __arm_vstrwq_scatter_offset_p_s32 (__ARM_mve_coerce(p0, int32_t *),
> __p1, __ARM_mve_coerce(__p2, int32x4_t), p3), \
> > - int (*)[__ARM_mve_type_uint32_t_ptr][__ARM_mve_type_uint32x4_t]:
> __arm_vstrwq_scatter_offset_p_u32 (__ARM_mve_coerce(p0, uint32_t *),
> __p1, __ARM_mve_coerce(__p2, uint32x4_t), p3), \
> > - int (*)[__ARM_mve_type_float32_t_ptr][__ARM_mve_type_float32x4_t]:
> __arm_vstrwq_scatter_offset_p_f32 (__ARM_mve_coerce(p0, float32_t *),
> __p1, __ARM_mve_coerce(__p2, float32x4_t), p3));})
> > -
> > -#define __arm_vstrwq_scatter_offset(p0,p1,p2) ({ __typeof(p1) __p1 =
> (p1); \
> > - __typeof(p2) __p2 = (p2); \
> > - _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p2)])0,
> \
> > - int (*)[__ARM_mve_type_int32_t_ptr][__ARM_mve_type_int32x4_t]:
> __arm_vstrwq_scatter_offset_s32 (__ARM_mve_coerce(p0, int32_t *), __p1,
> __ARM_mve_coerce(__p2, int32x4_t)), \
> > - int (*)[__ARM_mve_type_uint32_t_ptr][__ARM_mve_type_uint32x4_t]:
> __arm_vstrwq_scatter_offset_u32 (__ARM_mve_coerce(p0, uint32_t *),
> __p1, __ARM_mve_coerce(__p2, uint32x4_t)), \
> > - int (*)[__ARM_mve_type_float32_t_ptr][__ARM_mve_type_float32x4_t]:
> __arm_vstrwq_scatter_offset_f32 (__ARM_mve_coerce(p0, float32_t *),
> __p1, __ARM_mve_coerce(__p2, float32x4_t)));})
> > + _Generic( (int
> (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p2)])0, \
> > + int (*)[__ARM_mve_type_int32_t_ptr][__ARM_mve_type_int32x4_t]:
> __arm_vstrwq_scatter_offset_p_s32 (__ARM_mve_coerce(__p0, int32_t *),
> p1, __ARM_mve_coerce(__p2, int32x4_t), p3), \
> > + int (*)[__ARM_mve_type_uint32_t_ptr][__ARM_mve_type_uint32x4_t]:
> __arm_vstrwq_scatter_offset_p_u32 (__ARM_mve_coerce(__p0, uint32_t *),
> p1, __ARM_mve_coerce(__p2, uint32x4_t), p3), \
> > + int (*)[__ARM_mve_type_float32_t_ptr][__ARM_mve_type_float32x4_t]:
> __arm_vstrwq_scatter_offset_p_f32 (__ARM_mve_coerce(__p0, float32_t *),
> p1, __ARM_mve_coerce(__p2, float32x4_t), p3));})
> >
> > #define __arm_vstrwq_scatter_shifted_offset(p0,p1,p2) ({ __typeof(p1)
> __p1 = (p1); \
> > __typeof(p2) __p2 = (p2); \
> > @@ -38422,6 +38408,12 @@ extern void *__ARM_undef;
> > #define __arm_vcmpneq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
> > __typeof(p1) __p1 = (p1); \
> > _Generic( (int
> (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
> > + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]:
> __arm_vcmpneq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t),
> __ARM_mve_coerce(__p1, int8_t)), \
> > + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]:
> __arm_vcmpneq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t),
> __ARM_mve_coerce(__p1, int16_t)), \
> > + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]:
> __arm_vcmpneq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t),
> __ARM_mve_coerce(__p1, int32_t)), \
> > + int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int_n]:
> __arm_vcmpneq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t),
> __ARM_mve_coerce(__p1, uint8_t)), \
> > + int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]:
> __arm_vcmpneq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t),
> __ARM_mve_coerce(__p1, uint16_t)), \
> > + int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]:
> __arm_vcmpneq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t),
> __ARM_mve_coerce(__p1, uint32_t)), \
> > int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]:
> __arm_vcmpneq_s8 (__ARM_mve_coerce(__p0, int8x16_t),
> __ARM_mve_coerce(__p1, int8x16_t)), \
> > int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]:
> __arm_vcmpneq_s16 (__ARM_mve_coerce(__p0, int16x8_t),
> __ARM_mve_coerce(__p1, int16x8_t)), \
> > int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]:
> __arm_vcmpneq_s32 (__ARM_mve_coerce(__p0, int32x4_t),
> __ARM_mve_coerce(__p1, int32x4_t)), \
> > @@ -38871,23 +38863,6 @@ extern void *__ARM_undef;
> > int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]:
> __arm_vcmpeqq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t),
> __ARM_mve_coerce(__p1, uint16_t)), \
> > int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]:
> __arm_vcmpeqq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t),
> __ARM_mve_coerce(__p1, uint32_t)));})
> >
> > -#define __arm_vcmpneq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
> > - __typeof(p1) __p1 = (p1); \
> > - _Generic( (int
> (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
> > - int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]:
> __arm_vcmpneq_s8 (__ARM_mve_coerce(__p0, int8x16_t),
> __ARM_mve_coerce(__p1, int8x16_t)), \
> > - int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]:
> __arm_vcmpneq_s16 (__ARM_mve_coerce(__p0, int16x8_t),
> __ARM_mve_coerce(__p1, int16x8_t)), \
> > - int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]:
> __arm_vcmpneq_s32 (__ARM_mve_coerce(__p0, int32x4_t),
> __ARM_mve_coerce(__p1, int32x4_t)), \
> > - int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]:
> __arm_vcmpneq_u8 (__ARM_mve_coerce(__p0, uint8x16_t),
> __ARM_mve_coerce(__p1, uint8x16_t)), \
> > - int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]:
> __arm_vcmpneq_u16 (__ARM_mve_coerce(__p0, uint16x8_t),
> __ARM_mve_coerce(__p1, uint16x8_t)), \
> > - int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]:
> __arm_vcmpneq_u32 (__ARM_mve_coerce(__p0, uint32x4_t),
> __ARM_mve_coerce(__p1, uint32x4_t)), \
> > - int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]:
> __arm_vcmpneq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t),
> __ARM_mve_coerce(__p1, int8_t)), \
> > - int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]:
> __arm_vcmpneq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t),
> __ARM_mve_coerce(__p1, int16_t)), \
> > - int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]:
> __arm_vcmpneq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t),
> __ARM_mve_coerce(__p1, int32_t)), \
> > - int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int_n]:
> __arm_vcmpneq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t),
> __ARM_mve_coerce(__p1, uint8_t)), \
> > - int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]:
> __arm_vcmpneq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t),
> __ARM_mve_coerce(__p1, uint16_t)), \
> > - int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]:
> __arm_vcmpneq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t),
> __ARM_mve_coerce(__p1, uint32_t)));})
> > -
> > -
> > #define __arm_vqmovntq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
> > __typeof(p1) __p1 = (p1); \
> > _Generic( (int
> (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
> > @@ -39036,22 +39011,6 @@ extern void *__ARM_undef;
> > int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]:
> __arm_vcmpneq_m_u16 (__ARM_mve_coerce(__p0, uint16x8_t),
> __ARM_mve_coerce(__p1, uint16x8_t), p2), \
> > int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]:
> __arm_vcmpneq_m_u32 (__ARM_mve_coerce(__p0, uint32x4_t),
> __ARM_mve_coerce(__p1, uint32x4_t), p2));})
> >
> > -#define __arm_vcmpneq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
> > - __typeof(p1) __p1 = (p1); \
> > - _Generic( (int
> (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
> > - int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]:
> __arm_vcmpneq_s8 (__ARM_mve_coerce(__p0, int8x16_t),
> __ARM_mve_coerce(__p1, int8x16_t)), \
> > - int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]:
> __arm_vcmpneq_s16 (__ARM_mve_coerce(__p0, int16x8_t),
> __ARM_mve_coerce(__p1, int16x8_t)), \
> > - int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]:
> __arm_vcmpneq_s32 (__ARM_mve_coerce(__p0, int32x4_t),
> __ARM_mve_coerce(__p1, int32x4_t)), \
> > - int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]:
> __arm_vcmpneq_u8 (__ARM_mve_coerce(__p0, uint8x16_t),
> __ARM_mve_coerce(__p1, uint8x16_t)), \
> > - int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]:
> __arm_vcmpneq_u16 (__ARM_mve_coerce(__p0, uint16x8_t),
> __ARM_mve_coerce(__p1, uint16x8_t)), \
> > - int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]:
> __arm_vcmpneq_u32 (__ARM_mve_coerce(__p0, uint32x4_t),
> __ARM_mve_coerce(__p1, uint32x4_t)), \
> > - int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]:
> __arm_vcmpneq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t),
> __ARM_mve_coerce(__p1, int8_t)), \
> > - int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]:
> __arm_vcmpneq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t),
> __ARM_mve_coerce(__p1, int16_t)), \
> > - int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]:
> __arm_vcmpneq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t),
> __ARM_mve_coerce(__p1, int32_t)), \
> > - int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int_n]:
> __arm_vcmpneq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t),
> __ARM_mve_coerce(__p1, uint8_t)), \
> > - int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]:
> __arm_vcmpneq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t),
> __ARM_mve_coerce(__p1, uint16_t)), \
> > - int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]:
> __arm_vcmpneq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t),
> __ARM_mve_coerce(__p1, uint32_t)));})
> > -
> > #define __arm_vshlcq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
> > _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
> > int (*)[__ARM_mve_type_int8x16_t]: __arm_vshlcq_s8
> (__ARM_mve_coerce(__p0, int8x16_t), p1, p2), \
> > @@ -39367,52 +39326,6 @@ extern void *__ARM_undef;
> > int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int16x8_t]:
> __arm_vminaq_m_s16 (__ARM_mve_coerce(__p0, uint16x8_t),
> __ARM_mve_coerce(__p1, int16x8_t), p2), \
> > int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int32x4_t]:
> __arm_vminaq_m_s32 (__ARM_mve_coerce(__p0, uint32x4_t),
> __ARM_mve_coerce(__p1, int32x4_t), p2));})
> >
> > -#define __arm_vrmlaldavhaq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
> > - __typeof(p1) __p1 = (p1); \
> > - _Generic( (int
> (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
> > - int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]:
> __arm_vcmpltq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t),
> __ARM_mve_coerce(__p1, int8x16_t), p2), \
> > - int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]:
> __arm_vcmpltq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t),
> __ARM_mve_coerce(__p1, int16x8_t), p2), \
> > - int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]:
> __arm_vcmpltq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t),
> __ARM_mve_coerce(__p1, int32x4_t), p2), \
> > - int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]:
> __arm_vcmpltq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t),
> __ARM_mve_coerce(__p1, int8_t), p2), \
> > - int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]:
> __arm_vcmpltq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t),
> __ARM_mve_coerce(__p1, int16_t), p2), \
> > - int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]:
> __arm_vcmpltq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t),
> __ARM_mve_coerce(__p1, int32_t), p2));})
> > -
> > -#define __arm_vmlsdavxq_p(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
> > - __typeof(p1) __p1 = (p1); \
> > - _Generic( (int
> (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
> > - int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]:
> __arm_vcmpleq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t),
> __ARM_mve_coerce(__p1, int8x16_t), p2), \
> > - int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]:
> __arm_vcmpleq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t),
> __ARM_mve_coerce(__p1, int16x8_t), p2), \
> > - int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]:
> __arm_vcmpleq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t),
> __ARM_mve_coerce(__p1, int32x4_t), p2), \
> > - int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]:
> __arm_vcmpleq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t),
> __ARM_mve_coerce(__p1, int8_t), p2), \
> > - int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]:
> __arm_vcmpleq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t),
> __ARM_mve_coerce(__p1, int16_t), p2), \
> > - int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]:
> __arm_vcmpleq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t),
> __ARM_mve_coerce(__p1, int32_t), p2));})
> > -
> > -#define __arm_vmlsdavq_p(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
> > - __typeof(p1) __p1 = (p1); \
> > - _Generic( (int
> (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
> > - int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]:
> __arm_vcmpgtq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t),
> __ARM_mve_coerce(__p1, int8x16_t), p2), \
> > - int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]:
> __arm_vcmpgtq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t),
> __ARM_mve_coerce(__p1, int16x8_t), p2), \
> > - int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]:
> __arm_vcmpgtq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t),
> __ARM_mve_coerce(__p1, int32x4_t), p2), \
> > - int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]:
> __arm_vcmpgtq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t),
> __ARM_mve_coerce(__p1, int8_t), p2), \
> > - int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]:
> __arm_vcmpgtq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t),
> __ARM_mve_coerce(__p1, int16_t), p2), \
> > - int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]:
> __arm_vcmpgtq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t),
> __ARM_mve_coerce(__p1, int32_t), p2));})
> > -
> > -#define __arm_vmlsdavaxq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
> > - __typeof(p1) __p1 = (p1); \
> > - _Generic( (int
> (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
> > - int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int16x8_t]:
> __arm_vshrntq_n_s16 (__ARM_mve_coerce(__p0, int8x16_t),
> __ARM_mve_coerce(__p1, int16x8_t), p2), \
> > - int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int32x4_t]:
> __arm_vshrntq_n_s32 (__ARM_mve_coerce(__p0, int16x8_t),
> __ARM_mve_coerce(__p1, int32x4_t), p2), \
> > - int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint16x8_t]:
> __arm_vshrntq_n_u16 (__ARM_mve_coerce(__p0, uint8x16_t),
> __ARM_mve_coerce(__p1, uint16x8_t), p2), \
> > - int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint32x4_t]:
> __arm_vshrntq_n_u32 (__ARM_mve_coerce(__p0, uint16x8_t),
> __ARM_mve_coerce(__p1, uint32x4_t), p2));})
> > -
> > -#define __arm_vmlsdavaq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
> > - __typeof(p1) __p1 = (p1); \
> > - _Generic( (int
> (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
> > - int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int16x8_t]:
> __arm_vrshrntq_n_s16 (__ARM_mve_coerce(__p0, int8x16_t),
> __ARM_mve_coerce(__p1, int16x8_t), p2), \
> > - int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int32x4_t]:
> __arm_vrshrntq_n_s32 (__ARM_mve_coerce(__p0, int16x8_t),
> __ARM_mve_coerce(__p1, int32x4_t), p2), \
> > - int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint16x8_t]:
> __arm_vrshrntq_n_u16 (__ARM_mve_coerce(__p0, uint8x16_t),
> __ARM_mve_coerce(__p1, uint16x8_t), p2), \
> > - int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint32x4_t]:
> __arm_vrshrntq_n_u32 (__ARM_mve_coerce(__p0, uint16x8_t),
> __ARM_mve_coerce(__p1, uint32x4_t), p2));})
> > -
> > #define __arm_vmovlbq_m(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
> > __typeof(p1) __p1 = (p1); \
> > _Generic( (int
> (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
> > @@ -39711,26 +39624,6 @@ extern void *__ARM_undef;
> > int
> (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_m
> ve_type_uint16x8_t]: __arm_vmulq_m_u16 (__ARM_mve_coerce(__p0,
> uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t),
> __ARM_mve_coerce(__p2, uint16x8_t), p3), \
> > int
> (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_m
> ve_type_uint32x4_t]: __arm_vmulq_m_u32 (__ARM_mve_coerce(__p0,
> uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t),
> __ARM_mve_coerce(__p2, uint32x4_t), p3));})
> >
> > -#define __arm_vstrbq(p0,p1) ({ __typeof(p1) __p1 = (p1); \
> > - _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)])0,
> \
> > - int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_int8x16_t]:
> __arm_vstrbq_s8 (__ARM_mve_coerce(p0, int8_t *),
> __ARM_mve_coerce(__p1, int8x16_t)), \
> > - int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_int16x8_t]:
> __arm_vstrbq_s16 (__ARM_mve_coerce(p0, int8_t *),
> __ARM_mve_coerce(__p1, int16x8_t)), \
> > - int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_int32x4_t]:
> __arm_vstrbq_s32 (__ARM_mve_coerce(p0, int8_t *),
> __ARM_mve_coerce(__p1, int32x4_t)), \
> > - int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint8x16_t]:
> __arm_vstrbq_u8 (__ARM_mve_coerce(p0, uint8_t *),
> __ARM_mve_coerce(__p1, uint8x16_t)), \
> > - int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint16x8_t]:
> __arm_vstrbq_u16 (__ARM_mve_coerce(p0, uint8_t *),
> __ARM_mve_coerce(__p1, uint16x8_t)), \
> > - int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint32x4_t]:
> __arm_vstrbq_u32 (__ARM_mve_coerce(p0, uint8_t *),
> __ARM_mve_coerce(__p1, uint32x4_t)));})
> > -
> > -#define __arm_vstrbq_scatter_offset(p0,p1,p2) ({ __typeof(p0) __p0 =
> (p0); \
> > - __typeof(p1) __p1 = (p1); \
> > - __typeof(p2) __p2 = (p2); \
> > - _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
> > - int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint8x16_t][__ARM_mve_type_int8x16_t]: __arm_vstrbq_scatter_offset_s8 (__ARM_mve_coerce(__p0, int8_t *), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, int8x16_t)), \
> > - int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint16x8_t][__ARM_mve_type_int16x8_t]: __arm_vstrbq_scatter_offset_s16 (__ARM_mve_coerce(__p0, int8_t *), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, int16x8_t)), \
> > - int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint32x4_t][__ARM_mve_type_int32x4_t]: __arm_vstrbq_scatter_offset_s32 (__ARM_mve_coerce(__p0, int8_t *), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, int32x4_t)), \
> > - int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vstrbq_scatter_offset_u8 (__ARM_mve_coerce(__p0, uint8_t *), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t)), \
> > - int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vstrbq_scatter_offset_u16 (__ARM_mve_coerce(__p0, uint8_t *), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t)), \
> > - int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vstrbq_scatter_offset_u32 (__ARM_mve_coerce(__p0, uint8_t *), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t)));})
> > -
> > #define __arm_vstrwq_scatter_base(p0,p1,p2) ({ __typeof(p2) __p2 = (p2);
> \
> > _Generic( (int (*)[__ARM_mve_typeid(__p2)])0, \
> > int (*)[__ARM_mve_type_int32x4_t]:
> __arm_vstrwq_scatter_base_s32(p0, p1, __ARM_mve_coerce(__p2,
> int32x4_t)), \
> > @@ -39745,27 +39638,6 @@ extern void *__ARM_undef;
> > int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint16x8_t]:
> __arm_vldrbq_gather_offset_u16 (__ARM_mve_coerce(p0, uint8_t const *),
> __ARM_mve_coerce(__p1, uint16x8_t)), \
> > int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint32x4_t]:
> __arm_vldrbq_gather_offset_u32 (__ARM_mve_coerce(p0, uint8_t const *),
> __ARM_mve_coerce(__p1, uint32x4_t)));})
> >
> > -#define __arm_vstrbq_p(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
> > - __typeof(p1) __p1 = (p1); \
> > - _Generic( (int
> (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
> > - int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_int8x16_t]:
> __arm_vstrbq_p_s8 (__ARM_mve_coerce(__p0, int8_t *),
> __ARM_mve_coerce(__p1, int8x16_t), p2), \
> > - int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_int16x8_t]:
> __arm_vstrbq_p_s16 (__ARM_mve_coerce(__p0, int8_t *),
> __ARM_mve_coerce(__p1, int16x8_t), p2), \
> > - int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_int32x4_t]:
> __arm_vstrbq_p_s32 (__ARM_mve_coerce(__p0, int8_t *),
> __ARM_mve_coerce(__p1, int32x4_t), p2), \
> > - int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint8x16_t]:
> __arm_vstrbq_p_u8 (__ARM_mve_coerce(__p0, uint8_t *),
> __ARM_mve_coerce(__p1, uint8x16_t), p2), \
> > - int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint16x8_t]:
> __arm_vstrbq_p_u16 (__ARM_mve_coerce(__p0, uint8_t *),
> __ARM_mve_coerce(__p1, uint16x8_t), p2), \
> > - int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint32x4_t]:
> __arm_vstrbq_p_u32 (__ARM_mve_coerce(__p0, uint8_t *),
> __ARM_mve_coerce(__p1, uint32x4_t), p2));})
> > -
> > -#define __arm_vstrbq_scatter_offset_p(p0,p1,p2,p3) ({ __typeof(p0) __p0
> = (p0); \
> > - __typeof(p1) __p1 = (p1); \
> > - __typeof(p2) __p2 = (p2); \
> > - _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
> > - int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint8x16_t][__ARM_mve_type_int8x16_t]: __arm_vstrbq_scatter_offset_p_s8 (__ARM_mve_coerce(__p0, int8_t *), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
> > - int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint16x8_t][__ARM_mve_type_int16x8_t]: __arm_vstrbq_scatter_offset_p_s16 (__ARM_mve_coerce(__p0, int8_t *), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
> > - int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint32x4_t][__ARM_mve_type_int32x4_t]: __arm_vstrbq_scatter_offset_p_s32 (__ARM_mve_coerce(__p0, int8_t *), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \
> > - int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vstrbq_scatter_offset_p_u8 (__ARM_mve_coerce(__p0, uint8_t *), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \
> > - int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vstrbq_scatter_offset_p_u16 (__ARM_mve_coerce(__p0, uint8_t *), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \
> > - int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vstrbq_scatter_offset_p_u32 (__ARM_mve_coerce(__p0, uint8_t *), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3));})
> > -
> > #define __arm_vstrwq_scatter_base_p(p0,p1,p2,p3) ({ __typeof(p2) __p2
> = (p2); \
> > _Generic( (int (*)[__ARM_mve_typeid(__p2)])0, \
> > int (*)[__ARM_mve_type_int32x4_t]: __arm_vstrwq_scatter_base_p_s32
> (p0, p1, __ARM_mve_coerce(__p2, int32x4_t), p3), \
> > @@ -39921,34 +39793,6 @@ extern void *__ARM_undef;
> > int (*)[__ARM_mve_type_int64x2_t]: __arm_vstrdq_scatter_base_s64
> (p0, p1, __ARM_mve_coerce(__p2, int64x2_t)), \
> > int (*)[__ARM_mve_type_uint64x2_t]: __arm_vstrdq_scatter_base_u64
> (p0, p1, __ARM_mve_coerce(__p2, uint64x2_t)));})
> >
> > -#define __arm_vstrdq_scatter_offset_p(p0,p1,p2,p3) ({ __typeof(p0) __p0
> = (p0); \
> > - __typeof(p1) __p1 = (p1); \
> > - __typeof(p2) __p2 = (p2); \
> > - _Generic( (int
> (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p2)])0, \
> > - int (*)[__ARM_mve_type_int64_t_ptr][__ARM_mve_type_int64x2_t]:
> __arm_vstrdq_scatter_offset_p_s64 (__ARM_mve_coerce(__p0, int64_t *),
> __p1, __ARM_mve_coerce(__p2, int64x2_t), p3), \
> > - int (*)[__ARM_mve_type_uint64_t_ptr][__ARM_mve_type_uint64x2_t]:
> __arm_vstrdq_scatter_offset_p_u64 (__ARM_mve_coerce(__p0, uint64_t *),
> __p1, __ARM_mve_coerce(__p2, uint64x2_t), p3));})
> > -
> > -#define __arm_vstrdq_scatter_offset(p0,p1,p2) ({ __typeof(p0) __p0 =
> (p0); \
> > - __typeof(p1) __p1 = (p1); \
> > - __typeof(p2) __p2 = (p2); \
> > - _Generic( (int
> (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p2)])0, \
> > - int (*)[__ARM_mve_type_int64_t_ptr][__ARM_mve_type_int64x2_t]:
> __arm_vstrdq_scatter_offset_s64 (__ARM_mve_coerce(__p0, int64_t *),
> __p1, __ARM_mve_coerce(__p2, int64x2_t)), \
> > - int (*)[__ARM_mve_type_uint64_t_ptr][__ARM_mve_type_uint64x2_t]:
> __arm_vstrdq_scatter_offset_u64 (__ARM_mve_coerce(__p0, uint64_t *),
> __p1, __ARM_mve_coerce(__p2, uint64x2_t)));})
> > -
> > -#define __arm_vstrdq_scatter_shifted_offset_p(p0,p1,p2,p3)
> ({ __typeof(p0) __p0 = (p0); \
> > - __typeof(p1) __p1 = (p1); \
> > - __typeof(p2) __p2 = (p2); \
> > - _Generic( (int
> (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p2)])0, \
> > - int (*)[__ARM_mve_type_int64_t_ptr][__ARM_mve_type_int64x2_t]:
> __arm_vstrdq_scatter_shifted_offset_p_s64 (__ARM_mve_coerce(__p0,
> int64_t *), __p1, __ARM_mve_coerce(__p2, int64x2_t), p3), \
> > - int (*)[__ARM_mve_type_uint64_t_ptr][__ARM_mve_type_uint64x2_t]:
> __arm_vstrdq_scatter_shifted_offset_p_u64 (__ARM_mve_coerce(__p0,
> uint64_t *), __p1, __ARM_mve_coerce(__p2, uint64x2_t), p3));})
> > -
> > -#define __arm_vstrdq_scatter_shifted_offset(p0,p1,p2) ({ __typeof(p0)
> __p0 = (p0); \
> > - __typeof(p1) __p1 = (p1); \
> > - __typeof(p2) __p2 = (p2); \
> > - _Generic( (int
> (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p2)])0, \
> > - int (*)[__ARM_mve_type_int64_t_ptr][__ARM_mve_type_int64x2_t]:
> __arm_vstrdq_scatter_shifted_offset_s64 (__ARM_mve_coerce(__p0,
> int64_t *), __p1, __ARM_mve_coerce(__p2, int64x2_t)), \
> > - int (*)[__ARM_mve_type_uint64_t_ptr][__ARM_mve_type_uint64x2_t]:
> __arm_vstrdq_scatter_shifted_offset_u64 (__ARM_mve_coerce(__p0,
> uint64_t *), __p1, __ARM_mve_coerce(__p2, uint64x2_t)));})
> > -
> > #define __arm_vstrhq_scatter_offset(p0,p1,p2) ({ __typeof(p1) __p1 =
> (p1); \
> > __typeof(p2) __p2 = (p2); \
> > _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
> > @@ -39981,29 +39825,17 @@ extern void *__ARM_undef;
> > int (*)[__ARM_mve_type_uint16_t_ptr][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vstrhq_scatter_shifted_offset_p_u16 (__ARM_mve_coerce(p0, uint16_t *), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \
> > int (*)[__ARM_mve_type_uint16_t_ptr][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vstrhq_scatter_shifted_offset_p_u32 (__ARM_mve_coerce(p0, uint16_t *), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3));})
> >
> > -#define __arm_vstrwq_scatter_offset(p0,p1,p2) ({ __typeof(p1) __p1 =
> (p1); \
> > +#define __arm_vstrwq_scatter_offset(p0,p1,p2) ({ __typeof(p0) __p0 =
> (p0); \
> > __typeof(p2) __p2 = (p2); \
> > - _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p2)])0,
> \
> > - int (*)[__ARM_mve_type_int32_t_ptr][__ARM_mve_type_int32x4_t]:
> __arm_vstrwq_scatter_offset_s32 (__ARM_mve_coerce(p0, int32_t *), __p1,
> __ARM_mve_coerce(__p2, int32x4_t)), \
> > - int (*)[__ARM_mve_type_uint32_t_ptr][__ARM_mve_type_uint32x4_t]:
> __arm_vstrwq_scatter_offset_u32 (__ARM_mve_coerce(p0, uint32_t *),
> __p1, __ARM_mve_coerce(__p2, uint32x4_t)));})
> > + _Generic( (int
> (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p2)])0, \
> > + int (*)[__ARM_mve_type_int32_t_ptr][__ARM_mve_type_int32x4_t]:
> __arm_vstrwq_scatter_offset_s32 (__ARM_mve_coerce(__p0, int32_t *), p1,
> __ARM_mve_coerce(__p2, int32x4_t)), \
> > + int (*)[__ARM_mve_type_uint32_t_ptr][__ARM_mve_type_uint32x4_t]:
> __arm_vstrwq_scatter_offset_u32 (__ARM_mve_coerce(__p0, uint32_t *),
> p1, __ARM_mve_coerce(__p2, uint32x4_t)));})
> >
> > -#define __arm_vstrwq_scatter_offset_p(p0,p1,p2,p3) ({ __typeof(p1) __p1
> = (p1); \
> > +#define __arm_vstrwq_scatter_offset_p(p0,p1,p2,p3) ({ __typeof(p0)
> __p0 = (p0); \
> > __typeof(p2) __p2 = (p2); \
> > - _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p2)])0,
> \
> > - int (*)[__ARM_mve_type_int32_t_ptr][__ARM_mve_type_int32x4_t]:
> __arm_vstrwq_scatter_offset_p_s32 (__ARM_mve_coerce(p0, int32_t *),
> __p1, __ARM_mve_coerce(__p2, int32x4_t), p3), \
> > - int (*)[__ARM_mve_type_uint32_t_ptr][__ARM_mve_type_uint32x4_t]:
> __arm_vstrwq_scatter_offset_p_u32 (__ARM_mve_coerce(p0, uint32_t *),
> __p1, __ARM_mve_coerce(__p2, uint32x4_t)));})
> > -
> > -#define __arm_vstrwq_scatter_offset_p(p0,p1,p2,p3) ({ __typeof(p1) __p1
> = (p1); \
> > - __typeof(p2) __p2 = (p2); \
> > - _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p2)])0,
> \
> > - int (*)[__ARM_mve_type_int32_t_ptr][__ARM_mve_type_int32x4_t]:
> __arm_vstrwq_scatter_offset_p_s32 (__ARM_mve_coerce(p0, int32_t *),
> __p1, __ARM_mve_coerce(__p2, int32x4_t), p3), \
> > - int (*)[__ARM_mve_type_uint32_t_ptr][__ARM_mve_type_uint32x4_t]:
> __arm_vstrwq_scatter_offset_p_u32 (__ARM_mve_coerce(p0, uint32_t *),
> __p1, __ARM_mve_coerce(__p2, uint32x4_t), p3));})
> > -
> > -#define __arm_vstrwq_scatter_offset(p0,p1,p2) ({ __typeof(p1) __p1 =
> (p1); \
> > - __typeof(p2) __p2 = (p2); \
> > - _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p2)])0,
> \
> > - int (*)[__ARM_mve_type_int32_t_ptr][__ARM_mve_type_int32x4_t]:
> __arm_vstrwq_scatter_offset_s32 (__ARM_mve_coerce(p0, int32_t *), __p1,
> __ARM_mve_coerce(__p2, int32x4_t)), \
> > - int (*)[__ARM_mve_type_uint32_t_ptr][__ARM_mve_type_uint32x4_t]:
> __arm_vstrwq_scatter_offset_u32 (__ARM_mve_coerce(p0, uint32_t *),
> __p1, __ARM_mve_coerce(__p2, uint32x4_t)));})
> > + _Generic( (int
> (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p2)])0, \
> > + int (*)[__ARM_mve_type_int32_t_ptr][__ARM_mve_type_int32x4_t]:
> __arm_vstrwq_scatter_offset_p_s32 (__ARM_mve_coerce(__p0, int32_t *),
> p1, __ARM_mve_coerce(__p2, int32x4_t), p3), \
> > + int (*)[__ARM_mve_type_uint32_t_ptr][__ARM_mve_type_uint32x4_t]:
> __arm_vstrwq_scatter_offset_p_u32 (__ARM_mve_coerce(__p0, uint32_t *),
> p1, __ARM_mve_coerce(__p2, uint32x4_t), p3));})
> >
> > #define __arm_vstrwq_scatter_shifted_offset(p0,p1,p2) ({ __typeof(p1)
> __p1 = (p1); \
> > __typeof(p2) __p2 = (p2); \
> > @@ -40160,32 +39992,6 @@ extern void *__ARM_undef;
> > int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]:
> __arm_veorq_x_u16 (__ARM_mve_coerce(__p1, uint16x8_t),
> __ARM_mve_coerce(__p2, uint16x8_t), p3), \
> > int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]:
> __arm_veorq_x_u32 (__ARM_mve_coerce(__p1, uint32x4_t),
> __ARM_mve_coerce(__p2, uint32x4_t), p3));})
> >
> > -#define __arm_vmaxq_x(p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \
> > - __typeof(p2) __p2 = (p2); \
> > - _Generic( (int
> (*)[__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
> > - int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]:
> __arm_vmulq_x_s8 (__ARM_mve_coerce(__p1, int8x16_t),
> __ARM_mve_coerce(__p2, int8x16_t), p3), \
> > - int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]:
> __arm_vmulq_x_s16 (__ARM_mve_coerce(__p1, int16x8_t),
> __ARM_mve_coerce(__p2, int16x8_t), p3), \
> > - int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]:
> __arm_vmulq_x_s32 (__ARM_mve_coerce(__p1, int32x4_t),
> __ARM_mve_coerce(__p2, int32x4_t), p3), \
> > - int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]:
> __arm_vmulq_x_n_s8 (__ARM_mve_coerce(__p1, int8x16_t),
> __ARM_mve_coerce(__p2, int8_t), p3), \
> > - int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]:
> __arm_vmulq_x_n_s16 (__ARM_mve_coerce(__p1, int16x8_t),
> __ARM_mve_coerce(__p2, int16_t), p3), \
> > - int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]:
> __arm_vmulq_x_n_s32 (__ARM_mve_coerce(__p1, int32x4_t),
> __ARM_mve_coerce(__p2, int32_t), p3), \
> > - int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]:
> __arm_vmulq_x_u8 (__ARM_mve_coerce(__p1, uint8x16_t),
> __ARM_mve_coerce(__p2, uint8x16_t), p3), \
> > - int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]:
> __arm_vmulq_x_u16 (__ARM_mve_coerce(__p1, uint16x8_t),
> __ARM_mve_coerce(__p2, uint16x8_t), p3), \
> > - int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]:
> __arm_vmulq_x_u32 (__ARM_mve_coerce(__p1, uint32x4_t),
> __ARM_mve_coerce(__p2, uint32x4_t), p3), \
> > - int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int_n]:
> __arm_vmulq_x_n_u8 (__ARM_mve_coerce(__p1, uint8x16_t),
> __ARM_mve_coerce(__p2, uint8_t), p3), \
> > - int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]:
> __arm_vmulq_x_n_u16 (__ARM_mve_coerce(__p1, uint16x8_t),
> __ARM_mve_coerce(__p2, uint16_t), p3), \
> > - int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]:
> __arm_vmulq_x_n_u32 (__ARM_mve_coerce(__p1, uint32x4_t),
> __ARM_mve_coerce(__p2, uint32_t), p3));})
> > -
> > -#define __arm_vminq_x(p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \
> > - __typeof(p2) __p2 = (p2); \
> > - _Generic( (int
> (*)[__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
> > - int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]:
> __arm_vminq_x_s8 (__ARM_mve_coerce(__p1, int8x16_t),
> __ARM_mve_coerce(__p2, int8x16_t), p3), \
> > - int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]:
> __arm_vminq_x_s16 (__ARM_mve_coerce(__p1, int16x8_t),
> __ARM_mve_coerce(__p2, int16x8_t), p3), \
> > - int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]:
> __arm_vminq_x_s32 (__ARM_mve_coerce(__p1, int32x4_t),
> __ARM_mve_coerce(__p2, int32x4_t), p3), \
> > - int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]:
> __arm_vminq_x_u8 (__ARM_mve_coerce(__p1, uint8x16_t),
> __ARM_mve_coerce(__p2, uint8x16_t), p3), \
> > - int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]:
> __arm_vminq_x_u16 (__ARM_mve_coerce(__p1, uint16x8_t),
> __ARM_mve_coerce(__p2, uint16x8_t), p3), \
> > - int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]:
> __arm_vminq_x_u32 (__ARM_mve_coerce(__p1, uint32x4_t),
> __ARM_mve_coerce(__p2, uint32x4_t), p3));})
> > -
> > #define __arm_vmovlbq_x(p1,p2) ({ __typeof(p1) __p1 = (p1); \
> > _Generic( (int (*)[__ARM_mve_typeid(__p1)])0, \
> > int (*)[__ARM_mve_type_int8x16_t]: __arm_vmovlbq_x_s8
> (__ARM_mve_coerce(__p1, int8x16_t), p2), \
> > @@ -41013,13 +40819,6 @@ extern void *__ARM_undef;
> > int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]:
> __arm_vmlsldavaxq_p_s16(__p0, __ARM_mve_coerce(__p1, int16x8_t),
> __ARM_mve_coerce(__p2, int16x8_t), p3), \
> > int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]:
> __arm_vmlsldavaxq_p_s32(__p0, __ARM_mve_coerce(__p1, int32x4_t),
> __ARM_mve_coerce(__p2, int32x4_t), p3));})
> >
> > -#define __arm_vrmlaldavhaq_p(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0);
> \
> > - __typeof(p1) __p1 = (p1); \
> > - __typeof(p2) __p2 = (p2); \
> > - _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
> > - int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vrmlaldavhaq_p_s32 (__ARM_mve_coerce(__p0, int64_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \
> > - int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vrmlaldavhaq_p_u32 (__ARM_mve_coerce(__p0, uint64_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3));})
> > -
> > #define __arm_vrmlaldavhaxq_p(p0,p1,p2,p3)
> __arm_vrmlaldavhaxq_p_s32(p0,p1,p2,p3)
> >
> > #define __arm_vrmlsldavhaq_p(p0,p1,p2,p3)
> __arm_vrmlsldavhaq_p_s32(p0,p1,p2,p3)
> > @@ -41343,21 +41142,47 @@ extern void *__ARM_undef;
> > int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vqrdmladhxq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
> > int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqrdmladhxq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3));})
> >
> > -#define __arm_vmlsdavaxq_p(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
> > - __typeof(p1) __p1 = (p1); \
> > +#define __arm_vmlsdavaxq_p(p0,p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \
> > __typeof(p2) __p2 = (p2); \
> > _Generic( (int
> (*)[__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
> > - int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]:
> __arm_vmlsdavaxq_p_s8 (__p0, __ARM_mve_coerce(__p1, int8x16_t),
> __ARM_mve_coerce(__p2, int8x16_t), p3), \
> > - int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]:
> __arm_vmlsdavaxq_p_s16 (__p0, __ARM_mve_coerce(__p1, int16x8_t),
> __ARM_mve_coerce(__p2, int16x8_t), p3), \
> > - int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]:
> __arm_vmlsdavaxq_p_s32 (__p0, __ARM_mve_coerce(__p1, int32x4_t),
> __ARM_mve_coerce(__p2, int32x4_t), p3));})
> > + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]:
> __arm_vmlsdavaxq_p_s8 (p0, __ARM_mve_coerce(__p1, int8x16_t),
> __ARM_mve_coerce(__p2, int8x16_t), p3), \
> > + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]:
> __arm_vmlsdavaxq_p_s16 (p0, __ARM_mve_coerce(__p1, int16x8_t),
> __ARM_mve_coerce(__p2, int16x8_t), p3), \
> > + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]:
> __arm_vmlsdavaxq_p_s32 (p0, __ARM_mve_coerce(__p1, int32x4_t),
> __ARM_mve_coerce(__p2, int32x4_t), p3));})
> > +
> > +#define __arm_vmlsdavaq(p0,p1,p2) ({ __typeof(p1) __p1 = (p1); \
> > + __typeof(p2) __p2 = (p2); \
> > + _Generic( (int
> (*)[__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
> > + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]:
> __arm_vmlsdavaq_s8(p0, __ARM_mve_coerce(__p1, int8x16_t),
> __ARM_mve_coerce(__p2, int8x16_t)), \
> > + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]:
> __arm_vmlsdavaq_s16(p0, __ARM_mve_coerce(__p1, int16x8_t),
> __ARM_mve_coerce(__p2, int16x8_t)), \
> > + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]:
> __arm_vmlsdavaq_s32(p0, __ARM_mve_coerce(__p1, int32x4_t),
> __ARM_mve_coerce(__p2, int32x4_t)));})
> > +
> > +#define __arm_vmlsdavaxq(p0,p1,p2) ({ __typeof(p2) __p2 = (p2); \
> > + __typeof(p1) __p1 = (p1); \
> > + _Generic( (int
> (*)[__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
> > + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]:
> __arm_vmlsdavaxq_s8(p0, __ARM_mve_coerce(__p1, int8x16_t),
> __ARM_mve_coerce(__p2, int8x16_t)), \
> > + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]:
> __arm_vmlsdavaxq_s16(p0, __ARM_mve_coerce(__p1, int16x8_t),
> __ARM_mve_coerce(__p2, int16x8_t)), \
> > + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]:
> __arm_vmlsdavaxq_s32(p0, __ARM_mve_coerce(__p1, int32x4_t),
> __ARM_mve_coerce(__p2, int32x4_t)));})
> >
> > -#define __arm_vmlsdavaq_p(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
> > +#define __arm_vmlsdavq_p(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
> > __typeof(p1) __p1 = (p1); \
> > + _Generic( (int
> (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
> > + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]:
> __arm_vmlsdavq_p_s8 (__ARM_mve_coerce(__p0, int8x16_t),
> __ARM_mve_coerce(__p1, int8x16_t), p2), \
> > + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]:
> __arm_vmlsdavq_p_s16 (__ARM_mve_coerce(__p0, int16x8_t),
> __ARM_mve_coerce(__p1, int16x8_t), p2), \
> > + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]:
> __arm_vmlsdavq_p_s32 (__ARM_mve_coerce(__p0, int32x4_t),
> __ARM_mve_coerce(__p1, int32x4_t), p2));})
> > +
> > +#define __arm_vmlsdavxq_p(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
> > + __typeof(p1) __p1 = (p1); \
> > + _Generic( (int
> (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
> > + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]:
> __arm_vmlsdavxq_p_s8 (__ARM_mve_coerce(__p0, int8x16_t),
> __ARM_mve_coerce(__p1, int8x16_t), p2), \
> > + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]:
> __arm_vmlsdavxq_p_s16 (__ARM_mve_coerce(__p0, int16x8_t),
> __ARM_mve_coerce(__p1, int16x8_t), p2), \
> > + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]:
> __arm_vmlsdavxq_p_s32 (__ARM_mve_coerce(__p0, int32x4_t),
> __ARM_mve_coerce(__p1, int32x4_t), p2));})
> > +
> > +#define __arm_vmlsdavaq_p(p0,p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \
> > __typeof(p2) __p2 = (p2); \
> > _Generic( (int
> (*)[__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
> > - int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]:
> __arm_vmlsdavaq_p_s8(__p0, __ARM_mve_coerce(__p1, int8x16_t),
> __ARM_mve_coerce(__p2, int8x16_t), p3), \
> > - int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]:
> __arm_vmlsdavaq_p_s16(__p0, __ARM_mve_coerce(__p1, int16x8_t),
> __ARM_mve_coerce(__p2, int16x8_t), p3), \
> > - int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]:
> __arm_vmlsdavaq_p_s32(__p0, __ARM_mve_coerce(__p1, int32x4_t),
> __ARM_mve_coerce(__p2, int32x4_t), p3));})
> > + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]:
> __arm_vmlsdavaq_p_s8(p0, __ARM_mve_coerce(__p1, int8x16_t),
> __ARM_mve_coerce(__p2, int8x16_t), p3), \
> > + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]:
> __arm_vmlsdavaq_p_s16(p0, __ARM_mve_coerce(__p1, int16x8_t),
> __ARM_mve_coerce(__p2, int16x8_t), p3), \
> > + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]:
> __arm_vmlsdavaq_p_s32(p0, __ARM_mve_coerce(__p1, int32x4_t),
> __ARM_mve_coerce(__p2, int32x4_t), p3));})
> >
> > #define __arm_vmladavaxq_p(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
> > __typeof(p1) __p1 = (p1); \
> > @@ -41445,8 +41270,8 @@ extern void *__ARM_undef;
> >
> > #define __arm_viwdupq_u16(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
> > _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
> > - int (*)[__ARM_mve_type_int_n]: __arm_viwdupq_n_u16
> (__ARM_mve_coerce(__p0, uint32_t), p1, p2), \
> > - int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_viwdupq_wb_u16
> (__ARM_mve_coerce(__p0, uint32_t *), p1, p2));})
> > + int (*)[__ARM_mve_type_int_n]: __arm_viwdupq_n_u16
> (__ARM_mve_coerce(__p0, uint32_t), p1, (const int) p2), \
> > + int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_viwdupq_wb_u16
> (__ARM_mve_coerce(__p0, uint32_t *), p1, (const int) p2));})
> >
> > #define __arm_viwdupq_u32(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
> > _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
> > @@ -41628,16 +41453,6 @@ extern void *__ARM_undef;
> > int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int16x8_t]:
> __arm_vmaxavq_p_s16 (__p0, __ARM_mve_coerce(__p1, int16x8_t), p2), \
> > int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int32x4_t]:
> __arm_vmaxavq_p_s32 (__p0, __ARM_mve_coerce(__p1, int32x4_t), p2));})
> >
> > -#define __arm_vmaxq_x(p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \
> > - __typeof(p2) __p2 = (p2); \
> > - _Generic( (int
> (*)[__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
> > - int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]:
> __arm_vmaxq_x_s8 (__ARM_mve_coerce(__p1, int8x16_t),
> __ARM_mve_coerce(__p2, int8x16_t), p3), \
> > - int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]:
> __arm_vmaxq_x_s16 (__ARM_mve_coerce(__p1, int16x8_t),
> __ARM_mve_coerce(__p2, int16x8_t), p3), \
> > - int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]:
> __arm_vmaxq_x_s32 (__ARM_mve_coerce(__p1, int32x4_t),
> __ARM_mve_coerce(__p2, int32x4_t), p3), \
> > - int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]:
> __arm_vmaxq_x_u8( __ARM_mve_coerce(__p1, uint8x16_t),
> __ARM_mve_coerce(__p2, uint8x16_t), p3), \
> > - int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]:
> __arm_vmaxq_x_u16( __ARM_mve_coerce(__p1, uint16x8_t),
> __ARM_mve_coerce(__p2, uint16x8_t), p3), \
> > - int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]:
> __arm_vmaxq_x_u32( __ARM_mve_coerce(__p1, uint32x4_t),
> __ARM_mve_coerce(__p2, uint32x4_t), p3));})
> > -
> > #define __arm_vmaxvq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
> > __typeof(p1) __p1 = (p1); \
> > _Generic( (int
> (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
> > @@ -41672,6 +41487,16 @@ extern void *__ARM_undef;
> > int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int16x8_t]:
> __arm_vminavq_p_s16 (__p0, __ARM_mve_coerce(__p1, int16x8_t), p2), \
> > int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int32x4_t]:
> __arm_vminavq_p_s32 (__p0, __ARM_mve_coerce(__p1, int32x4_t), p2));})
> >
> > +#define __arm_vmaxq_x(p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \
> > + __typeof(p2) __p2 = (p2); \
> > + _Generic( (int
> (*)[__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
> > + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]:
> __arm_vmaxq_x_s8 (__ARM_mve_coerce(__p1, int8x16_t),
> __ARM_mve_coerce(__p2, int8x16_t), p3), \
> > + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]:
> __arm_vmaxq_x_s16 (__ARM_mve_coerce(__p1, int16x8_t),
> __ARM_mve_coerce(__p2, int16x8_t), p3), \
> > + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]:
> __arm_vmaxq_x_s32 (__ARM_mve_coerce(__p1, int32x4_t),
> __ARM_mve_coerce(__p2, int32x4_t), p3), \
> > + int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]:
> __arm_vmaxq_x_u8 (__ARM_mve_coerce(__p1, uint8x16_t),
> __ARM_mve_coerce(__p2, uint8x16_t), p3), \
> > + int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]:
> __arm_vmaxq_x_u16 (__ARM_mve_coerce(__p1, uint16x8_t),
> __ARM_mve_coerce(__p2, uint16x8_t), p3), \
> > + int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]:
> __arm_vmaxq_x_u32 (__ARM_mve_coerce(__p1, uint32x4_t),
> __ARM_mve_coerce(__p2, uint32x4_t), p3));})
> > +
> > #define __arm_vminq_x(p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \
> > __typeof(p2) __p2 = (p2); \
> > _Generic( (int
> (*)[__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
> > @@ -41810,22 +41635,6 @@ extern void *__ARM_undef;
> > int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]:
> __arm_vmlaldavxq_p_s16 (__ARM_mve_coerce(__p0, int16x8_t),
> __ARM_mve_coerce(__p1, int16x8_t), p2), \
> > int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]:
> __arm_vmlaldavxq_p_s32 (__ARM_mve_coerce(__p0, int32x4_t),
> __ARM_mve_coerce(__p1, int32x4_t), p2));})
> >
> > -#define __arm_vmlsdavaq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
> > - __typeof(p1) __p1 = (p1); \
> > - __typeof(p2) __p2 = (p2); \
> > - _Generic( (int
> (*)[__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
> > - int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]:
> __arm_vmlsdavaq_s8(__p0, __ARM_mve_coerce(__p1, int8x16_t),
> __ARM_mve_coerce(__p2, int8x16_t)), \
> > - int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]:
> __arm_vmlsdavaq_s16(__p0, __ARM_mve_coerce(__p1, int16x8_t),
> __ARM_mve_coerce(__p2, int16x8_t)), \
> > - int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]:
> __arm_vmlsdavaq_s32(__p0, __ARM_mve_coerce(__p1, int32x4_t),
> __ARM_mve_coerce(__p2, int32x4_t)));})
> > -
> > -#define __arm_vmlsdavaxq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
> > - __typeof(p1) __p1 = (p1); \
> > - __typeof(p2) __p2 = (p2); \
> > - _Generic( (int
> (*)[__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
> > - int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]:
> __arm_vmlsdavaxq_s8(__p0, __ARM_mve_coerce(__p1, int8x16_t),
> __ARM_mve_coerce(__p2, int8x16_t)), \
> > - int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]:
> __arm_vmlsdavaxq_s16(__p0, __ARM_mve_coerce(__p1, int16x8_t),
> __ARM_mve_coerce(__p2, int16x8_t)), \
> > - int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]:
> __arm_vmlsdavaxq_s32(__p0, __ARM_mve_coerce(__p1, int32x4_t),
> __ARM_mve_coerce(__p2, int32x4_t)));})
> > -
> > #define __arm_vmlsdavq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
> > __typeof(p1) __p1 = (p1); \
> > _Generic( (int
> (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
> > @@ -41833,13 +41642,6 @@ extern void *__ARM_undef;
> > int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]:
> __arm_vmlsdavq_s16 (__ARM_mve_coerce(__p0, int16x8_t),
> __ARM_mve_coerce(__p1, int16x8_t)), \
> > int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]:
> __arm_vmlsdavq_s32 (__ARM_mve_coerce(__p0, int32x4_t),
> __ARM_mve_coerce(__p1, int32x4_t)));})
> >
> > -#define __arm_vmlsdavq_p(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
> > - __typeof(p1) __p1 = (p1); \
> > - _Generic( (int
> (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
> > - int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]:
> __arm_vmlsdavq_p_s8 (__ARM_mve_coerce(__p0, int8x16_t),
> __ARM_mve_coerce(__p1, int8x16_t), p2), \
> > - int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]:
> __arm_vmlsdavq_p_s16 (__ARM_mve_coerce(__p0, int16x8_t),
> __ARM_mve_coerce(__p1, int16x8_t), p2), \
> > - int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]:
> __arm_vmlsdavq_p_s32 (__ARM_mve_coerce(__p0, int32x4_t),
> __ARM_mve_coerce(__p1, int32x4_t), p2));})
> > -
> > #define __arm_vmlsdavxq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
> > __typeof(p1) __p1 = (p1); \
> > _Generic( (int
> (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
> > @@ -41847,13 +41649,6 @@ extern void *__ARM_undef;
> > int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]:
> __arm_vmlsdavxq_s16 (__ARM_mve_coerce(__p0, int16x8_t),
> __ARM_mve_coerce(__p1, int16x8_t)), \
> > int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]:
> __arm_vmlsdavxq_s32 (__ARM_mve_coerce(__p0, int32x4_t),
> __ARM_mve_coerce(__p1, int32x4_t)));})
> >
> > -#define __arm_vmlsdavxq_p(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
> > - __typeof(p1) __p1 = (p1); \
> > - _Generic( (int
> (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
> > - int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]:
> __arm_vmlsdavxq_p_s8 (__ARM_mve_coerce(__p0, int8x16_t),
> __ARM_mve_coerce(__p1, int8x16_t), p2), \
> > - int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]:
> __arm_vmlsdavxq_p_s16 (__ARM_mve_coerce(__p0, int16x8_t),
> __ARM_mve_coerce(__p1, int16x8_t), p2), \
> > - int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]:
> __arm_vmlsdavxq_p_s32 (__ARM_mve_coerce(__p0, int32x4_t),
> __ARM_mve_coerce(__p1, int32x4_t), p2));})
> > -
> > #define __arm_vmlsldavaq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
> > __typeof(p1) __p1 = (p1); \
> > __typeof(p2) __p2 = (p2); \
> > @@ -41948,13 +41743,6 @@ extern void *__ARM_undef;
> > int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]:
> __arm_vmulltq_poly_x_p8 (__ARM_mve_coerce(__p1, uint8x16_t),
> __ARM_mve_coerce(__p2, uint8x16_t), p3), \
> > int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]:
> __arm_vmulltq_poly_x_p16 (__ARM_mve_coerce(__p1, uint16x8_t),
> __ARM_mve_coerce(__p2, uint16x8_t), p3));})
> >
> > -#define __arm_vrmlaldavhaq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
> > - __typeof(p1) __p1 = (p1); \
> > - __typeof(p2) __p2 = (p2); \
> > - _Generic( (int
> (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typ
> eid(__p2)])0, \
> > - int
> (*)[__ARM_mve_type_int_n][__ARM_mve_type_int32x4_t][__ARM_mve_typ
> e_int32x4_t]: __arm_vrmlaldavhaq_s32 (__ARM_mve_coerce(__p0, int64_t),
> __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t)),
> \
> > - int
> (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint32x4_t][__ARM_mve_ty
> pe_uint32x4_t]: __arm_vrmlaldavhaq_u32 (__ARM_mve_coerce(__p0,
> uint64_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2,
> uint32x4_t)));})
> > -
> > #define __arm_vrmlaldavhaxq(p0,p1,p2)
> __arm_vrmlaldavhaxq_s32(p0,p1,p2)
> >
> > #define __arm_vrmlaldavhq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
> > @@ -41994,35 +41782,15 @@ extern void *__ARM_undef;
> > int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint16x8_t]:
> __arm_vstrbq_u16 (__ARM_mve_coerce(p0, uint8_t *),
> __ARM_mve_coerce(__p1, uint16x8_t)), \
> > int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint32x4_t]:
> __arm_vstrbq_u32 (__ARM_mve_coerce(p0, uint8_t *),
> __ARM_mve_coerce(__p1, uint32x4_t)));})
> >
> > -#define __arm_vstrbq_p(p0,p1,p2) ({ __typeof(p1) __p1 = (p1); \
> > - _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)])0,
> \
> > - int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_int8x16_t]:
> __arm_vstrbq_p_s8 (__ARM_mve_coerce(p0, int8_t *),
> __ARM_mve_coerce(__p1, int8x16_t), p2), \
> > - int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_int16x8_t]:
> __arm_vstrbq_p_s16 (__ARM_mve_coerce(p0, int8_t *),
> __ARM_mve_coerce(__p1, int16x8_t), p2), \
> > - int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_int32x4_t]:
> __arm_vstrbq_p_s32 (__ARM_mve_coerce(p0, int8_t *),
> __ARM_mve_coerce(__p1, int32x4_t), p2), \
> > - int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint8x16_t]:
> __arm_vstrbq_p_u8 (__ARM_mve_coerce(p0, uint8_t *),
> __ARM_mve_coerce(__p1, uint8x16_t), p2), \
> > - int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint16x8_t]:
> __arm_vstrbq_p_u16 (__ARM_mve_coerce(p0, uint8_t *),
> __ARM_mve_coerce(__p1, uint16x8_t), p2), \
> > - int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint32x4_t]:
> __arm_vstrbq_p_u32 (__ARM_mve_coerce(p0, uint8_t *),
> __ARM_mve_coerce(__p1, uint32x4_t), p2));})
> > -
> > -#define __arm_vstrbq_scatter_offset(p0,p1,p2) ({ __typeof(p1) __p1 =
> (p1); \
> > - __typeof(p2) __p2 = (p2); \
> > - _Generic( (int
> (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typei
> d(__p2)])0, \
> > - int
> (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint8x16_t][__ARM_m
> ve_type_int8x16_t]: __arm_vstrbq_scatter_offset_s8
> (__ARM_mve_coerce(p0, int8_t *), __ARM_mve_coerce(__p1, uint8x16_t),
> __ARM_mve_coerce(__p2, int8x16_t)), \
> > - int
> (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint16x8_t][__ARM_m
> ve_type_int16x8_t]: __arm_vstrbq_scatter_offset_s16
> (__ARM_mve_coerce(p0, int8_t *), __ARM_mve_coerce(__p1, uint16x8_t),
> __ARM_mve_coerce(__p2, int16x8_t)), \
> > - int
> (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint32x4_t][__ARM_m
> ve_type_int32x4_t]: __arm_vstrbq_scatter_offset_s32
> (__ARM_mve_coerce(p0, int8_t *), __ARM_mve_coerce(__p1, uint32x4_t),
> __ARM_mve_coerce(__p2, int32x4_t)), \
> > - int
> (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint8x16_t][__ARM_
> mve_type_uint8x16_t]: __arm_vstrbq_scatter_offset_u8
> (__ARM_mve_coerce(p0, uint8_t *), __ARM_mve_coerce(__p1, uint8x16_t),
> __ARM_mve_coerce(__p2, uint8x16_t)), \
> > - int
> (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint16x8_t][__ARM_
> mve_type_uint16x8_t]: __arm_vstrbq_scatter_offset_u16
> (__ARM_mve_coerce(p0, uint8_t *), __ARM_mve_coerce(__p1, uint16x8_t),
> __ARM_mve_coerce(__p2, uint16x8_t)), \
> > - int
> (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint32x4_t][__ARM_
> mve_type_uint32x4_t]: __arm_vstrbq_scatter_offset_u32
> (__ARM_mve_coerce(p0, uint8_t *), __ARM_mve_coerce(__p1, uint32x4_t),
> __ARM_mve_coerce(__p2, uint32x4_t)));})
> > -
> > -
> > -#define __arm_vstrbq_scatter_offset_p(p0,p1,p2,p3) ({__typeof(p1) __p1
> = (p1); \
> > - __typeof(p2) __p2 = (p2); \
> > - _Generic( (int
> (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typei
> d(__p2)])0, \
> > - int
> (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint8x16_t][__ARM_m
> ve_type_int8x16_t]: __arm_vstrbq_scatter_offset_p_s8
> (__ARM_mve_coerce(p0, int8_t *), __ARM_mve_coerce(__p1, uint8x16_t),
> __ARM_mve_coerce(__p2, int8x16_t), p3), \
> > - int
> (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint16x8_t][__ARM_m
> ve_type_int16x8_t]: __arm_vstrbq_scatter_offset_p_s16
> (__ARM_mve_coerce(p0, int8_t *), __ARM_mve_coerce(__p1, uint16x8_t),
> __ARM_mve_coerce(__p2, int16x8_t), p3), \
> > - int
> (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint32x4_t][__ARM_m
> ve_type_int32x4_t]: __arm_vstrbq_scatter_offset_p_s32
> (__ARM_mve_coerce(p0, int8_t *), __ARM_mve_coerce(__p1, uint32x4_t),
> __ARM_mve_coerce(__p2, int32x4_t), p3), \
> > - int
> (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint8x16_t][__ARM_
> mve_type_uint8x16_t]: __arm_vstrbq_scatter_offset_p_u8
> (__ARM_mve_coerce(p0, uint8_t *), __ARM_mve_coerce(__p1, uint8x16_t),
> __ARM_mve_coerce(__p2, uint8x16_t), p3), \
> > - int
> (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint16x8_t][__ARM_
> mve_type_uint16x8_t]: __arm_vstrbq_scatter_offset_p_u16
> (__ARM_mve_coerce(p0, uint8_t *), __ARM_mve_coerce(__p1, uint16x8_t),
> __ARM_mve_coerce(__p2, uint16x8_t), p3), \
> > - int
> (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint32x4_t][__ARM_
> mve_type_uint32x4_t]: __arm_vstrbq_scatter_offset_p_u32
> (__ARM_mve_coerce(p0, uint8_t *), __ARM_mve_coerce(__p1, uint32x4_t),
> __ARM_mve_coerce(__p2, uint32x4_t), p3));})
> > +#define __arm_vstrbq_p(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
> > + __typeof(p1) __p1 = (p1); \
> > + _Generic( (int
> (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
> > + int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_int8x16_t]:
> __arm_vstrbq_p_s8 (__ARM_mve_coerce(__p0, int8_t *),
> __ARM_mve_coerce(__p1, int8x16_t), p2), \
> > + int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_int16x8_t]:
> __arm_vstrbq_p_s16 (__ARM_mve_coerce(__p0, int8_t *),
> __ARM_mve_coerce(__p1, int16x8_t), p2), \
> > + int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_int32x4_t]:
> __arm_vstrbq_p_s32 (__ARM_mve_coerce(__p0, int8_t *),
> __ARM_mve_coerce(__p1, int32x4_t), p2), \
> > + int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint8x16_t]:
> __arm_vstrbq_p_u8 (__ARM_mve_coerce(__p0, uint8_t *),
> __ARM_mve_coerce(__p1, uint8x16_t), p2), \
> > + int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint16x8_t]:
> __arm_vstrbq_p_u16 (__ARM_mve_coerce(__p0, uint8_t *),
> __ARM_mve_coerce(__p1, uint16x8_t), p2), \
> > + int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint32x4_t]:
> __arm_vstrbq_p_u32 (__ARM_mve_coerce(__p0, uint8_t *),
> __ARM_mve_coerce(__p1, uint32x4_t), p2));})
> >
> > #define __arm_vstrdq_scatter_base(p0,p1,p2) ({ __typeof(p2) __p2 = (p2);
> \
> > _Generic( (int (*)[__ARM_mve_typeid(__p2)])0, \
> > @@ -42034,29 +41802,65 @@ extern void *__ARM_undef;
> > int (*)[__ARM_mve_type_int64x2_t]: __arm_vstrdq_scatter_base_p_s64
> (p0, p1, __ARM_mve_coerce(__p2, int64x2_t), p3), \
> > int (*)[__ARM_mve_type_uint64x2_t]:
> __arm_vstrdq_scatter_base_p_u64 (p0, p1, __ARM_mve_coerce(__p2,
> uint64x2_t), p3));})
> >
> > -#define __arm_vstrdq_scatter_offset(p0,p1,p2) ({ __typeof(p1) __p1 =
> (p1); \
> > +#define __arm_vrmlaldavhaq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
> > + __typeof(p1) __p1 = (p1); \
> > __typeof(p2) __p2 = (p2); \
> > - _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p2)])0,
> \
> > - int (*)[__ARM_mve_type_int64_t_ptr][__ARM_mve_type_int64x2_t]:
> __arm_vstrdq_scatter_offset_s64 (__ARM_mve_coerce(p0, int64_t *), __p1,
> __ARM_mve_coerce(__p2, int64x2_t)), \
> > - int (*)[__ARM_mve_type_uint64_t_ptr][__ARM_mve_type_uint64x2_t]:
> __arm_vstrdq_scatter_offset_u64 (__ARM_mve_coerce(p0, uint64_t *), __p1,
> __ARM_mve_coerce(__p2, uint64x2_t)));})
> > + _Generic( (int
> (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typ
> eid(__p2)])0, \
> > + int
> (*)[__ARM_mve_type_int_n][__ARM_mve_type_int32x4_t][__ARM_mve_typ
> e_int32x4_t]: __arm_vrmlaldavhaq_s32 (__ARM_mve_coerce(__p0, int64_t),
> __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t)),
> \
> > + int
> (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint32x4_t][__ARM_mve_ty
> pe_uint32x4_t]: __arm_vrmlaldavhaq_u32 (__ARM_mve_coerce(__p0,
> uint64_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2,
> uint32x4_t)));})
> >
> > -#define __arm_vstrdq_scatter_offset_p(p0,p1,p2,p3) ({ __typeof(p1) __p1
> = (p1); \
> > +#define __arm_vrmlaldavhaq_p(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0);
> \
> > + __typeof(p1) __p1 = (p1); \
> > __typeof(p2) __p2 = (p2); \
> > - _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p2)])0,
> \
> > - int (*)[__ARM_mve_type_int64_t_ptr][__ARM_mve_type_int64x2_t]:
> __arm_vstrdq_scatter_offset_p_s64 (__ARM_mve_coerce(p0, int64_t *),
> __p1, __ARM_mve_coerce(__p2, int64x2_t), p3), \
> > - int (*)[__ARM_mve_type_uint64_t_ptr][__ARM_mve_type_uint64x2_t]:
> __arm_vstrdq_scatter_offset_p_u64 (__ARM_mve_coerce(p0, uint64_t *),
> __p1, __ARM_mve_coerce(__p2, uint64x2_t), p3));})
> > + _Generic( (int
> (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typ
> eid(__p2)])0, \
> > + int
> (*)[__ARM_mve_type_int_n][__ARM_mve_type_int32x4_t][__ARM_mve_typ
> e_int32x4_t]: __arm_vrmlaldavhaq_p_s32 (__ARM_mve_coerce(__p0,
> int64_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2,
> int32x4_t), p3), \
> > + int
> (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint32x4_t][__ARM_mve_ty
> pe_uint32x4_t]: __arm_vrmlaldavhaq_p_u32 (__ARM_mve_coerce(__p0,
> uint64_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2,
> uint32x4_t), p3));})
> >
> > -#define __arm_vstrdq_scatter_shifted_offset(p0,p1,p2) ({ __typeof(p1)
> __p1 = (p1); \
> > +#define __arm_vstrbq_scatter_offset(p0,p1,p2) ({ __typeof(p0) __p0 =
> (p0); \
> > + __typeof(p1) __p1 = (p1); \
> > __typeof(p2) __p2 = (p2); \
> > - _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p2)])0,
> \
> > - int (*)[__ARM_mve_type_int64_t_ptr][__ARM_mve_type_int64x2_t]:
> __arm_vstrdq_scatter_shifted_offset_s64 (__ARM_mve_coerce(p0, int64_t
> *), __p1, __ARM_mve_coerce(__p2, int64x2_t)), \
> > - int (*)[__ARM_mve_type_uint64_t_ptr][__ARM_mve_type_uint64x2_t]:
> __arm_vstrdq_scatter_shifted_offset_u64 (__ARM_mve_coerce(p0, uint64_t
> *), __p1, __ARM_mve_coerce(__p2, uint64x2_t)));})
> > + _Generic( (int
> (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typ
> eid(__p2)])0, \
> > + int
> (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint8x16_t][__ARM_m
> ve_type_int8x16_t]: __arm_vstrbq_scatter_offset_s8
> (__ARM_mve_coerce(__p0, int8_t *), __ARM_mve_coerce(__p1, uint8x16_t),
> __ARM_mve_coerce(__p2, int8x16_t)), \
> > + int
> (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint16x8_t][__ARM_m
> ve_type_int16x8_t]: __arm_vstrbq_scatter_offset_s16
> (__ARM_mve_coerce(__p0, int8_t *), __ARM_mve_coerce(__p1, uint16x8_t),
> __ARM_mve_coerce(__p2, int16x8_t)), \
> > + int
> (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint32x4_t][__ARM_m
> ve_type_int32x4_t]: __arm_vstrbq_scatter_offset_s32
> (__ARM_mve_coerce(__p0, int8_t *), __ARM_mve_coerce(__p1, uint32x4_t),
> __ARM_mve_coerce(__p2, int32x4_t)), \
> > + int
> (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint8x16_t][__ARM_
> mve_type_uint8x16_t]: __arm_vstrbq_scatter_offset_u8
> (__ARM_mve_coerce(__p0, uint8_t *), __ARM_mve_coerce(__p1,
> uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t)), \
> > + int
> (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint16x8_t][__ARM_
> mve_type_uint16x8_t]: __arm_vstrbq_scatter_offset_u16
> (__ARM_mve_coerce(__p0, uint8_t *), __ARM_mve_coerce(__p1,
> uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t)), \
> > + int
> (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint32x4_t][__ARM_
> mve_type_uint32x4_t]: __arm_vstrbq_scatter_offset_u32
> (__ARM_mve_coerce(__p0, uint8_t *), __ARM_mve_coerce(__p1,
> uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t)));})
> >
> > -#define __arm_vstrdq_scatter_shifted_offset_p(p0,p1,p2,p3)
> ({ __typeof(p1) __p1 = (p1); \
> > +#define __arm_vstrbq_scatter_offset_p(p0,p1,p2,p3) ({ __typeof(p0) __p0
> = (p0); \
> > + __typeof(p1) __p1 = (p1); \
> > __typeof(p2) __p2 = (p2); \
> > - _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p2)])0,
> \
> > - int (*)[__ARM_mve_type_int64_t_ptr][__ARM_mve_type_int64x2_t]:
> __arm_vstrdq_scatter_shifted_offset_p_s64 (__ARM_mve_coerce(p0,
> int64_t *), __p1, __ARM_mve_coerce(__p2, int64x2_t), p3), \
> > - int (*)[__ARM_mve_type_uint64_t_ptr][__ARM_mve_type_uint64x2_t]:
> __arm_vstrdq_scatter_shifted_offset_p_u64 (__ARM_mve_coerce(p0,
> uint64_t *), __p1, __ARM_mve_coerce(__p2, uint64x2_t), p3));})
> > + _Generic( (int
> (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typ
> eid(__p2)])0, \
> > + int
> (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint8x16_t][__ARM_m
> ve_type_int8x16_t]: __arm_vstrbq_scatter_offset_p_s8
> (__ARM_mve_coerce(__p0, int8_t *), __ARM_mve_coerce(__p1, uint8x16_t),
> __ARM_mve_coerce(__p2, int8x16_t), p3), \
> > + int
> (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint16x8_t][__ARM_m
> ve_type_int16x8_t]: __arm_vstrbq_scatter_offset_p_s16
> (__ARM_mve_coerce(__p0, int8_t *), __ARM_mve_coerce(__p1, uint16x8_t),
> __ARM_mve_coerce(__p2, int16x8_t), p3), \
> > + int
> (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint32x4_t][__ARM_m
> ve_type_int32x4_t]: __arm_vstrbq_scatter_offset_p_s32
> (__ARM_mve_coerce(__p0, int8_t *), __ARM_mve_coerce(__p1, uint32x4_t),
> __ARM_mve_coerce(__p2, int32x4_t), p3), \
> > + int
> (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint8x16_t][__ARM_
> mve_type_uint8x16_t]: __arm_vstrbq_scatter_offset_p_u8
> (__ARM_mve_coerce(__p0, uint8_t *), __ARM_mve_coerce(__p1,
> uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \
> > + int
> (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint16x8_t][__ARM_
> mve_type_uint16x8_t]: __arm_vstrbq_scatter_offset_p_u16
> (__ARM_mve_coerce(__p0, uint8_t *), __ARM_mve_coerce(__p1,
> uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \
> > + int
> (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint32x4_t][__ARM_
> mve_type_uint32x4_t]: __arm_vstrbq_scatter_offset_p_u32
> (__ARM_mve_coerce(__p0, uint8_t *), __ARM_mve_coerce(__p1,
> uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3));})
> > +
> > +#define __arm_vstrdq_scatter_offset_p(p0,p1,p2,p3) ({ __typeof(p0) __p0
> = (p0); \
> > + __typeof(p2) __p2 = (p2); \
> > + _Generic( (int
> (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p2)])0, \
> > + int (*)[__ARM_mve_type_int64_t_ptr][__ARM_mve_type_int64x2_t]:
> __arm_vstrdq_scatter_offset_p_s64 (__ARM_mve_coerce(__p0, int64_t *),
> p1, __ARM_mve_coerce(__p2, int64x2_t), p3), \
> > + int (*)[__ARM_mve_type_uint64_t_ptr][__ARM_mve_type_uint64x2_t]:
> __arm_vstrdq_scatter_offset_p_u64 (__ARM_mve_coerce(__p0, uint64_t *),
> p1, __ARM_mve_coerce(__p2, uint64x2_t), p3));})
> > +
> > +#define __arm_vstrdq_scatter_offset(p0,p1,p2) ({ __typeof(p0) __p0 =
> (p0); \
> > + __typeof(p2) __p2 = (p2); \
> > + _Generic( (int
> (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p2)])0, \
> > + int (*)[__ARM_mve_type_int64_t_ptr][__ARM_mve_type_int64x2_t]:
> __arm_vstrdq_scatter_offset_s64 (__ARM_mve_coerce(__p0, int64_t *), p1,
> __ARM_mve_coerce(__p2, int64x2_t)), \
> > + int (*)[__ARM_mve_type_uint64_t_ptr][__ARM_mve_type_uint64x2_t]:
> __arm_vstrdq_scatter_offset_u64 (__ARM_mve_coerce(__p0, uint64_t *), p1,
> __ARM_mve_coerce(__p2, uint64x2_t)));})
> > +
> > +#define __arm_vstrdq_scatter_shifted_offset_p(p0,p1,p2,p3)
> ({ __typeof(p0) __p0 = (p0); \
> > + __typeof(p2) __p2 = (p2); \
> > + _Generic( (int
> (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p2)])0, \
> > + int (*)[__ARM_mve_type_int64_t_ptr][__ARM_mve_type_int64x2_t]:
> __arm_vstrdq_scatter_shifted_offset_p_s64 (__ARM_mve_coerce(__p0,
> int64_t *), p1, __ARM_mve_coerce(__p2, int64x2_t), p3), \
> > + int (*)[__ARM_mve_type_uint64_t_ptr][__ARM_mve_type_uint64x2_t]:
> __arm_vstrdq_scatter_shifted_offset_p_u64 (__ARM_mve_coerce(__p0,
> uint64_t *), p1, __ARM_mve_coerce(__p2, uint64x2_t), p3));})
> > +
> > +#define __arm_vstrdq_scatter_shifted_offset(p0,p1,p2) ({ __typeof(p0)
> __p0 = (p0); \
> > + __typeof(p2) __p2 = (p2); \
> > + _Generic( (int
> (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p2)])0, \
> > + int (*)[__ARM_mve_type_int64_t_ptr][__ARM_mve_type_int64x2_t]:
> __arm_vstrdq_scatter_shifted_offset_s64 (__ARM_mve_coerce(__p0,
> int64_t *), p1, __ARM_mve_coerce(__p2, int64x2_t)), \
> > + int (*)[__ARM_mve_type_uint64_t_ptr][__ARM_mve_type_uint64x2_t]:
> __arm_vstrdq_scatter_shifted_offset_u64 (__ARM_mve_coerce(__p0,
> uint64_t *), p1, __ARM_mve_coerce(__p2, uint64x2_t)));})
> >
> > #endif /* __cplusplus */
> > #endif /* __ARM_FEATURE_MVE */
> >
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [GCC][PATCH] arm: Remove duplicate definitions from arm_mve.h (pr100419).
2021-05-05 12:39 ` Srinath Parvathaneni
@ 2021-05-10 14:18 ` Richard Earnshaw
0 siblings, 0 replies; 4+ messages in thread
From: Richard Earnshaw @ 2021-05-10 14:18 UTC (permalink / raw)
To: Srinath Parvathaneni, gcc-patches; +Cc: Richard Earnshaw
On 05/05/2021 13:39, Srinath Parvathaneni via Gcc-patches wrote:
> Hi Richard,
>
>> -----Original Message-----
>> From: Richard Earnshaw <Richard.Earnshaw@foss.arm.com>
>> Sent: 05 May 2021 11:15
>> To: Srinath Parvathaneni <Srinath.Parvathaneni@arm.com>; gcc-
>> patches@gcc.gnu.org
>> Cc: Richard Earnshaw <Richard.Earnshaw@arm.com>
>> Subject: Re: [GCC][PATCH] arm: Remove duplicate definitions from
>> arm_mve.h (pr100419).
>>
>>
>>
>> On 05/05/2021 10:56, Srinath Parvathaneni via Gcc-patches wrote:
>>> Hi All,
>>>
>>> This patch removes several duplicated intrinsic definitions from
>>> arm_mve.h mentioned in PR100419 and also fixes the wrong arguments
>>> in few of intrinsics polymorphic variants.
>>>
>>> Regression tested and found no issues.
>>>
>>> Ok for master ? GCC-11 and GCC-10 branch backports?
>>> gcc/ChangeLog:
>>>
>>> 2021-05-04 Srinath Parvathaneni <srinath.parvathaneni@arm.com>
>>>
>>> PR target/100419
>>> * config/arm/arm_mve.h (__arm_vstrwq_scatter_offset): Fix wrong
>> arguments.
>>> (__arm_vcmpneq): Remove duplicate definition.
>>> (__arm_vstrwq_scatter_offset_p): Likewise.
>>> (__arm_vmaxq_x): Likewise.
>>> (__arm_vmlsdavaq): Likewise.
>>> (__arm_vmlsdavaxq): Likewise.
>>> (__arm_vmlsdavq_p): Likewise.
>>> (__arm_vmlsdavxq_p): Likewise.
>>> (__arm_vrmlaldavhaq): Likewise.
>>> (__arm_vstrbq_p): Likewise.
>>> (__arm_vstrbq_scatter_offset): Likewise.
>>> (__arm_vstrbq_scatter_offset_p): Likewise.
>>> (__arm_vstrdq_scatter_offset): Likewise.
>>> (__arm_vstrdq_scatter_offset_p): Likewise.
>>> (__arm_vstrdq_scatter_shifted_offset): Likewise.
>>> (__arm_vstrdq_scatter_shifted_offset_p): Likewise.
>>>
>>> Co-authored-by: Joe Ramsay <joe.ramsay@arm.com>
>>
>> Let's take this example:
>>
>> -#define __arm_vstrwq_scatter_offset(p0,p1,p2) ({ __typeof(p1) __p1 =
>> (p1); \
>> +#define __arm_vstrwq_scatter_offset(p0,p1,p2) ({ __typeof(p0) __p0 =
>> (p0); \
>> __typeof(p2) __p2 = (p2); \
>> - _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p2)])0, \
>> - int (*)[__ARM_mve_type_int32_t_ptr][__ARM_mve_type_int32x4_t]:
>> __arm_vstrwq_scatter_offset_s32 (__ARM_mve_coerce(p0, int32_t *), __p1,
>> __ARM_mve_coerce(__p2, int32x4_t)), \
>> - int (*)[__ARM_mve_type_uint32_t_ptr][__ARM_mve_type_uint32x4_t]:
>> __arm_vstrwq_scatter_offset_u32 (__ARM_mve_coerce(p0, uint32_t *),
>> __p1,
>> __ARM_mve_coerce(__p2, uint32x4_t)));})
>> + _Generic( (int
>> (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p2)])0, \
>> + int (*)[__ARM_mve_type_int32_t_ptr][__ARM_mve_type_int32x4_t]:
>> __arm_vstrwq_scatter_offset_s32 (__ARM_mve_coerce(__p0, int32_t *), p1,
>> __ARM_mve_coerce(__p2, int32x4_t)), \
>> + int (*)[__ARM_mve_type_uint32_t_ptr][__ARM_mve_type_uint32x4_t]:
>> __arm_vstrwq_scatter_offset_u32 (__ARM_mve_coerce(__p0, uint32_t *),
>> p1,
>> __ARM_mve_coerce(__p2, uint32x4_t)));})
>>
>> It removes the safe shadow copy of p1 but adds a safe shadow copy of p0.
>> Why? Isn't it better (and safer) to just create shadow copies of all
>> the arguments and let the compiler worry about when it's safe to
>> eliminate them?
>
> As you already know, polymorphic variants are used to select the intrinsics based on the types of their arguments.
>
> Consider the following code from arm_mve.h:
> __extension__ extern __inline void
> __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> __arm_vstrwq_scatter_offset_s32 (int32_t * __base, uint32x4_t __offset, int32x4_t __value)
> {
> __builtin_mve_vstrwq_scatter_offset_sv4si ((__builtin_neon_si *) __base, __offset, __value);
> }
>
> __extension__ extern __inline void
> __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> __arm_vstrwq_scatter_offset_u32 (uint32_t * __base, uint32x4_t __offset, uint32x4_t __value)
> {
> __builtin_mve_vstrwq_scatter_offset_uv4si ((__builtin_neon_si *) __base, __offset, __value);
> }
>
> __extension__ extern __inline void
> __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> __arm_vstrwq_scatter_offset_f32 (float32_t * __base, uint32x4_t __offset, float32x4_t __value)
> {
> __builtin_mve_vstrwq_scatter_offset_fv4sf ((__builtin_neon_si *) __base, __offset, __value);
> }
>
> Of the above 3 functions, which one is called from the following polymorphic variant is
> decided based on the types of arguments p0, p1 and p2.
> #define __arm_vstrwq_scatter_offset(p0,p1,p2)
>
> For the 3 function definitions mentioned above, only the types of arguments 1 (p0) and 3 (p2) vary,
> whereas the type of the second argument (p1) is the same (uint32x4_t).
>
> This is the reason we only need shadow copies of p0 and p2 to determine the actual function to be called,
> and the type of p1 is irrelevant. Previously p1 was wrongly used to determine the function instead of p0,
> and that is a bug, which is fixed by this patch.
>
> Since the type of p1 is irrelevant in deciding the function to be called, I believe adding a shadow copy
> for p1 (__typeof(p1) __p1 = (p1)) in this macro expansion is of no use. Considering we have more than
> 250 polymorphic variants defined in the arm_mve.h header, this would result in more than 250 lines of extra code.
>
Ah sorry, I'd missed that this was using the _Generic() feature and that
p1 was only being used once in each variant.
On that basis, OK.
R.
> Regards,
> Srinath.
>
>> R.
>>
>>>
>>>
>>> ############### Attachment also inlined for ease of reply
>> ###############
>>>
>>>
>>> diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h
>>> index
>> 3a40c6e68161b64319b071f57a5b0d8393303cfd..dc1d874a6366eb5fe755a70c
>> 72ed371c915bd04b 100644
>>> --- a/gcc/config/arm/arm_mve.h
>>> +++ b/gcc/config/arm/arm_mve.h
>>> @@ -37808,33 +37808,19 @@ extern void *__ARM_undef;
>>> int (*)[__ARM_mve_type_uint32x4_t]:
>> __arm_vstrwq_scatter_base_p_u32(p0, p1, __ARM_mve_coerce(__p2,
>> uint32x4_t), p3), \
>>> int (*)[__ARM_mve_type_float32x4_t]:
>> __arm_vstrwq_scatter_base_p_f32(p0, p1, __ARM_mve_coerce(__p2,
>> float32x4_t), p3));})
>>>
>>> -#define __arm_vstrwq_scatter_offset(p0,p1,p2) ({ __typeof(p1) __p1 =
>> (p1); \
>>> +#define __arm_vstrwq_scatter_offset(p0,p1,p2) ({ __typeof(p0) __p0 =
>> (p0); \
>>> __typeof(p2) __p2 = (p2); \
>>> - _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p2)])0,
>> \
>>> - int (*)[__ARM_mve_type_int32_t_ptr][__ARM_mve_type_int32x4_t]:
>> __arm_vstrwq_scatter_offset_s32 (__ARM_mve_coerce(p0, int32_t *), __p1,
>> __ARM_mve_coerce(__p2, int32x4_t)), \
>>> - int (*)[__ARM_mve_type_uint32_t_ptr][__ARM_mve_type_uint32x4_t]:
>> __arm_vstrwq_scatter_offset_u32 (__ARM_mve_coerce(p0, uint32_t *),
>> __p1, __ARM_mve_coerce(__p2, uint32x4_t)), \
>>> - int (*)[__ARM_mve_type_float32_t_ptr][__ARM_mve_type_float32x4_t]:
>> __arm_vstrwq_scatter_offset_f32 (__ARM_mve_coerce(p0, float32_t *),
>> __p1, __ARM_mve_coerce(__p2, float32x4_t)));})
>>> + _Generic( (int
>> (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p2)])0, \
>>> + int (*)[__ARM_mve_type_int32_t_ptr][__ARM_mve_type_int32x4_t]:
>> __arm_vstrwq_scatter_offset_s32 (__ARM_mve_coerce(__p0, int32_t *), p1,
>> __ARM_mve_coerce(__p2, int32x4_t)), \
>>> + int (*)[__ARM_mve_type_uint32_t_ptr][__ARM_mve_type_uint32x4_t]:
>> __arm_vstrwq_scatter_offset_u32 (__ARM_mve_coerce(__p0, uint32_t *),
>> p1, __ARM_mve_coerce(__p2, uint32x4_t)), \
>>> + int (*)[__ARM_mve_type_float32_t_ptr][__ARM_mve_type_float32x4_t]:
>> __arm_vstrwq_scatter_offset_f32 (__ARM_mve_coerce(__p0, float32_t *),
>> p1, __ARM_mve_coerce(__p2, float32x4_t)));})
>>>
>>> -#define __arm_vstrwq_scatter_offset_p(p0,p1,p2,p3) ({ __typeof(p1) __p1
>> = (p1); \
>>> +#define __arm_vstrwq_scatter_offset_p(p0,p1,p2,p3) ({ __typeof(p0)
>> __p0 = (p0); \
>>> __typeof(p2) __p2 = (p2); \
>>> - _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p2)])0,
>> \
>>> - int (*)[__ARM_mve_type_int32_t_ptr][__ARM_mve_type_int32x4_t]:
>> __arm_vstrwq_scatter_offset_p_s32 (__ARM_mve_coerce(p0, int32_t *),
>> __p1, __ARM_mve_coerce(__p2, int32x4_t), p3), \
>>> - int (*)[__ARM_mve_type_uint32_t_ptr][__ARM_mve_type_uint32x4_t]:
>> __arm_vstrwq_scatter_offset_p_u32 (__ARM_mve_coerce(p0, uint32_t *),
>> __p1, __ARM_mve_coerce(__p2, uint32x4_t), p3), \
>>> - int (*)[__ARM_mve_type_float32_t_ptr][__ARM_mve_type_float32x4_t]:
>> __arm_vstrwq_scatter_offset_p_f32 (__ARM_mve_coerce(p0, float32_t *),
>> __p1, __ARM_mve_coerce(__p2, float32x4_t), p3));})
>>> -
>>> -#define __arm_vstrwq_scatter_offset_p(p0,p1,p2,p3) ({ __typeof(p1) __p1
>> = (p1); \
>>> - __typeof(p2) __p2 = (p2); \
>>> - _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p2)])0,
>> \
>>> - int (*)[__ARM_mve_type_int32_t_ptr][__ARM_mve_type_int32x4_t]:
>> __arm_vstrwq_scatter_offset_p_s32 (__ARM_mve_coerce(p0, int32_t *),
>> __p1, __ARM_mve_coerce(__p2, int32x4_t), p3), \
>>> - int (*)[__ARM_mve_type_uint32_t_ptr][__ARM_mve_type_uint32x4_t]:
>> __arm_vstrwq_scatter_offset_p_u32 (__ARM_mve_coerce(p0, uint32_t *),
>> __p1, __ARM_mve_coerce(__p2, uint32x4_t), p3), \
>>> - int (*)[__ARM_mve_type_float32_t_ptr][__ARM_mve_type_float32x4_t]:
>> __arm_vstrwq_scatter_offset_p_f32 (__ARM_mve_coerce(p0, float32_t *),
>> __p1, __ARM_mve_coerce(__p2, float32x4_t), p3));})
>>> -
>>> -#define __arm_vstrwq_scatter_offset(p0,p1,p2) ({ __typeof(p1) __p1 =
>> (p1); \
>>> - __typeof(p2) __p2 = (p2); \
>>> - _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p2)])0,
>> \
>>> - int (*)[__ARM_mve_type_int32_t_ptr][__ARM_mve_type_int32x4_t]:
>> __arm_vstrwq_scatter_offset_s32 (__ARM_mve_coerce(p0, int32_t *), __p1,
>> __ARM_mve_coerce(__p2, int32x4_t)), \
>>> - int (*)[__ARM_mve_type_uint32_t_ptr][__ARM_mve_type_uint32x4_t]:
>> __arm_vstrwq_scatter_offset_u32 (__ARM_mve_coerce(p0, uint32_t *),
>> __p1, __ARM_mve_coerce(__p2, uint32x4_t)), \
>>> - int (*)[__ARM_mve_type_float32_t_ptr][__ARM_mve_type_float32x4_t]:
>> __arm_vstrwq_scatter_offset_f32 (__ARM_mve_coerce(p0, float32_t *),
>> __p1, __ARM_mve_coerce(__p2, float32x4_t)));})
>>> + _Generic( (int
>> (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p2)])0, \
>>> + int (*)[__ARM_mve_type_int32_t_ptr][__ARM_mve_type_int32x4_t]:
>> __arm_vstrwq_scatter_offset_p_s32 (__ARM_mve_coerce(__p0, int32_t *),
>> p1, __ARM_mve_coerce(__p2, int32x4_t), p3), \
>>> + int (*)[__ARM_mve_type_uint32_t_ptr][__ARM_mve_type_uint32x4_t]:
>> __arm_vstrwq_scatter_offset_p_u32 (__ARM_mve_coerce(__p0, uint32_t *),
>> p1, __ARM_mve_coerce(__p2, uint32x4_t), p3), \
>>> + int (*)[__ARM_mve_type_float32_t_ptr][__ARM_mve_type_float32x4_t]:
>> __arm_vstrwq_scatter_offset_p_f32 (__ARM_mve_coerce(__p0, float32_t *),
>> p1, __ARM_mve_coerce(__p2, float32x4_t), p3));})
>>>
>>> #define __arm_vstrwq_scatter_shifted_offset(p0,p1,p2) ({ __typeof(p1)
>> __p1 = (p1); \
>>> __typeof(p2) __p2 = (p2); \
>>> @@ -38422,6 +38408,12 @@ extern void *__ARM_undef;
>>> #define __arm_vcmpneq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
>>> __typeof(p1) __p1 = (p1); \
>>> _Generic( (int
>> (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
>>> + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]:
>> __arm_vcmpneq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t),
>> __ARM_mve_coerce(__p1, int8_t)), \
>>> + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]:
>> __arm_vcmpneq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t),
>> __ARM_mve_coerce(__p1, int16_t)), \
>>> + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]:
>> __arm_vcmpneq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t),
>> __ARM_mve_coerce(__p1, int32_t)), \
>>> + int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int_n]:
>> __arm_vcmpneq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t),
>> __ARM_mve_coerce(__p1, uint8_t)), \
>>> + int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]:
>> __arm_vcmpneq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t),
>> __ARM_mve_coerce(__p1, uint16_t)), \
>>> + int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]:
>> __arm_vcmpneq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t),
>> __ARM_mve_coerce(__p1, uint32_t)), \
>>> int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]:
>> __arm_vcmpneq_s8 (__ARM_mve_coerce(__p0, int8x16_t),
>> __ARM_mve_coerce(__p1, int8x16_t)), \
>>> int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]:
>> __arm_vcmpneq_s16 (__ARM_mve_coerce(__p0, int16x8_t),
>> __ARM_mve_coerce(__p1, int16x8_t)), \
>>> int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]:
>> __arm_vcmpneq_s32 (__ARM_mve_coerce(__p0, int32x4_t),
>> __ARM_mve_coerce(__p1, int32x4_t)), \
>>> @@ -38871,23 +38863,6 @@ extern void *__ARM_undef;
>>> int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]:
>> __arm_vcmpeqq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t),
>> __ARM_mve_coerce(__p1, uint16_t)), \
>>> int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]:
>> __arm_vcmpeqq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t),
>> __ARM_mve_coerce(__p1, uint32_t)));})
>>>
>>> -#define __arm_vcmpneq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
>>> - __typeof(p1) __p1 = (p1); \
>>> - _Generic( (int
>> (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
>>> - int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]:
>> __arm_vcmpneq_s8 (__ARM_mve_coerce(__p0, int8x16_t),
>> __ARM_mve_coerce(__p1, int8x16_t)), \
>>> - int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]:
>> __arm_vcmpneq_s16 (__ARM_mve_coerce(__p0, int16x8_t),
>> __ARM_mve_coerce(__p1, int16x8_t)), \
>>> - int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]:
>> __arm_vcmpneq_s32 (__ARM_mve_coerce(__p0, int32x4_t),
>> __ARM_mve_coerce(__p1, int32x4_t)), \
>>> - int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]:
>> __arm_vcmpneq_u8 (__ARM_mve_coerce(__p0, uint8x16_t),
>> __ARM_mve_coerce(__p1, uint8x16_t)), \
>>> - int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]:
>> __arm_vcmpneq_u16 (__ARM_mve_coerce(__p0, uint16x8_t),
>> __ARM_mve_coerce(__p1, uint16x8_t)), \
>>> - int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]:
>> __arm_vcmpneq_u32 (__ARM_mve_coerce(__p0, uint32x4_t),
>> __ARM_mve_coerce(__p1, uint32x4_t)), \
>>> - int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]:
>> __arm_vcmpneq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t),
>> __ARM_mve_coerce(__p1, int8_t)), \
>>> - int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]:
>> __arm_vcmpneq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t),
>> __ARM_mve_coerce(__p1, int16_t)), \
>>> - int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]:
>> __arm_vcmpneq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t),
>> __ARM_mve_coerce(__p1, int32_t)), \
>>> - int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int_n]:
>> __arm_vcmpneq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t),
>> __ARM_mve_coerce(__p1, uint8_t)), \
>>> - int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]:
>> __arm_vcmpneq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t),
>> __ARM_mve_coerce(__p1, uint16_t)), \
>>> - int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]:
>> __arm_vcmpneq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t),
>> __ARM_mve_coerce(__p1, uint32_t)));})
>>> -
>>> -
>>> #define __arm_vqmovntq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
>>> __typeof(p1) __p1 = (p1); \
>>> _Generic( (int
>> (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
>>> @@ -39036,22 +39011,6 @@ extern void *__ARM_undef;
>>> int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]:
>> __arm_vcmpneq_m_u16 (__ARM_mve_coerce(__p0, uint16x8_t),
>> __ARM_mve_coerce(__p1, uint16x8_t), p2), \
>>> int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]:
>> __arm_vcmpneq_m_u32 (__ARM_mve_coerce(__p0, uint32x4_t),
>> __ARM_mve_coerce(__p1, uint32x4_t), p2));})
>>>
>>> -#define __arm_vcmpneq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
>>> - __typeof(p1) __p1 = (p1); \
>>> - _Generic( (int
>> (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
>>> - int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]:
>> __arm_vcmpneq_s8 (__ARM_mve_coerce(__p0, int8x16_t),
>> __ARM_mve_coerce(__p1, int8x16_t)), \
>>> - int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]:
>> __arm_vcmpneq_s16 (__ARM_mve_coerce(__p0, int16x8_t),
>> __ARM_mve_coerce(__p1, int16x8_t)), \
>>> - int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]:
>> __arm_vcmpneq_s32 (__ARM_mve_coerce(__p0, int32x4_t),
>> __ARM_mve_coerce(__p1, int32x4_t)), \
>>> - int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]:
>> __arm_vcmpneq_u8 (__ARM_mve_coerce(__p0, uint8x16_t),
>> __ARM_mve_coerce(__p1, uint8x16_t)), \
>>> - int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]:
>> __arm_vcmpneq_u16 (__ARM_mve_coerce(__p0, uint16x8_t),
>> __ARM_mve_coerce(__p1, uint16x8_t)), \
>>> - int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]:
>> __arm_vcmpneq_u32 (__ARM_mve_coerce(__p0, uint32x4_t),
>> __ARM_mve_coerce(__p1, uint32x4_t)), \
>>> - int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]:
>> __arm_vcmpneq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t),
>> __ARM_mve_coerce(__p1, int8_t)), \
>>> - int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]:
>> __arm_vcmpneq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t),
>> __ARM_mve_coerce(__p1, int16_t)), \
>>> - int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]:
>> __arm_vcmpneq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t),
>> __ARM_mve_coerce(__p1, int32_t)), \
>>> - int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int_n]:
>> __arm_vcmpneq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t),
>> __ARM_mve_coerce(__p1, uint8_t)), \
>>> - int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]:
>> __arm_vcmpneq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t),
>> __ARM_mve_coerce(__p1, uint16_t)), \
>>> - int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]:
>> __arm_vcmpneq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t),
>> __ARM_mve_coerce(__p1, uint32_t)));})
>>> -
>>> #define __arm_vshlcq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
>>> _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
>>> int (*)[__ARM_mve_type_int8x16_t]: __arm_vshlcq_s8
>> (__ARM_mve_coerce(__p0, int8x16_t), p1, p2), \
>>> @@ -39367,52 +39326,6 @@ extern void *__ARM_undef;
>>> int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int16x8_t]:
>> __arm_vminaq_m_s16 (__ARM_mve_coerce(__p0, uint16x8_t),
>> __ARM_mve_coerce(__p1, int16x8_t), p2), \
>>> int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int32x4_t]:
>> __arm_vminaq_m_s32 (__ARM_mve_coerce(__p0, uint32x4_t),
>> __ARM_mve_coerce(__p1, int32x4_t), p2));})
>>>
>>> -#define __arm_vrmlaldavhaq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
>>> - __typeof(p1) __p1 = (p1); \
>>> - _Generic( (int
>> (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
>>> - int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]:
>> __arm_vcmpltq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t),
>> __ARM_mve_coerce(__p1, int8x16_t), p2), \
>>> - int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]:
>> __arm_vcmpltq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t),
>> __ARM_mve_coerce(__p1, int16x8_t), p2), \
>>> - int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]:
>> __arm_vcmpltq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t),
>> __ARM_mve_coerce(__p1, int32x4_t), p2), \
>>> - int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]:
>> __arm_vcmpltq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t),
>> __ARM_mve_coerce(__p1, int8_t), p2), \
>>> - int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]:
>> __arm_vcmpltq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t),
>> __ARM_mve_coerce(__p1, int16_t), p2), \
>>> - int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]:
>> __arm_vcmpltq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t),
>> __ARM_mve_coerce(__p1, int32_t), p2));})
>>> -
>>> -#define __arm_vmlsdavxq_p(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
>>> - __typeof(p1) __p1 = (p1); \
>>> - _Generic( (int
>> (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
>>> - int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]:
>> __arm_vcmpleq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t),
>> __ARM_mve_coerce(__p1, int8x16_t), p2), \
>>> - int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]:
>> __arm_vcmpleq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t),
>> __ARM_mve_coerce(__p1, int16x8_t), p2), \
>>> - int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]:
>> __arm_vcmpleq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t),
>> __ARM_mve_coerce(__p1, int32x4_t), p2), \
>>> - int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]:
>> __arm_vcmpleq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t),
>> __ARM_mve_coerce(__p1, int8_t), p2), \
>>> - int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]:
>> __arm_vcmpleq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t),
>> __ARM_mve_coerce(__p1, int16_t), p2), \
>>> - int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]:
>> __arm_vcmpleq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t),
>> __ARM_mve_coerce(__p1, int32_t), p2));})
>>> -
>>> -#define __arm_vmlsdavq_p(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
>>> - __typeof(p1) __p1 = (p1); \
>>> - _Generic( (int
>> (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
>>> - int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]:
>> __arm_vcmpgtq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t),
>> __ARM_mve_coerce(__p1, int8x16_t), p2), \
>>> - int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]:
>> __arm_vcmpgtq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t),
>> __ARM_mve_coerce(__p1, int16x8_t), p2), \
>>> - int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]:
>> __arm_vcmpgtq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t),
>> __ARM_mve_coerce(__p1, int32x4_t), p2), \
>>> - int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]:
>> __arm_vcmpgtq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t),
>> __ARM_mve_coerce(__p1, int8_t), p2), \
>>> - int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]:
>> __arm_vcmpgtq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t),
>> __ARM_mve_coerce(__p1, int16_t), p2), \
>>> - int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]:
>> __arm_vcmpgtq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t),
>> __ARM_mve_coerce(__p1, int32_t), p2));})
>>> -
>>> -#define __arm_vmlsdavaxq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
>>> - __typeof(p1) __p1 = (p1); \
>>> - _Generic( (int
>> (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
>>> - int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int16x8_t]:
>> __arm_vshrntq_n_s16 (__ARM_mve_coerce(__p0, int8x16_t),
>> __ARM_mve_coerce(__p1, int16x8_t), p2), \
>>> - int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int32x4_t]:
>> __arm_vshrntq_n_s32 (__ARM_mve_coerce(__p0, int16x8_t),
>> __ARM_mve_coerce(__p1, int32x4_t), p2), \
>>> - int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint16x8_t]:
>> __arm_vshrntq_n_u16 (__ARM_mve_coerce(__p0, uint8x16_t),
>> __ARM_mve_coerce(__p1, uint16x8_t), p2), \
>>> - int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint32x4_t]:
>> __arm_vshrntq_n_u32 (__ARM_mve_coerce(__p0, uint16x8_t),
>> __ARM_mve_coerce(__p1, uint32x4_t), p2));})
>>> -
>>> -#define __arm_vmlsdavaq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
>>> - __typeof(p1) __p1 = (p1); \
>>> - _Generic( (int
>> (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
>>> - int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int16x8_t]:
>> __arm_vrshrntq_n_s16 (__ARM_mve_coerce(__p0, int8x16_t),
>> __ARM_mve_coerce(__p1, int16x8_t), p2), \
>>> - int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int32x4_t]:
>> __arm_vrshrntq_n_s32 (__ARM_mve_coerce(__p0, int16x8_t),
>> __ARM_mve_coerce(__p1, int32x4_t), p2), \
>>> - int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint16x8_t]:
>> __arm_vrshrntq_n_u16 (__ARM_mve_coerce(__p0, uint8x16_t),
>> __ARM_mve_coerce(__p1, uint16x8_t), p2), \
>>> - int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint32x4_t]:
>> __arm_vrshrntq_n_u32 (__ARM_mve_coerce(__p0, uint16x8_t),
>> __ARM_mve_coerce(__p1, uint32x4_t), p2));})
>>> -
>>> #define __arm_vmovlbq_m(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
>>> __typeof(p1) __p1 = (p1); \
>>> _Generic( (int
>> (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
>>> @@ -39711,26 +39624,6 @@ extern void *__ARM_undef;
>>> int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vmulq_m_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \
>>> int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vmulq_m_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3));})
>>>
>>> -#define __arm_vstrbq(p0,p1) ({ __typeof(p1) __p1 = (p1); \
>>> - _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)])0,
>> \
>>> - int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_int8x16_t]:
>> __arm_vstrbq_s8 (__ARM_mve_coerce(p0, int8_t *),
>> __ARM_mve_coerce(__p1, int8x16_t)), \
>>> - int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_int16x8_t]:
>> __arm_vstrbq_s16 (__ARM_mve_coerce(p0, int8_t *),
>> __ARM_mve_coerce(__p1, int16x8_t)), \
>>> - int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_int32x4_t]:
>> __arm_vstrbq_s32 (__ARM_mve_coerce(p0, int8_t *),
>> __ARM_mve_coerce(__p1, int32x4_t)), \
>>> - int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint8x16_t]:
>> __arm_vstrbq_u8 (__ARM_mve_coerce(p0, uint8_t *),
>> __ARM_mve_coerce(__p1, uint8x16_t)), \
>>> - int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint16x8_t]:
>> __arm_vstrbq_u16 (__ARM_mve_coerce(p0, uint8_t *),
>> __ARM_mve_coerce(__p1, uint16x8_t)), \
>>> - int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint32x4_t]:
>> __arm_vstrbq_u32 (__ARM_mve_coerce(p0, uint8_t *),
>> __ARM_mve_coerce(__p1, uint32x4_t)));})
>>> -
>>> -#define __arm_vstrbq_scatter_offset(p0,p1,p2) ({ __typeof(p0) __p0 =
>> (p0); \
>>> - __typeof(p1) __p1 = (p1); \
>>> - __typeof(p2) __p2 = (p2); \
>>> - _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
>>> - int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint8x16_t][__ARM_mve_type_int8x16_t]: __arm_vstrbq_scatter_offset_s8 (__ARM_mve_coerce(__p0, int8_t *), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, int8x16_t)), \
>>> - int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint16x8_t][__ARM_mve_type_int16x8_t]: __arm_vstrbq_scatter_offset_s16 (__ARM_mve_coerce(__p0, int8_t *), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, int16x8_t)), \
>>> - int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint32x4_t][__ARM_mve_type_int32x4_t]: __arm_vstrbq_scatter_offset_s32 (__ARM_mve_coerce(__p0, int8_t *), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, int32x4_t)), \
>>> - int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vstrbq_scatter_offset_u8 (__ARM_mve_coerce(__p0, uint8_t *), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t)), \
>>> - int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vstrbq_scatter_offset_u16 (__ARM_mve_coerce(__p0, uint8_t *), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t)), \
>>> - int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vstrbq_scatter_offset_u32 (__ARM_mve_coerce(__p0, uint8_t *), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t)));})
>>> -
>>> #define __arm_vstrwq_scatter_base(p0,p1,p2) ({ __typeof(p2) __p2 = (p2);
>> \
>>> _Generic( (int (*)[__ARM_mve_typeid(__p2)])0, \
>>> int (*)[__ARM_mve_type_int32x4_t]:
>> __arm_vstrwq_scatter_base_s32(p0, p1, __ARM_mve_coerce(__p2,
>> int32x4_t)), \
>>> @@ -39745,27 +39638,6 @@ extern void *__ARM_undef;
>>> int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint16x8_t]:
>> __arm_vldrbq_gather_offset_u16 (__ARM_mve_coerce(p0, uint8_t const *),
>> __ARM_mve_coerce(__p1, uint16x8_t)), \
>>> int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint32x4_t]:
>> __arm_vldrbq_gather_offset_u32 (__ARM_mve_coerce(p0, uint8_t const *),
>> __ARM_mve_coerce(__p1, uint32x4_t)));})
>>>
>>> -#define __arm_vstrbq_p(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
>>> - __typeof(p1) __p1 = (p1); \
>>> - _Generic( (int
>> (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
>>> - int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_int8x16_t]:
>> __arm_vstrbq_p_s8 (__ARM_mve_coerce(__p0, int8_t *),
>> __ARM_mve_coerce(__p1, int8x16_t), p2), \
>>> - int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_int16x8_t]:
>> __arm_vstrbq_p_s16 (__ARM_mve_coerce(__p0, int8_t *),
>> __ARM_mve_coerce(__p1, int16x8_t), p2), \
>>> - int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_int32x4_t]:
>> __arm_vstrbq_p_s32 (__ARM_mve_coerce(__p0, int8_t *),
>> __ARM_mve_coerce(__p1, int32x4_t), p2), \
>>> - int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint8x16_t]:
>> __arm_vstrbq_p_u8 (__ARM_mve_coerce(__p0, uint8_t *),
>> __ARM_mve_coerce(__p1, uint8x16_t), p2), \
>>> - int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint16x8_t]:
>> __arm_vstrbq_p_u16 (__ARM_mve_coerce(__p0, uint8_t *),
>> __ARM_mve_coerce(__p1, uint16x8_t), p2), \
>>> - int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint32x4_t]:
>> __arm_vstrbq_p_u32 (__ARM_mve_coerce(__p0, uint8_t *),
>> __ARM_mve_coerce(__p1, uint32x4_t), p2));})
>>> -
>>> -#define __arm_vstrbq_scatter_offset_p(p0,p1,p2,p3) ({ __typeof(p0) __p0
>> = (p0); \
>>> - __typeof(p1) __p1 = (p1); \
>>> - __typeof(p2) __p2 = (p2); \
>>> - _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
>>> - int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint8x16_t][__ARM_mve_type_int8x16_t]: __arm_vstrbq_scatter_offset_p_s8 (__ARM_mve_coerce(__p0, int8_t *), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
>>> - int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint16x8_t][__ARM_mve_type_int16x8_t]: __arm_vstrbq_scatter_offset_p_s16 (__ARM_mve_coerce(__p0, int8_t *), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
>>> - int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint32x4_t][__ARM_mve_type_int32x4_t]: __arm_vstrbq_scatter_offset_p_s32 (__ARM_mve_coerce(__p0, int8_t *), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \
>>> - int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vstrbq_scatter_offset_p_u8 (__ARM_mve_coerce(__p0, uint8_t *), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \
>>> - int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vstrbq_scatter_offset_p_u16 (__ARM_mve_coerce(__p0, uint8_t *), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \
>>> - int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vstrbq_scatter_offset_p_u32 (__ARM_mve_coerce(__p0, uint8_t *), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3));})
>>> -
>>> #define __arm_vstrwq_scatter_base_p(p0,p1,p2,p3) ({ __typeof(p2) __p2
>> = (p2); \
>>> _Generic( (int (*)[__ARM_mve_typeid(__p2)])0, \
>>> int (*)[__ARM_mve_type_int32x4_t]: __arm_vstrwq_scatter_base_p_s32
>> (p0, p1, __ARM_mve_coerce(__p2, int32x4_t), p3), \
>>> @@ -39921,34 +39793,6 @@ extern void *__ARM_undef;
>>> int (*)[__ARM_mve_type_int64x2_t]: __arm_vstrdq_scatter_base_s64
>> (p0, p1, __ARM_mve_coerce(__p2, int64x2_t)), \
>>> int (*)[__ARM_mve_type_uint64x2_t]: __arm_vstrdq_scatter_base_u64
>> (p0, p1, __ARM_mve_coerce(__p2, uint64x2_t)));})
>>>
>>> -#define __arm_vstrdq_scatter_offset_p(p0,p1,p2,p3) ({ __typeof(p0) __p0
>> = (p0); \
>>> - __typeof(p1) __p1 = (p1); \
>>> - __typeof(p2) __p2 = (p2); \
>>> - _Generic( (int
>> (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p2)])0, \
>>> - int (*)[__ARM_mve_type_int64_t_ptr][__ARM_mve_type_int64x2_t]:
>> __arm_vstrdq_scatter_offset_p_s64 (__ARM_mve_coerce(__p0, int64_t *),
>> __p1, __ARM_mve_coerce(__p2, int64x2_t), p3), \
>>> - int (*)[__ARM_mve_type_uint64_t_ptr][__ARM_mve_type_uint64x2_t]:
>> __arm_vstrdq_scatter_offset_p_u64 (__ARM_mve_coerce(__p0, uint64_t *),
>> __p1, __ARM_mve_coerce(__p2, uint64x2_t), p3));})
>>> -
>>> -#define __arm_vstrdq_scatter_offset(p0,p1,p2) ({ __typeof(p0) __p0 =
>> (p0); \
>>> - __typeof(p1) __p1 = (p1); \
>>> - __typeof(p2) __p2 = (p2); \
>>> - _Generic( (int
>> (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p2)])0, \
>>> - int (*)[__ARM_mve_type_int64_t_ptr][__ARM_mve_type_int64x2_t]:
>> __arm_vstrdq_scatter_offset_s64 (__ARM_mve_coerce(__p0, int64_t *),
>> __p1, __ARM_mve_coerce(__p2, int64x2_t)), \
>>> - int (*)[__ARM_mve_type_uint64_t_ptr][__ARM_mve_type_uint64x2_t]:
>> __arm_vstrdq_scatter_offset_u64 (__ARM_mve_coerce(__p0, uint64_t *),
>> __p1, __ARM_mve_coerce(__p2, uint64x2_t)));})
>>> -
>>> -#define __arm_vstrdq_scatter_shifted_offset_p(p0,p1,p2,p3)
>> ({ __typeof(p0) __p0 = (p0); \
>>> - __typeof(p1) __p1 = (p1); \
>>> - __typeof(p2) __p2 = (p2); \
>>> - _Generic( (int
>> (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p2)])0, \
>>> - int (*)[__ARM_mve_type_int64_t_ptr][__ARM_mve_type_int64x2_t]:
>> __arm_vstrdq_scatter_shifted_offset_p_s64 (__ARM_mve_coerce(__p0,
>> int64_t *), __p1, __ARM_mve_coerce(__p2, int64x2_t), p3), \
>>> - int (*)[__ARM_mve_type_uint64_t_ptr][__ARM_mve_type_uint64x2_t]:
>> __arm_vstrdq_scatter_shifted_offset_p_u64 (__ARM_mve_coerce(__p0,
>> uint64_t *), __p1, __ARM_mve_coerce(__p2, uint64x2_t), p3));})
>>> -
>>> -#define __arm_vstrdq_scatter_shifted_offset(p0,p1,p2) ({ __typeof(p0)
>> __p0 = (p0); \
>>> - __typeof(p1) __p1 = (p1); \
>>> - __typeof(p2) __p2 = (p2); \
>>> - _Generic( (int
>> (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p2)])0, \
>>> - int (*)[__ARM_mve_type_int64_t_ptr][__ARM_mve_type_int64x2_t]:
>> __arm_vstrdq_scatter_shifted_offset_s64 (__ARM_mve_coerce(__p0,
>> int64_t *), __p1, __ARM_mve_coerce(__p2, int64x2_t)), \
>>> - int (*)[__ARM_mve_type_uint64_t_ptr][__ARM_mve_type_uint64x2_t]:
>> __arm_vstrdq_scatter_shifted_offset_u64 (__ARM_mve_coerce(__p0,
>> uint64_t *), __p1, __ARM_mve_coerce(__p2, uint64x2_t)));})
>>> -
>>> #define __arm_vstrhq_scatter_offset(p0,p1,p2) ({ __typeof(p1) __p1 =
>> (p1); \
>>> __typeof(p2) __p2 = (p2); \
>>> _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
>>> @@ -39981,29 +39825,17 @@ extern void *__ARM_undef;
>>> int (*)[__ARM_mve_type_uint16_t_ptr][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vstrhq_scatter_shifted_offset_p_u16 (__ARM_mve_coerce(p0, uint16_t *), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \
>>> int (*)[__ARM_mve_type_uint16_t_ptr][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vstrhq_scatter_shifted_offset_p_u32 (__ARM_mve_coerce(p0, uint16_t *), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3));})
>>>
>>> -#define __arm_vstrwq_scatter_offset(p0,p1,p2) ({ __typeof(p1) __p1 =
>> (p1); \
>>> +#define __arm_vstrwq_scatter_offset(p0,p1,p2) ({ __typeof(p0) __p0 =
>> (p0); \
>>> __typeof(p2) __p2 = (p2); \
>>> - _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p2)])0,
>> \
>>> - int (*)[__ARM_mve_type_int32_t_ptr][__ARM_mve_type_int32x4_t]:
>> __arm_vstrwq_scatter_offset_s32 (__ARM_mve_coerce(p0, int32_t *), __p1,
>> __ARM_mve_coerce(__p2, int32x4_t)), \
>>> - int (*)[__ARM_mve_type_uint32_t_ptr][__ARM_mve_type_uint32x4_t]:
>> __arm_vstrwq_scatter_offset_u32 (__ARM_mve_coerce(p0, uint32_t *),
>> __p1, __ARM_mve_coerce(__p2, uint32x4_t)));})
>>> + _Generic( (int
>> (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p2)])0, \
>>> + int (*)[__ARM_mve_type_int32_t_ptr][__ARM_mve_type_int32x4_t]:
>> __arm_vstrwq_scatter_offset_s32 (__ARM_mve_coerce(__p0, int32_t *), p1,
>> __ARM_mve_coerce(__p2, int32x4_t)), \
>>> + int (*)[__ARM_mve_type_uint32_t_ptr][__ARM_mve_type_uint32x4_t]:
>> __arm_vstrwq_scatter_offset_u32 (__ARM_mve_coerce(__p0, uint32_t *),
>> p1, __ARM_mve_coerce(__p2, uint32x4_t)));})
>>>
>>> -#define __arm_vstrwq_scatter_offset_p(p0,p1,p2,p3) ({ __typeof(p1) __p1
>> = (p1); \
>>> +#define __arm_vstrwq_scatter_offset_p(p0,p1,p2,p3) ({ __typeof(p0)
>> __p0 = (p0); \
>>> __typeof(p2) __p2 = (p2); \
>>> - _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p2)])0,
>> \
>>> - int (*)[__ARM_mve_type_int32_t_ptr][__ARM_mve_type_int32x4_t]:
>> __arm_vstrwq_scatter_offset_p_s32 (__ARM_mve_coerce(p0, int32_t *),
>> __p1, __ARM_mve_coerce(__p2, int32x4_t), p3), \
>>> - int (*)[__ARM_mve_type_uint32_t_ptr][__ARM_mve_type_uint32x4_t]:
>> __arm_vstrwq_scatter_offset_p_u32 (__ARM_mve_coerce(p0, uint32_t *),
>> __p1, __ARM_mve_coerce(__p2, uint32x4_t)));})
>>> -
>>> -#define __arm_vstrwq_scatter_offset_p(p0,p1,p2,p3) ({ __typeof(p1) __p1
>> = (p1); \
>>> - __typeof(p2) __p2 = (p2); \
>>> - _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p2)])0,
>> \
>>> - int (*)[__ARM_mve_type_int32_t_ptr][__ARM_mve_type_int32x4_t]:
>> __arm_vstrwq_scatter_offset_p_s32 (__ARM_mve_coerce(p0, int32_t *),
>> __p1, __ARM_mve_coerce(__p2, int32x4_t), p3), \
>>> - int (*)[__ARM_mve_type_uint32_t_ptr][__ARM_mve_type_uint32x4_t]:
>> __arm_vstrwq_scatter_offset_p_u32 (__ARM_mve_coerce(p0, uint32_t *),
>> __p1, __ARM_mve_coerce(__p2, uint32x4_t), p3));})
>>> -
>>> -#define __arm_vstrwq_scatter_offset(p0,p1,p2) ({ __typeof(p1) __p1 =
>> (p1); \
>>> - __typeof(p2) __p2 = (p2); \
>>> - _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p2)])0,
>> \
>>> - int (*)[__ARM_mve_type_int32_t_ptr][__ARM_mve_type_int32x4_t]:
>> __arm_vstrwq_scatter_offset_s32 (__ARM_mve_coerce(p0, int32_t *), __p1,
>> __ARM_mve_coerce(__p2, int32x4_t)), \
>>> - int (*)[__ARM_mve_type_uint32_t_ptr][__ARM_mve_type_uint32x4_t]:
>> __arm_vstrwq_scatter_offset_u32 (__ARM_mve_coerce(p0, uint32_t *),
>> __p1, __ARM_mve_coerce(__p2, uint32x4_t)));})
>>> + _Generic( (int
>> (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p2)])0, \
>>> + int (*)[__ARM_mve_type_int32_t_ptr][__ARM_mve_type_int32x4_t]:
>> __arm_vstrwq_scatter_offset_p_s32 (__ARM_mve_coerce(__p0, int32_t *),
>> p1, __ARM_mve_coerce(__p2, int32x4_t), p3), \
>>> + int (*)[__ARM_mve_type_uint32_t_ptr][__ARM_mve_type_uint32x4_t]:
>> __arm_vstrwq_scatter_offset_p_u32 (__ARM_mve_coerce(__p0, uint32_t *),
>> p1, __ARM_mve_coerce(__p2, uint32x4_t), p3));})
>>>
>>> #define __arm_vstrwq_scatter_shifted_offset(p0,p1,p2) ({ __typeof(p1)
>> __p1 = (p1); \
>>> __typeof(p2) __p2 = (p2); \
>>> @@ -40160,32 +39992,6 @@ extern void *__ARM_undef;
>>> int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]:
>> __arm_veorq_x_u16 (__ARM_mve_coerce(__p1, uint16x8_t),
>> __ARM_mve_coerce(__p2, uint16x8_t), p3), \
>>> int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]:
>> __arm_veorq_x_u32 (__ARM_mve_coerce(__p1, uint32x4_t),
>> __ARM_mve_coerce(__p2, uint32x4_t), p3));})
>>>
>>> -#define __arm_vmaxq_x(p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \
>>> - __typeof(p2) __p2 = (p2); \
>>> - _Generic( (int
>> (*)[__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
>>> - int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]:
>> __arm_vmulq_x_s8 (__ARM_mve_coerce(__p1, int8x16_t),
>> __ARM_mve_coerce(__p2, int8x16_t), p3), \
>>> - int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]:
>> __arm_vmulq_x_s16 (__ARM_mve_coerce(__p1, int16x8_t),
>> __ARM_mve_coerce(__p2, int16x8_t), p3), \
>>> - int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]:
>> __arm_vmulq_x_s32 (__ARM_mve_coerce(__p1, int32x4_t),
>> __ARM_mve_coerce(__p2, int32x4_t), p3), \
>>> - int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]:
>> __arm_vmulq_x_n_s8 (__ARM_mve_coerce(__p1, int8x16_t),
>> __ARM_mve_coerce(__p2, int8_t), p3), \
>>> - int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]:
>> __arm_vmulq_x_n_s16 (__ARM_mve_coerce(__p1, int16x8_t),
>> __ARM_mve_coerce(__p2, int16_t), p3), \
>>> - int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]:
>> __arm_vmulq_x_n_s32 (__ARM_mve_coerce(__p1, int32x4_t),
>> __ARM_mve_coerce(__p2, int32_t), p3), \
>>> - int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]:
>> __arm_vmulq_x_u8 (__ARM_mve_coerce(__p1, uint8x16_t),
>> __ARM_mve_coerce(__p2, uint8x16_t), p3), \
>>> - int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]:
>> __arm_vmulq_x_u16 (__ARM_mve_coerce(__p1, uint16x8_t),
>> __ARM_mve_coerce(__p2, uint16x8_t), p3), \
>>> - int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]:
>> __arm_vmulq_x_u32 (__ARM_mve_coerce(__p1, uint32x4_t),
>> __ARM_mve_coerce(__p2, uint32x4_t), p3), \
>>> - int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int_n]:
>> __arm_vmulq_x_n_u8 (__ARM_mve_coerce(__p1, uint8x16_t),
>> __ARM_mve_coerce(__p2, uint8_t), p3), \
>>> - int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]:
>> __arm_vmulq_x_n_u16 (__ARM_mve_coerce(__p1, uint16x8_t),
>> __ARM_mve_coerce(__p2, uint16_t), p3), \
>>> - int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]:
>> __arm_vmulq_x_n_u32 (__ARM_mve_coerce(__p1, uint32x4_t),
>> __ARM_mve_coerce(__p2, uint32_t), p3));})
>>> -
>>> -#define __arm_vminq_x(p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \
>>> - __typeof(p2) __p2 = (p2); \
>>> - _Generic( (int
>> (*)[__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
>>> - int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]:
>> __arm_vminq_x_s8 (__ARM_mve_coerce(__p1, int8x16_t),
>> __ARM_mve_coerce(__p2, int8x16_t), p3), \
>>> - int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]:
>> __arm_vminq_x_s16 (__ARM_mve_coerce(__p1, int16x8_t),
>> __ARM_mve_coerce(__p2, int16x8_t), p3), \
>>> - int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]:
>> __arm_vminq_x_s32 (__ARM_mve_coerce(__p1, int32x4_t),
>> __ARM_mve_coerce(__p2, int32x4_t), p3), \
>>> - int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]:
>> __arm_vminq_x_u8 (__ARM_mve_coerce(__p1, uint8x16_t),
>> __ARM_mve_coerce(__p2, uint8x16_t), p3), \
>>> - int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]:
>> __arm_vminq_x_u16 (__ARM_mve_coerce(__p1, uint16x8_t),
>> __ARM_mve_coerce(__p2, uint16x8_t), p3), \
>>> - int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]:
>> __arm_vminq_x_u32 (__ARM_mve_coerce(__p1, uint32x4_t),
>> __ARM_mve_coerce(__p2, uint32x4_t), p3));})
>>> -
>>> #define __arm_vmovlbq_x(p1,p2) ({ __typeof(p1) __p1 = (p1); \
>>> _Generic( (int (*)[__ARM_mve_typeid(__p1)])0, \
>>> int (*)[__ARM_mve_type_int8x16_t]: __arm_vmovlbq_x_s8
>> (__ARM_mve_coerce(__p1, int8x16_t), p2), \
>>> @@ -41013,13 +40819,6 @@ extern void *__ARM_undef;
>>> int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]:
>> __arm_vmlsldavaxq_p_s16(__p0, __ARM_mve_coerce(__p1, int16x8_t),
>> __ARM_mve_coerce(__p2, int16x8_t), p3), \
>>> int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]:
>> __arm_vmlsldavaxq_p_s32(__p0, __ARM_mve_coerce(__p1, int32x4_t),
>> __ARM_mve_coerce(__p2, int32x4_t), p3));})
>>>
>>> -#define __arm_vrmlaldavhaq_p(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0);
>> \
>>> - __typeof(p1) __p1 = (p1); \
>>> - __typeof(p2) __p2 = (p2); \
>>> - _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
>>> - int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vrmlaldavhaq_p_s32 (__ARM_mve_coerce(__p0, int64_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \
>>> - int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vrmlaldavhaq_p_u32 (__ARM_mve_coerce(__p0, uint64_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3));})
>>> -
>>> #define __arm_vrmlaldavhaxq_p(p0,p1,p2,p3)
>> __arm_vrmlaldavhaxq_p_s32(p0,p1,p2,p3)
>>>
>>> #define __arm_vrmlsldavhaq_p(p0,p1,p2,p3)
>> __arm_vrmlsldavhaq_p_s32(p0,p1,p2,p3)
>>> @@ -41343,21 +41142,47 @@ extern void *__ARM_undef;
>>> int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vqrdmladhxq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
>>> int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqrdmladhxq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3));})
>>>
>>> -#define __arm_vmlsdavaxq_p(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
>>> - __typeof(p1) __p1 = (p1); \
>>> +#define __arm_vmlsdavaxq_p(p0,p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \
>>> __typeof(p2) __p2 = (p2); \
>>> _Generic( (int
>> (*)[__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
>>> - int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]:
>> __arm_vmlsdavaxq_p_s8 (__p0, __ARM_mve_coerce(__p1, int8x16_t),
>> __ARM_mve_coerce(__p2, int8x16_t), p3), \
>>> - int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]:
>> __arm_vmlsdavaxq_p_s16 (__p0, __ARM_mve_coerce(__p1, int16x8_t),
>> __ARM_mve_coerce(__p2, int16x8_t), p3), \
>>> - int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]:
>> __arm_vmlsdavaxq_p_s32 (__p0, __ARM_mve_coerce(__p1, int32x4_t),
>> __ARM_mve_coerce(__p2, int32x4_t), p3));})
>>> + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]:
>> __arm_vmlsdavaxq_p_s8 (p0, __ARM_mve_coerce(__p1, int8x16_t),
>> __ARM_mve_coerce(__p2, int8x16_t), p3), \
>>> + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]:
>> __arm_vmlsdavaxq_p_s16 (p0, __ARM_mve_coerce(__p1, int16x8_t),
>> __ARM_mve_coerce(__p2, int16x8_t), p3), \
>>> + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]:
>> __arm_vmlsdavaxq_p_s32 (p0, __ARM_mve_coerce(__p1, int32x4_t),
>> __ARM_mve_coerce(__p2, int32x4_t), p3));})
>>> +
>>> +#define __arm_vmlsdavaq(p0,p1,p2) ({ __typeof(p1) __p1 = (p1); \
>>> + __typeof(p2) __p2 = (p2); \
>>> + _Generic( (int
>> (*)[__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
>>> + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]:
>> __arm_vmlsdavaq_s8(p0, __ARM_mve_coerce(__p1, int8x16_t),
>> __ARM_mve_coerce(__p2, int8x16_t)), \
>>> + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]:
>> __arm_vmlsdavaq_s16(p0, __ARM_mve_coerce(__p1, int16x8_t),
>> __ARM_mve_coerce(__p2, int16x8_t)), \
>>> + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]:
>> __arm_vmlsdavaq_s32(p0, __ARM_mve_coerce(__p1, int32x4_t),
>> __ARM_mve_coerce(__p2, int32x4_t)));})
>>> +
>>> +#define __arm_vmlsdavaxq(p0,p1,p2) ({ __typeof(p2) __p2 = (p2); \
>>> + __typeof(p1) __p1 = (p1); \
>>> + _Generic( (int
>> (*)[__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
>>> + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]:
>> __arm_vmlsdavaxq_s8(p0, __ARM_mve_coerce(__p1, int8x16_t),
>> __ARM_mve_coerce(__p2, int8x16_t)), \
>>> + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]:
>> __arm_vmlsdavaxq_s16(p0, __ARM_mve_coerce(__p1, int16x8_t),
>> __ARM_mve_coerce(__p2, int16x8_t)), \
>>> + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]:
>> __arm_vmlsdavaxq_s32(p0, __ARM_mve_coerce(__p1, int32x4_t),
>> __ARM_mve_coerce(__p2, int32x4_t)));})
>>>
>>> -#define __arm_vmlsdavaq_p(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
>>> +#define __arm_vmlsdavq_p(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
>>> __typeof(p1) __p1 = (p1); \
>>> + _Generic( (int
>> (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
>>> + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]:
>> __arm_vmlsdavq_p_s8 (__ARM_mve_coerce(__p0, int8x16_t),
>> __ARM_mve_coerce(__p1, int8x16_t), p2), \
>>> + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]:
>> __arm_vmlsdavq_p_s16 (__ARM_mve_coerce(__p0, int16x8_t),
>> __ARM_mve_coerce(__p1, int16x8_t), p2), \
>>> + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]:
>> __arm_vmlsdavq_p_s32 (__ARM_mve_coerce(__p0, int32x4_t),
>> __ARM_mve_coerce(__p1, int32x4_t), p2));})
>>> +
>>> +#define __arm_vmlsdavxq_p(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
>>> + __typeof(p1) __p1 = (p1); \
>>> + _Generic( (int
>> (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
>>> + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]:
>> __arm_vmlsdavxq_p_s8 (__ARM_mve_coerce(__p0, int8x16_t),
>> __ARM_mve_coerce(__p1, int8x16_t), p2), \
>>> + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]:
>> __arm_vmlsdavxq_p_s16 (__ARM_mve_coerce(__p0, int16x8_t),
>> __ARM_mve_coerce(__p1, int16x8_t), p2), \
>>> + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]:
>> __arm_vmlsdavxq_p_s32 (__ARM_mve_coerce(__p0, int32x4_t),
>> __ARM_mve_coerce(__p1, int32x4_t), p2));})
>>> +
>>> +#define __arm_vmlsdavaq_p(p0,p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \
>>> __typeof(p2) __p2 = (p2); \
>>> _Generic( (int
>> (*)[__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
>>> - int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]:
>> __arm_vmlsdavaq_p_s8(__p0, __ARM_mve_coerce(__p1, int8x16_t),
>> __ARM_mve_coerce(__p2, int8x16_t), p3), \
>>> - int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]:
>> __arm_vmlsdavaq_p_s16(__p0, __ARM_mve_coerce(__p1, int16x8_t),
>> __ARM_mve_coerce(__p2, int16x8_t), p3), \
>>> - int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]:
>> __arm_vmlsdavaq_p_s32(__p0, __ARM_mve_coerce(__p1, int32x4_t),
>> __ARM_mve_coerce(__p2, int32x4_t), p3));})
>>> + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]:
>> __arm_vmlsdavaq_p_s8(p0, __ARM_mve_coerce(__p1, int8x16_t),
>> __ARM_mve_coerce(__p2, int8x16_t), p3), \
>>> + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]:
>> __arm_vmlsdavaq_p_s16(p0, __ARM_mve_coerce(__p1, int16x8_t),
>> __ARM_mve_coerce(__p2, int16x8_t), p3), \
>>> + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]:
>> __arm_vmlsdavaq_p_s32(p0, __ARM_mve_coerce(__p1, int32x4_t),
>> __ARM_mve_coerce(__p2, int32x4_t), p3));})
>>>
>>> #define __arm_vmladavaxq_p(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
>>> __typeof(p1) __p1 = (p1); \
>>> @@ -41445,8 +41270,8 @@ extern void *__ARM_undef;
>>>
>>> #define __arm_viwdupq_u16(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
>>> _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
>>> - int (*)[__ARM_mve_type_int_n]: __arm_viwdupq_n_u16
>> (__ARM_mve_coerce(__p0, uint32_t), p1, p2), \
>>> - int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_viwdupq_wb_u16
>> (__ARM_mve_coerce(__p0, uint32_t *), p1, p2));})
>>> + int (*)[__ARM_mve_type_int_n]: __arm_viwdupq_n_u16
>> (__ARM_mve_coerce(__p0, uint32_t), p1, (const int) p2), \
>>> + int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_viwdupq_wb_u16
>> (__ARM_mve_coerce(__p0, uint32_t *), p1, (const int) p2));})
>>>
>>> #define __arm_viwdupq_u32(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
>>> _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
>>> @@ -41628,16 +41453,6 @@ extern void *__ARM_undef;
>>> int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int16x8_t]:
>> __arm_vmaxavq_p_s16 (__p0, __ARM_mve_coerce(__p1, int16x8_t), p2), \
>>> int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int32x4_t]:
>> __arm_vmaxavq_p_s32 (__p0, __ARM_mve_coerce(__p1, int32x4_t), p2));})
>>>
>>> -#define __arm_vmaxq_x(p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \
>>> - __typeof(p2) __p2 = (p2); \
>>> - _Generic( (int
>> (*)[__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
>>> - int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]:
>> __arm_vmaxq_x_s8 (__ARM_mve_coerce(__p1, int8x16_t),
>> __ARM_mve_coerce(__p2, int8x16_t), p3), \
>>> - int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]:
>> __arm_vmaxq_x_s16 (__ARM_mve_coerce(__p1, int16x8_t),
>> __ARM_mve_coerce(__p2, int16x8_t), p3), \
>>> - int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]:
>> __arm_vmaxq_x_s32 (__ARM_mve_coerce(__p1, int32x4_t),
>> __ARM_mve_coerce(__p2, int32x4_t), p3), \
>>> - int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]:
>> __arm_vmaxq_x_u8( __ARM_mve_coerce(__p1, uint8x16_t),
>> __ARM_mve_coerce(__p2, uint8x16_t), p3), \
>>> - int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]:
>> __arm_vmaxq_x_u16( __ARM_mve_coerce(__p1, uint16x8_t),
>> __ARM_mve_coerce(__p2, uint16x8_t), p3), \
>>> - int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]:
>> __arm_vmaxq_x_u32( __ARM_mve_coerce(__p1, uint32x4_t),
>> __ARM_mve_coerce(__p2, uint32x4_t), p3));})
>>> -
>>> #define __arm_vmaxvq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
>>> __typeof(p1) __p1 = (p1); \
>>> _Generic( (int
>> (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
>>> @@ -41672,6 +41487,16 @@ extern void *__ARM_undef;
>>> int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int16x8_t]:
>> __arm_vminavq_p_s16 (__p0, __ARM_mve_coerce(__p1, int16x8_t), p2), \
>>> int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int32x4_t]:
>> __arm_vminavq_p_s32 (__p0, __ARM_mve_coerce(__p1, int32x4_t), p2));})
>>>
>>> +#define __arm_vmaxq_x(p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \
>>> + __typeof(p2) __p2 = (p2); \
>>> + _Generic( (int
>> (*)[__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
>>> + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]:
>> __arm_vmaxq_x_s8 (__ARM_mve_coerce(__p1, int8x16_t),
>> __ARM_mve_coerce(__p2, int8x16_t), p3), \
>>> + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]:
>> __arm_vmaxq_x_s16 (__ARM_mve_coerce(__p1, int16x8_t),
>> __ARM_mve_coerce(__p2, int16x8_t), p3), \
>>> + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]:
>> __arm_vmaxq_x_s32 (__ARM_mve_coerce(__p1, int32x4_t),
>> __ARM_mve_coerce(__p2, int32x4_t), p3), \
>>> + int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]:
>> __arm_vmaxq_x_u8 (__ARM_mve_coerce(__p1, uint8x16_t),
>> __ARM_mve_coerce(__p2, uint8x16_t), p3), \
>>> + int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]:
>> __arm_vmaxq_x_u16 (__ARM_mve_coerce(__p1, uint16x8_t),
>> __ARM_mve_coerce(__p2, uint16x8_t), p3), \
>>> + int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]:
>> __arm_vmaxq_x_u32 (__ARM_mve_coerce(__p1, uint32x4_t),
>> __ARM_mve_coerce(__p2, uint32x4_t), p3));})
>>> +
>>> #define __arm_vminq_x(p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \
>>> __typeof(p2) __p2 = (p2); \
>>> _Generic( (int
>> (*)[__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
>>> @@ -41810,22 +41635,6 @@ extern void *__ARM_undef;
>>> int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]:
>> __arm_vmlaldavxq_p_s16 (__ARM_mve_coerce(__p0, int16x8_t),
>> __ARM_mve_coerce(__p1, int16x8_t), p2), \
>>> int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]:
>> __arm_vmlaldavxq_p_s32 (__ARM_mve_coerce(__p0, int32x4_t),
>> __ARM_mve_coerce(__p1, int32x4_t), p2));})
>>>
>>> -#define __arm_vmlsdavaq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
>>> - __typeof(p1) __p1 = (p1); \
>>> - __typeof(p2) __p2 = (p2); \
>>> - _Generic( (int
>> (*)[__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
>>> - int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]:
>> __arm_vmlsdavaq_s8(__p0, __ARM_mve_coerce(__p1, int8x16_t),
>> __ARM_mve_coerce(__p2, int8x16_t)), \
>>> - int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]:
>> __arm_vmlsdavaq_s16(__p0, __ARM_mve_coerce(__p1, int16x8_t),
>> __ARM_mve_coerce(__p2, int16x8_t)), \
>>> - int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]:
>> __arm_vmlsdavaq_s32(__p0, __ARM_mve_coerce(__p1, int32x4_t),
>> __ARM_mve_coerce(__p2, int32x4_t)));})
>>> -
>>> -#define __arm_vmlsdavaxq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
>>> - __typeof(p1) __p1 = (p1); \
>>> - __typeof(p2) __p2 = (p2); \
>>> - _Generic( (int
>> (*)[__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
>>> - int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]:
>> __arm_vmlsdavaxq_s8(__p0, __ARM_mve_coerce(__p1, int8x16_t),
>> __ARM_mve_coerce(__p2, int8x16_t)), \
>>> - int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]:
>> __arm_vmlsdavaxq_s16(__p0, __ARM_mve_coerce(__p1, int16x8_t),
>> __ARM_mve_coerce(__p2, int16x8_t)), \
>>> - int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]:
>> __arm_vmlsdavaxq_s32(__p0, __ARM_mve_coerce(__p1, int32x4_t),
>> __ARM_mve_coerce(__p2, int32x4_t)));})
>>> -
>>> #define __arm_vmlsdavq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
>>> __typeof(p1) __p1 = (p1); \
>>> _Generic( (int
>> (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
>>> @@ -41833,13 +41642,6 @@ extern void *__ARM_undef;
>>> int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]:
>> __arm_vmlsdavq_s16 (__ARM_mve_coerce(__p0, int16x8_t),
>> __ARM_mve_coerce(__p1, int16x8_t)), \
>>> int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]:
>> __arm_vmlsdavq_s32 (__ARM_mve_coerce(__p0, int32x4_t),
>> __ARM_mve_coerce(__p1, int32x4_t)));})
>>>
>>> -#define __arm_vmlsdavq_p(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
>>> - __typeof(p1) __p1 = (p1); \
>>> - _Generic( (int
>> (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
>>> - int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]:
>> __arm_vmlsdavq_p_s8 (__ARM_mve_coerce(__p0, int8x16_t),
>> __ARM_mve_coerce(__p1, int8x16_t), p2), \
>>> - int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]:
>> __arm_vmlsdavq_p_s16 (__ARM_mve_coerce(__p0, int16x8_t),
>> __ARM_mve_coerce(__p1, int16x8_t), p2), \
>>> - int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]:
>> __arm_vmlsdavq_p_s32 (__ARM_mve_coerce(__p0, int32x4_t),
>> __ARM_mve_coerce(__p1, int32x4_t), p2));})
>>> -
>>> #define __arm_vmlsdavxq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
>>> __typeof(p1) __p1 = (p1); \
>>> _Generic( (int
>> (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
>>> @@ -41847,13 +41649,6 @@ extern void *__ARM_undef;
>>> int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]:
>> __arm_vmlsdavxq_s16 (__ARM_mve_coerce(__p0, int16x8_t),
>> __ARM_mve_coerce(__p1, int16x8_t)), \
>>> int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]:
>> __arm_vmlsdavxq_s32 (__ARM_mve_coerce(__p0, int32x4_t),
>> __ARM_mve_coerce(__p1, int32x4_t)));})
>>>
>>> -#define __arm_vmlsdavxq_p(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
>>> - __typeof(p1) __p1 = (p1); \
>>> - _Generic( (int
>> (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
>>> - int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]:
>> __arm_vmlsdavxq_p_s8 (__ARM_mve_coerce(__p0, int8x16_t),
>> __ARM_mve_coerce(__p1, int8x16_t), p2), \
>>> - int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]:
>> __arm_vmlsdavxq_p_s16 (__ARM_mve_coerce(__p0, int16x8_t),
>> __ARM_mve_coerce(__p1, int16x8_t), p2), \
>>> - int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]:
>> __arm_vmlsdavxq_p_s32 (__ARM_mve_coerce(__p0, int32x4_t),
>> __ARM_mve_coerce(__p1, int32x4_t), p2));})
>>> -
>>> #define __arm_vmlsldavaq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
>>> __typeof(p1) __p1 = (p1); \
>>> __typeof(p2) __p2 = (p2); \
>>> @@ -41948,13 +41743,6 @@ extern void *__ARM_undef;
>>> int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]:
>> __arm_vmulltq_poly_x_p8 (__ARM_mve_coerce(__p1, uint8x16_t),
>> __ARM_mve_coerce(__p2, uint8x16_t), p3), \
>>> int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]:
>> __arm_vmulltq_poly_x_p16 (__ARM_mve_coerce(__p1, uint16x8_t),
>> __ARM_mve_coerce(__p2, uint16x8_t), p3));})
>>>
>>> -#define __arm_vrmlaldavhaq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
>>> - __typeof(p1) __p1 = (p1); \
>>> - __typeof(p2) __p2 = (p2); \
>>> - _Generic( (int
>> (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typ
>> eid(__p2)])0, \
>>> - int
>> (*)[__ARM_mve_type_int_n][__ARM_mve_type_int32x4_t][__ARM_mve_typ
>> e_int32x4_t]: __arm_vrmlaldavhaq_s32 (__ARM_mve_coerce(__p0, int64_t),
>> __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t)),
>> \
>>> - int
>> (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint32x4_t][__ARM_mve_ty
>> pe_uint32x4_t]: __arm_vrmlaldavhaq_u32 (__ARM_mve_coerce(__p0,
>> uint64_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2,
>> uint32x4_t)));})
>>> -
>>> #define __arm_vrmlaldavhaxq(p0,p1,p2)
>> __arm_vrmlaldavhaxq_s32(p0,p1,p2)
>>>
>>> #define __arm_vrmlaldavhq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
>>> @@ -41994,35 +41782,15 @@ extern void *__ARM_undef;
>>> int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint16x8_t]:
>> __arm_vstrbq_u16 (__ARM_mve_coerce(p0, uint8_t *),
>> __ARM_mve_coerce(__p1, uint16x8_t)), \
>>> int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint32x4_t]:
>> __arm_vstrbq_u32 (__ARM_mve_coerce(p0, uint8_t *),
>> __ARM_mve_coerce(__p1, uint32x4_t)));})
>>>
>>> -#define __arm_vstrbq_p(p0,p1,p2) ({ __typeof(p1) __p1 = (p1); \
>>> - _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)])0,
>> \
>>> - int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_int8x16_t]:
>> __arm_vstrbq_p_s8 (__ARM_mve_coerce(p0, int8_t *),
>> __ARM_mve_coerce(__p1, int8x16_t), p2), \
>>> - int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_int16x8_t]:
>> __arm_vstrbq_p_s16 (__ARM_mve_coerce(p0, int8_t *),
>> __ARM_mve_coerce(__p1, int16x8_t), p2), \
>>> - int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_int32x4_t]:
>> __arm_vstrbq_p_s32 (__ARM_mve_coerce(p0, int8_t *),
>> __ARM_mve_coerce(__p1, int32x4_t), p2), \
>>> - int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint8x16_t]:
>> __arm_vstrbq_p_u8 (__ARM_mve_coerce(p0, uint8_t *),
>> __ARM_mve_coerce(__p1, uint8x16_t), p2), \
>>> - int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint16x8_t]:
>> __arm_vstrbq_p_u16 (__ARM_mve_coerce(p0, uint8_t *),
>> __ARM_mve_coerce(__p1, uint16x8_t), p2), \
>>> - int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint32x4_t]:
>> __arm_vstrbq_p_u32 (__ARM_mve_coerce(p0, uint8_t *),
>> __ARM_mve_coerce(__p1, uint32x4_t), p2));})
>>> -
>>> -#define __arm_vstrbq_scatter_offset(p0,p1,p2) ({ __typeof(p1) __p1 =
>> (p1); \
>>> - __typeof(p2) __p2 = (p2); \
>>> - _Generic( (int
>> (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typei
>> d(__p2)])0, \
>>> - int
>> (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint8x16_t][__ARM_m
>> ve_type_int8x16_t]: __arm_vstrbq_scatter_offset_s8
>> (__ARM_mve_coerce(p0, int8_t *), __ARM_mve_coerce(__p1, uint8x16_t),
>> __ARM_mve_coerce(__p2, int8x16_t)), \
>>> - int
>> (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint16x8_t][__ARM_m
>> ve_type_int16x8_t]: __arm_vstrbq_scatter_offset_s16
>> (__ARM_mve_coerce(p0, int8_t *), __ARM_mve_coerce(__p1, uint16x8_t),
>> __ARM_mve_coerce(__p2, int16x8_t)), \
>>> - int
>> (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint32x4_t][__ARM_m
>> ve_type_int32x4_t]: __arm_vstrbq_scatter_offset_s32
>> (__ARM_mve_coerce(p0, int8_t *), __ARM_mve_coerce(__p1, uint32x4_t),
>> __ARM_mve_coerce(__p2, int32x4_t)), \
>>> - int
>> (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint8x16_t][__ARM_
>> mve_type_uint8x16_t]: __arm_vstrbq_scatter_offset_u8
>> (__ARM_mve_coerce(p0, uint8_t *), __ARM_mve_coerce(__p1, uint8x16_t),
>> __ARM_mve_coerce(__p2, uint8x16_t)), \
>>> - int
>> (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint16x8_t][__ARM_
>> mve_type_uint16x8_t]: __arm_vstrbq_scatter_offset_u16
>> (__ARM_mve_coerce(p0, uint8_t *), __ARM_mve_coerce(__p1, uint16x8_t),
>> __ARM_mve_coerce(__p2, uint16x8_t)), \
>>> - int
>> (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint32x4_t][__ARM_
>> mve_type_uint32x4_t]: __arm_vstrbq_scatter_offset_u32
>> (__ARM_mve_coerce(p0, uint8_t *), __ARM_mve_coerce(__p1, uint32x4_t),
>> __ARM_mve_coerce(__p2, uint32x4_t)));})
>>> -
>>> -
>>> -#define __arm_vstrbq_scatter_offset_p(p0,p1,p2,p3) ({__typeof(p1) __p1
>> = (p1); \
>>> - __typeof(p2) __p2 = (p2); \
>>> - _Generic( (int
>> (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typei
>> d(__p2)])0, \
>>> - int
>> (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint8x16_t][__ARM_m
>> ve_type_int8x16_t]: __arm_vstrbq_scatter_offset_p_s8
>> (__ARM_mve_coerce(p0, int8_t *), __ARM_mve_coerce(__p1, uint8x16_t),
>> __ARM_mve_coerce(__p2, int8x16_t), p3), \
>>> - int
>> (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint16x8_t][__ARM_m
>> ve_type_int16x8_t]: __arm_vstrbq_scatter_offset_p_s16
>> (__ARM_mve_coerce(p0, int8_t *), __ARM_mve_coerce(__p1, uint16x8_t),
>> __ARM_mve_coerce(__p2, int16x8_t), p3), \
>>> - int
>> (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint32x4_t][__ARM_m
>> ve_type_int32x4_t]: __arm_vstrbq_scatter_offset_p_s32
>> (__ARM_mve_coerce(p0, int8_t *), __ARM_mve_coerce(__p1, uint32x4_t),
>> __ARM_mve_coerce(__p2, int32x4_t), p3), \
>>> - int
>> (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint8x16_t][__ARM_
>> mve_type_uint8x16_t]: __arm_vstrbq_scatter_offset_p_u8
>> (__ARM_mve_coerce(p0, uint8_t *), __ARM_mve_coerce(__p1, uint8x16_t),
>> __ARM_mve_coerce(__p2, uint8x16_t), p3), \
>>> - int
>> (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint16x8_t][__ARM_
>> mve_type_uint16x8_t]: __arm_vstrbq_scatter_offset_p_u16
>> (__ARM_mve_coerce(p0, uint8_t *), __ARM_mve_coerce(__p1, uint16x8_t),
>> __ARM_mve_coerce(__p2, uint16x8_t), p3), \
>>> - int
>> (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint32x4_t][__ARM_
>> mve_type_uint32x4_t]: __arm_vstrbq_scatter_offset_p_u32
>> (__ARM_mve_coerce(p0, uint8_t *), __ARM_mve_coerce(__p1, uint32x4_t),
>> __ARM_mve_coerce(__p2, uint32x4_t), p3));})
>>> +#define __arm_vstrbq_p(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
>>> + __typeof(p1) __p1 = (p1); \
>>> + _Generic( (int
>> (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
>>> + int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_int8x16_t]:
>> __arm_vstrbq_p_s8 (__ARM_mve_coerce(__p0, int8_t *),
>> __ARM_mve_coerce(__p1, int8x16_t), p2), \
>>> + int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_int16x8_t]:
>> __arm_vstrbq_p_s16 (__ARM_mve_coerce(__p0, int8_t *),
>> __ARM_mve_coerce(__p1, int16x8_t), p2), \
>>> + int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_int32x4_t]:
>> __arm_vstrbq_p_s32 (__ARM_mve_coerce(__p0, int8_t *),
>> __ARM_mve_coerce(__p1, int32x4_t), p2), \
>>> + int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint8x16_t]:
>> __arm_vstrbq_p_u8 (__ARM_mve_coerce(__p0, uint8_t *),
>> __ARM_mve_coerce(__p1, uint8x16_t), p2), \
>>> + int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint16x8_t]:
>> __arm_vstrbq_p_u16 (__ARM_mve_coerce(__p0, uint8_t *),
>> __ARM_mve_coerce(__p1, uint16x8_t), p2), \
>>> + int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint32x4_t]:
>> __arm_vstrbq_p_u32 (__ARM_mve_coerce(__p0, uint8_t *),
>> __ARM_mve_coerce(__p1, uint32x4_t), p2));})
>>>
>>> #define __arm_vstrdq_scatter_base(p0,p1,p2) ({ __typeof(p2) __p2 = (p2);
>> \
>>> _Generic( (int (*)[__ARM_mve_typeid(__p2)])0, \
>>> @@ -42034,29 +41802,65 @@ extern void *__ARM_undef;
>>> int (*)[__ARM_mve_type_int64x2_t]: __arm_vstrdq_scatter_base_p_s64
>> (p0, p1, __ARM_mve_coerce(__p2, int64x2_t), p3), \
>>> int (*)[__ARM_mve_type_uint64x2_t]:
>> __arm_vstrdq_scatter_base_p_u64 (p0, p1, __ARM_mve_coerce(__p2,
>> uint64x2_t), p3));})
>>>
>>> -#define __arm_vstrdq_scatter_offset(p0,p1,p2) ({ __typeof(p1) __p1 =
>> (p1); \
>>> +#define __arm_vrmlaldavhaq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
>>> + __typeof(p1) __p1 = (p1); \
>>> __typeof(p2) __p2 = (p2); \
>>> - _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p2)])0,
>> \
>>> - int (*)[__ARM_mve_type_int64_t_ptr][__ARM_mve_type_int64x2_t]:
>> __arm_vstrdq_scatter_offset_s64 (__ARM_mve_coerce(p0, int64_t *), __p1,
>> __ARM_mve_coerce(__p2, int64x2_t)), \
>>> - int (*)[__ARM_mve_type_uint64_t_ptr][__ARM_mve_type_uint64x2_t]:
>> __arm_vstrdq_scatter_offset_u64 (__ARM_mve_coerce(p0, uint64_t *), __p1,
>> __ARM_mve_coerce(__p2, uint64x2_t)));})
>>> + _Generic( (int
>> (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typ
>> eid(__p2)])0, \
>>> + int
>> (*)[__ARM_mve_type_int_n][__ARM_mve_type_int32x4_t][__ARM_mve_typ
>> e_int32x4_t]: __arm_vrmlaldavhaq_s32 (__ARM_mve_coerce(__p0, int64_t),
>> __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t)),
>> \
>>> + int
>> (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint32x4_t][__ARM_mve_ty
>> pe_uint32x4_t]: __arm_vrmlaldavhaq_u32 (__ARM_mve_coerce(__p0,
>> uint64_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2,
>> uint32x4_t)));})
>>>
>>> -#define __arm_vstrdq_scatter_offset_p(p0,p1,p2,p3) ({ __typeof(p1) __p1
>> = (p1); \
>>> +#define __arm_vrmlaldavhaq_p(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0);
>> \
>>> + __typeof(p1) __p1 = (p1); \
>>> __typeof(p2) __p2 = (p2); \
>>> - _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p2)])0,
>> \
>>> - int (*)[__ARM_mve_type_int64_t_ptr][__ARM_mve_type_int64x2_t]:
>> __arm_vstrdq_scatter_offset_p_s64 (__ARM_mve_coerce(p0, int64_t *),
>> __p1, __ARM_mve_coerce(__p2, int64x2_t), p3), \
>>> - int (*)[__ARM_mve_type_uint64_t_ptr][__ARM_mve_type_uint64x2_t]:
>> __arm_vstrdq_scatter_offset_p_u64 (__ARM_mve_coerce(p0, uint64_t *),
>> __p1, __ARM_mve_coerce(__p2, uint64x2_t), p3));})
>>> + _Generic( (int
>> (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typ
>> eid(__p2)])0, \
>>> + int
>> (*)[__ARM_mve_type_int_n][__ARM_mve_type_int32x4_t][__ARM_mve_typ
>> e_int32x4_t]: __arm_vrmlaldavhaq_p_s32 (__ARM_mve_coerce(__p0,
>> int64_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2,
>> int32x4_t), p3), \
>>> + int
>> (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint32x4_t][__ARM_mve_ty
>> pe_uint32x4_t]: __arm_vrmlaldavhaq_p_u32 (__ARM_mve_coerce(__p0,
>> uint64_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2,
>> uint32x4_t), p3));})
>>>
>>> -#define __arm_vstrdq_scatter_shifted_offset(p0,p1,p2) ({ __typeof(p1)
>> __p1 = (p1); \
>>> +#define __arm_vstrbq_scatter_offset(p0,p1,p2) ({ __typeof(p0) __p0 =
>> (p0); \
>>> + __typeof(p1) __p1 = (p1); \
>>> __typeof(p2) __p2 = (p2); \
>>> - _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p2)])0,
>> \
>>> - int (*)[__ARM_mve_type_int64_t_ptr][__ARM_mve_type_int64x2_t]:
>> __arm_vstrdq_scatter_shifted_offset_s64 (__ARM_mve_coerce(p0, int64_t
>> *), __p1, __ARM_mve_coerce(__p2, int64x2_t)), \
>>> - int (*)[__ARM_mve_type_uint64_t_ptr][__ARM_mve_type_uint64x2_t]:
>> __arm_vstrdq_scatter_shifted_offset_u64 (__ARM_mve_coerce(p0, uint64_t
>> *), __p1, __ARM_mve_coerce(__p2, uint64x2_t)));})
>>> + _Generic( (int
>> (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typ
>> eid(__p2)])0, \
>>> + int
>> (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint8x16_t][__ARM_m
>> ve_type_int8x16_t]: __arm_vstrbq_scatter_offset_s8
>> (__ARM_mve_coerce(__p0, int8_t *), __ARM_mve_coerce(__p1, uint8x16_t),
>> __ARM_mve_coerce(__p2, int8x16_t)), \
>>> + int
>> (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint16x8_t][__ARM_m
>> ve_type_int16x8_t]: __arm_vstrbq_scatter_offset_s16
>> (__ARM_mve_coerce(__p0, int8_t *), __ARM_mve_coerce(__p1, uint16x8_t),
>> __ARM_mve_coerce(__p2, int16x8_t)), \
>>> + int
>> (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint32x4_t][__ARM_m
>> ve_type_int32x4_t]: __arm_vstrbq_scatter_offset_s32
>> (__ARM_mve_coerce(__p0, int8_t *), __ARM_mve_coerce(__p1, uint32x4_t),
>> __ARM_mve_coerce(__p2, int32x4_t)), \
>>> + int
>> (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint8x16_t][__ARM_
>> mve_type_uint8x16_t]: __arm_vstrbq_scatter_offset_u8
>> (__ARM_mve_coerce(__p0, uint8_t *), __ARM_mve_coerce(__p1,
>> uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t)), \
>>> + int
>> (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint16x8_t][__ARM_
>> mve_type_uint16x8_t]: __arm_vstrbq_scatter_offset_u16
>> (__ARM_mve_coerce(__p0, uint8_t *), __ARM_mve_coerce(__p1,
>> uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t)), \
>>> + int
>> (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint32x4_t][__ARM_
>> mve_type_uint32x4_t]: __arm_vstrbq_scatter_offset_u32
>> (__ARM_mve_coerce(__p0, uint8_t *), __ARM_mve_coerce(__p1,
>> uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t)));})
>>>
>>> -#define __arm_vstrdq_scatter_shifted_offset_p(p0,p1,p2,p3)
>> ({ __typeof(p1) __p1 = (p1); \
>>> +#define __arm_vstrbq_scatter_offset_p(p0,p1,p2,p3) ({ __typeof(p0) __p0
>> = (p0); \
>>> + __typeof(p1) __p1 = (p1); \
>>> __typeof(p2) __p2 = (p2); \
>>> - _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p2)])0,
>> \
>>> - int (*)[__ARM_mve_type_int64_t_ptr][__ARM_mve_type_int64x2_t]:
>> __arm_vstrdq_scatter_shifted_offset_p_s64 (__ARM_mve_coerce(p0,
>> int64_t *), __p1, __ARM_mve_coerce(__p2, int64x2_t), p3), \
>>> - int (*)[__ARM_mve_type_uint64_t_ptr][__ARM_mve_type_uint64x2_t]:
>> __arm_vstrdq_scatter_shifted_offset_p_u64 (__ARM_mve_coerce(p0,
>> uint64_t *), __p1, __ARM_mve_coerce(__p2, uint64x2_t), p3));})
>>> + _Generic( (int
>> (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typ
>> eid(__p2)])0, \
>>> + int
>> (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint8x16_t][__ARM_m
>> ve_type_int8x16_t]: __arm_vstrbq_scatter_offset_p_s8
>> (__ARM_mve_coerce(__p0, int8_t *), __ARM_mve_coerce(__p1, uint8x16_t),
>> __ARM_mve_coerce(__p2, int8x16_t), p3), \
>>> + int
>> (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint16x8_t][__ARM_m
>> ve_type_int16x8_t]: __arm_vstrbq_scatter_offset_p_s16
>> (__ARM_mve_coerce(__p0, int8_t *), __ARM_mve_coerce(__p1, uint16x8_t),
>> __ARM_mve_coerce(__p2, int16x8_t), p3), \
>>> + int
>> (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint32x4_t][__ARM_m
>> ve_type_int32x4_t]: __arm_vstrbq_scatter_offset_p_s32
>> (__ARM_mve_coerce(__p0, int8_t *), __ARM_mve_coerce(__p1, uint32x4_t),
>> __ARM_mve_coerce(__p2, int32x4_t), p3), \
>>> + int
>> (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint8x16_t][__ARM_
>> mve_type_uint8x16_t]: __arm_vstrbq_scatter_offset_p_u8
>> (__ARM_mve_coerce(__p0, uint8_t *), __ARM_mve_coerce(__p1,
>> uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \
>>> + int
>> (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint16x8_t][__ARM_
>> mve_type_uint16x8_t]: __arm_vstrbq_scatter_offset_p_u16
>> (__ARM_mve_coerce(__p0, uint8_t *), __ARM_mve_coerce(__p1,
>> uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \
>>> + int
>> (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint32x4_t][__ARM_
>> mve_type_uint32x4_t]: __arm_vstrbq_scatter_offset_p_u32
>> (__ARM_mve_coerce(__p0, uint8_t *), __ARM_mve_coerce(__p1,
>> uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3));})
>>> +
>>> +#define __arm_vstrdq_scatter_offset_p(p0,p1,p2,p3) ({ __typeof(p0) __p0
>> = (p0); \
>>> + __typeof(p2) __p2 = (p2); \
>>> + _Generic( (int
>> (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p2)])0, \
>>> + int (*)[__ARM_mve_type_int64_t_ptr][__ARM_mve_type_int64x2_t]:
>> __arm_vstrdq_scatter_offset_p_s64 (__ARM_mve_coerce(__p0, int64_t *),
>> p1, __ARM_mve_coerce(__p2, int64x2_t), p3), \
>>> + int (*)[__ARM_mve_type_uint64_t_ptr][__ARM_mve_type_uint64x2_t]:
>> __arm_vstrdq_scatter_offset_p_u64 (__ARM_mve_coerce(__p0, uint64_t *),
>> p1, __ARM_mve_coerce(__p2, uint64x2_t), p3));})
>>> +
>>> +#define __arm_vstrdq_scatter_offset(p0,p1,p2) ({ __typeof(p0) __p0 =
>> (p0); \
>>> + __typeof(p2) __p2 = (p2); \
>>> + _Generic( (int
>> (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p2)])0, \
>>> + int (*)[__ARM_mve_type_int64_t_ptr][__ARM_mve_type_int64x2_t]:
>> __arm_vstrdq_scatter_offset_s64 (__ARM_mve_coerce(__p0, int64_t *), p1,
>> __ARM_mve_coerce(__p2, int64x2_t)), \
>>> + int (*)[__ARM_mve_type_uint64_t_ptr][__ARM_mve_type_uint64x2_t]:
>> __arm_vstrdq_scatter_offset_u64 (__ARM_mve_coerce(__p0, uint64_t *), p1,
>> __ARM_mve_coerce(__p2, uint64x2_t)));})
>>> +
>>> +#define __arm_vstrdq_scatter_shifted_offset_p(p0,p1,p2,p3)
>> ({ __typeof(p0) __p0 = (p0); \
>>> + __typeof(p2) __p2 = (p2); \
>>> + _Generic( (int
>> (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p2)])0, \
>>> + int (*)[__ARM_mve_type_int64_t_ptr][__ARM_mve_type_int64x2_t]:
>> __arm_vstrdq_scatter_shifted_offset_p_s64 (__ARM_mve_coerce(__p0,
>> int64_t *), p1, __ARM_mve_coerce(__p2, int64x2_t), p3), \
>>> + int (*)[__ARM_mve_type_uint64_t_ptr][__ARM_mve_type_uint64x2_t]:
>> __arm_vstrdq_scatter_shifted_offset_p_u64 (__ARM_mve_coerce(__p0,
>> uint64_t *), p1, __ARM_mve_coerce(__p2, uint64x2_t), p3));})
>>> +
>>> +#define __arm_vstrdq_scatter_shifted_offset(p0,p1,p2) ({ __typeof(p0)
>> __p0 = (p0); \
>>> + __typeof(p2) __p2 = (p2); \
>>> + _Generic( (int
>> (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p2)])0, \
>>> + int (*)[__ARM_mve_type_int64_t_ptr][__ARM_mve_type_int64x2_t]:
>> __arm_vstrdq_scatter_shifted_offset_s64 (__ARM_mve_coerce(__p0,
>> int64_t *), p1, __ARM_mve_coerce(__p2, int64x2_t)), \
>>> + int (*)[__ARM_mve_type_uint64_t_ptr][__ARM_mve_type_uint64x2_t]:
>> __arm_vstrdq_scatter_shifted_offset_u64 (__ARM_mve_coerce(__p0,
>> uint64_t *), p1, __ARM_mve_coerce(__p2, uint64x2_t)));})
>>>
>>> #endif /* __cplusplus */
>>> #endif /* __ARM_FEATURE_MVE */
>>>
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2021-05-10 14:18 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-05-05 9:56 [GCC][PATCH] arm: Remove duplicate definitions from arm_mve.h (pr100419) Srinath Parvathaneni
2021-05-05 10:15 ` Richard Earnshaw
2021-05-05 12:39 ` Srinath Parvathaneni
2021-05-10 14:18 ` Richard Earnshaw
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).