public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r12-2492] aarch64: Use memcpy to copy vector tables in vst2[q] intrinsics
@ 2021-07-23 11:18 Jonathan Wright
  0 siblings, 0 replies; only message in thread
From: Jonathan Wright @ 2021-07-23 11:18 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:03148b8e508ea09ce62259ffb95844182c0b90c6

commit r12-2492-g03148b8e508ea09ce62259ffb95844182c0b90c6
Author: Jonathan Wright <jonathan.wright@arm.com>
Date:   Wed Jul 21 12:37:01 2021 +0100

    aarch64: Use memcpy to copy vector tables in vst2[q] intrinsics
    
    Use __builtin_memcpy to copy vector structures instead of building
    a new opaque structure one vector at a time in each of the vst2[q]
    Neon intrinsics in arm_neon.h. This simplifies the header file and
    also improves code generation - superfluous move instructions were
    emitted for every register extraction/set in this additional
    structure.
    
    Add new code generation tests to verify that superfluous move
    instructions are no longer generated for the vst2q intrinsics.
    
    gcc/ChangeLog:
    
    2021-07-21  Jonathan Wrightt  <jonathan.wright@arm.com>
    
            * config/aarch64/arm_neon.h (vst2_s64): Use __builtin_memcpy
            instead of constructing __builtin_aarch64_simd_oi one vector
            at a time.
            (vst2_u64): Likewise.
            (vst2_f64): Likewise.
            (vst2_s8): Likewise.
            (vst2_p8): Likewise.
            (vst2_s16): Likewise.
            (vst2_p16): Likewise.
            (vst2_s32): Likewise.
            (vst2_u8): Likewise.
            (vst2_u16): Likewise.
            (vst2_u32): Likewise.
            (vst2_f16): Likewise.
            (vst2_f32): Likewise.
            (vst2_p64): Likewise.
            (vst2q_s8): Likewise.
            (vst2q_p8): Likewise.
            (vst2q_s16): Likewise.
            (vst2q_p16): Likewise.
            (vst2q_s32): Likewise.
            (vst2q_s64): Likewise.
            (vst2q_u8): Likewise.
            (vst2q_u16): Likewise.
            (vst2q_u32): Likewise.
            (vst2q_u64): Likewise.
            (vst2q_f16): Likewise.
            (vst2q_f32): Likewise.
            (vst2q_f64): Likewise.
            (vst2q_p64): Likewise.
    
    gcc/testsuite/ChangeLog:
    
            * gcc.target/aarch64/vector_structure_intrinsics.c: Add new
            tests.

Diff:
---
 gcc/config/aarch64/arm_neon.h                      | 88 +++++++---------------
 .../aarch64/vector_structure_intrinsics.c          | 16 ++++
 2 files changed, 44 insertions(+), 60 deletions(-)

diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index fde321e328a..0e4ab354a77 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -27216,8 +27216,7 @@ vst2_s64 (int64_t * __a, int64x1x2_t __val)
   int64x2x2_t __temp;
   __temp.val[0] = vcombine_s64 (__val.val[0], vcreate_s64 (__AARCH64_INT64_C (0)));
   __temp.val[1] = vcombine_s64 (__val.val[1], vcreate_s64 (__AARCH64_INT64_C (0)));
-  __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) __temp.val[0], 0);
-  __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) __temp.val[1], 1);
+  __builtin_memcpy (&__o, &__temp, sizeof (__temp));
   __builtin_aarch64_st2di ((__builtin_aarch64_simd_di *) __a, __o);
 }
 
@@ -27229,8 +27228,7 @@ vst2_u64 (uint64_t * __a, uint64x1x2_t __val)
   uint64x2x2_t __temp;
   __temp.val[0] = vcombine_u64 (__val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0)));
   __temp.val[1] = vcombine_u64 (__val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0)));
-  __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) __temp.val[0], 0);
-  __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) __temp.val[1], 1);
+  __builtin_memcpy (&__o, &__temp, sizeof (__temp));
   __builtin_aarch64_st2di ((__builtin_aarch64_simd_di *) __a, __o);
 }
 
@@ -27242,8 +27240,7 @@ vst2_f64 (float64_t * __a, float64x1x2_t __val)
   float64x2x2_t __temp;
   __temp.val[0] = vcombine_f64 (__val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0)));
   __temp.val[1] = vcombine_f64 (__val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0)));
-  __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) __temp.val[0], 0);
-  __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) __temp.val[1], 1);
+  __builtin_memcpy (&__o, &__temp, sizeof (__temp));
   __builtin_aarch64_st2df ((__builtin_aarch64_simd_df *) __a, __o);
 }
 
@@ -27255,8 +27252,7 @@ vst2_s8 (int8_t * __a, int8x8x2_t __val)
   int8x16x2_t __temp;
   __temp.val[0] = vcombine_s8 (__val.val[0], vcreate_s8 (__AARCH64_INT64_C (0)));
   __temp.val[1] = vcombine_s8 (__val.val[1], vcreate_s8 (__AARCH64_INT64_C (0)));
-  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __temp.val[0], 0);
-  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __temp.val[1], 1);
+  __builtin_memcpy (&__o, &__temp, sizeof (__temp));
   __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
 }
 
@@ -27268,8 +27264,7 @@ vst2_p8 (poly8_t * __a, poly8x8x2_t __val)
   poly8x16x2_t __temp;
   __temp.val[0] = vcombine_p8 (__val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0)));
   __temp.val[1] = vcombine_p8 (__val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0)));
-  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __temp.val[0], 0);
-  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __temp.val[1], 1);
+  __builtin_memcpy (&__o, &__temp, sizeof (__temp));
   __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
 }
 
@@ -27281,8 +27276,7 @@ vst2_s16 (int16_t * __a, int16x4x2_t __val)
   int16x8x2_t __temp;
   __temp.val[0] = vcombine_s16 (__val.val[0], vcreate_s16 (__AARCH64_INT64_C (0)));
   __temp.val[1] = vcombine_s16 (__val.val[1], vcreate_s16 (__AARCH64_INT64_C (0)));
-  __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __temp.val[0], 0);
-  __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __temp.val[1], 1);
+  __builtin_memcpy (&__o, &__temp, sizeof (__temp));
   __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
 }
 
@@ -27294,8 +27288,7 @@ vst2_p16 (poly16_t * __a, poly16x4x2_t __val)
   poly16x8x2_t __temp;
   __temp.val[0] = vcombine_p16 (__val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0)));
   __temp.val[1] = vcombine_p16 (__val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0)));
-  __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __temp.val[0], 0);
-  __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __temp.val[1], 1);
+  __builtin_memcpy (&__o, &__temp, sizeof (__temp));
   __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
 }
 
@@ -27307,8 +27300,7 @@ vst2_s32 (int32_t * __a, int32x2x2_t __val)
   int32x4x2_t __temp;
   __temp.val[0] = vcombine_s32 (__val.val[0], vcreate_s32 (__AARCH64_INT64_C (0)));
   __temp.val[1] = vcombine_s32 (__val.val[1], vcreate_s32 (__AARCH64_INT64_C (0)));
-  __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __temp.val[0], 0);
-  __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __temp.val[1], 1);
+  __builtin_memcpy (&__o, &__temp, sizeof (__temp));
   __builtin_aarch64_st2v2si ((__builtin_aarch64_simd_si *) __a, __o);
 }
 
@@ -27320,8 +27312,7 @@ vst2_u8 (uint8_t * __a, uint8x8x2_t __val)
   uint8x16x2_t __temp;
   __temp.val[0] = vcombine_u8 (__val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0)));
   __temp.val[1] = vcombine_u8 (__val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0)));
-  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __temp.val[0], 0);
-  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __temp.val[1], 1);
+  __builtin_memcpy (&__o, &__temp, sizeof (__temp));
   __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
 }
 
@@ -27333,8 +27324,7 @@ vst2_u16 (uint16_t * __a, uint16x4x2_t __val)
   uint16x8x2_t __temp;
   __temp.val[0] = vcombine_u16 (__val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0)));
   __temp.val[1] = vcombine_u16 (__val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0)));
-  __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __temp.val[0], 0);
-  __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __temp.val[1], 1);
+  __builtin_memcpy (&__o, &__temp, sizeof (__temp));
   __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
 }
 
@@ -27346,8 +27336,7 @@ vst2_u32 (uint32_t * __a, uint32x2x2_t __val)
   uint32x4x2_t __temp;
   __temp.val[0] = vcombine_u32 (__val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0)));
   __temp.val[1] = vcombine_u32 (__val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0)));
-  __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __temp.val[0], 0);
-  __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __temp.val[1], 1);
+  __builtin_memcpy (&__o, &__temp, sizeof (__temp));
   __builtin_aarch64_st2v2si ((__builtin_aarch64_simd_si *) __a, __o);
 }
 
@@ -27359,8 +27348,7 @@ vst2_f16 (float16_t * __a, float16x4x2_t __val)
   float16x8x2_t __temp;
   __temp.val[0] = vcombine_f16 (__val.val[0], vcreate_f16 (__AARCH64_UINT64_C (0)));
   __temp.val[1] = vcombine_f16 (__val.val[1], vcreate_f16 (__AARCH64_UINT64_C (0)));
-  __o = __builtin_aarch64_set_qregoiv8hf (__o, __temp.val[0], 0);
-  __o = __builtin_aarch64_set_qregoiv8hf (__o, __temp.val[1], 1);
+  __builtin_memcpy (&__o, &__temp, sizeof (__temp));
   __builtin_aarch64_st2v4hf (__a, __o);
 }
 
@@ -27372,8 +27360,7 @@ vst2_f32 (float32_t * __a, float32x2x2_t __val)
   float32x4x2_t __temp;
   __temp.val[0] = vcombine_f32 (__val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0)));
   __temp.val[1] = vcombine_f32 (__val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0)));
-  __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) __temp.val[0], 0);
-  __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) __temp.val[1], 1);
+  __builtin_memcpy (&__o, &__temp, sizeof (__temp));
   __builtin_aarch64_st2v2sf ((__builtin_aarch64_simd_sf *) __a, __o);
 }
 
@@ -27385,10 +27372,7 @@ vst2_p64 (poly64_t * __a, poly64x1x2_t __val)
   poly64x2x2_t __temp;
   __temp.val[0] = vcombine_p64 (__val.val[0], vcreate_p64 (__AARCH64_UINT64_C (0)));
   __temp.val[1] = vcombine_p64 (__val.val[1], vcreate_p64 (__AARCH64_UINT64_C (0)));
-  __o = __builtin_aarch64_set_qregoiv2di_ssps (__o,
-					       (poly64x2_t) __temp.val[0], 0);
-  __o = __builtin_aarch64_set_qregoiv2di_ssps (__o,
-					       (poly64x2_t) __temp.val[1], 1);
+  __builtin_memcpy (&__o, &__temp, sizeof (__temp));
   __builtin_aarch64_st2di ((__builtin_aarch64_simd_di *) __a, __o);
 }
 
@@ -27397,8 +27381,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst2q_s8 (int8_t * __a, int8x16x2_t __val)
 {
   __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __val.val[0], 0);
-  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __val.val[1], 1);
+  __builtin_memcpy (&__o, &__val, sizeof (__val));
   __builtin_aarch64_st2v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
 }
 
@@ -27407,8 +27390,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst2q_p8 (poly8_t * __a, poly8x16x2_t __val)
 {
   __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __val.val[0], 0);
-  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __val.val[1], 1);
+  __builtin_memcpy (&__o, &__val, sizeof (__val));
   __builtin_aarch64_st2v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
 }
 
@@ -27417,8 +27399,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst2q_s16 (int16_t * __a, int16x8x2_t __val)
 {
   __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __val.val[0], 0);
-  __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __val.val[1], 1);
+  __builtin_memcpy (&__o, &__val, sizeof (__val));
   __builtin_aarch64_st2v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
 }
 
@@ -27427,8 +27408,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst2q_p16 (poly16_t * __a, poly16x8x2_t __val)
 {
   __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __val.val[0], 0);
-  __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __val.val[1], 1);
+  __builtin_memcpy (&__o, &__val, sizeof (__val));
   __builtin_aarch64_st2v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
 }
 
@@ -27437,8 +27417,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst2q_s32 (int32_t * __a, int32x4x2_t __val)
 {
   __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __val.val[0], 0);
-  __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __val.val[1], 1);
+  __builtin_memcpy (&__o, &__val, sizeof (__val));
   __builtin_aarch64_st2v4si ((__builtin_aarch64_simd_si *) __a, __o);
 }
 
@@ -27447,8 +27426,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst2q_s64 (int64_t * __a, int64x2x2_t __val)
 {
   __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) __val.val[0], 0);
-  __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) __val.val[1], 1);
+  __builtin_memcpy (&__o, &__val, sizeof (__val));
   __builtin_aarch64_st2v2di ((__builtin_aarch64_simd_di *) __a, __o);
 }
 
@@ -27457,8 +27435,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst2q_u8 (uint8_t * __a, uint8x16x2_t __val)
 {
   __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __val.val[0], 0);
-  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __val.val[1], 1);
+  __builtin_memcpy (&__o, &__val, sizeof (__val));
   __builtin_aarch64_st2v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
 }
 
@@ -27467,8 +27444,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst2q_u16 (uint16_t * __a, uint16x8x2_t __val)
 {
   __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __val.val[0], 0);
-  __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __val.val[1], 1);
+  __builtin_memcpy (&__o, &__val, sizeof (__val));
   __builtin_aarch64_st2v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
 }
 
@@ -27477,8 +27453,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst2q_u32 (uint32_t * __a, uint32x4x2_t __val)
 {
   __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __val.val[0], 0);
-  __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __val.val[1], 1);
+  __builtin_memcpy (&__o, &__val, sizeof (__val));
   __builtin_aarch64_st2v4si ((__builtin_aarch64_simd_si *) __a, __o);
 }
 
@@ -27487,8 +27462,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst2q_u64 (uint64_t * __a, uint64x2x2_t __val)
 {
   __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) __val.val[0], 0);
-  __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) __val.val[1], 1);
+  __builtin_memcpy (&__o, &__val, sizeof (__val));
   __builtin_aarch64_st2v2di ((__builtin_aarch64_simd_di *) __a, __o);
 }
 
@@ -27497,8 +27471,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst2q_f16 (float16_t * __a, float16x8x2_t __val)
 {
   __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_set_qregoiv8hf (__o, __val.val[0], 0);
-  __o = __builtin_aarch64_set_qregoiv8hf (__o, __val.val[1], 1);
+  __builtin_memcpy (&__o, &__val, sizeof (__val));
   __builtin_aarch64_st2v8hf (__a, __o);
 }
 
@@ -27507,8 +27480,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst2q_f32 (float32_t * __a, float32x4x2_t __val)
 {
   __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) __val.val[0], 0);
-  __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) __val.val[1], 1);
+  __builtin_memcpy (&__o, &__val, sizeof (__val));
   __builtin_aarch64_st2v4sf ((__builtin_aarch64_simd_sf *) __a, __o);
 }
 
@@ -27517,8 +27489,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst2q_f64 (float64_t * __a, float64x2x2_t __val)
 {
   __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) __val.val[0], 0);
-  __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) __val.val[1], 1);
+  __builtin_memcpy (&__o, &__val, sizeof (__val));
   __builtin_aarch64_st2v2df ((__builtin_aarch64_simd_df *) __a, __o);
 }
 
@@ -27527,10 +27498,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst2q_p64 (poly64_t * __a, poly64x2x2_t __val)
 {
   __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_set_qregoiv2di_ssps (__o,
-					       (poly64x2_t) __val.val[0], 0);
-  __o = __builtin_aarch64_set_qregoiv2di_ssps (__o,
-					       (poly64x2_t) __val.val[1], 1);
+  __builtin_memcpy (&__o, &__val, sizeof (__val));
   __builtin_aarch64_st2v2di ((__builtin_aarch64_simd_di *) __a, __o);
 }
 
diff --git a/gcc/testsuite/gcc.target/aarch64/vector_structure_intrinsics.c b/gcc/testsuite/gcc.target/aarch64/vector_structure_intrinsics.c
index 664de1f971d..5a6663a72e5 100644
--- a/gcc/testsuite/gcc.target/aarch64/vector_structure_intrinsics.c
+++ b/gcc/testsuite/gcc.target/aarch64/vector_structure_intrinsics.c
@@ -103,6 +103,21 @@ TEST_STX (vst4q, uint64x2x4_t, uint64_t*, u64);
 TEST_STX (vst4q, float64x2x4_t, float64_t*, f64);
 TEST_STX (vst4q, poly64x2x4_t, poly64_t*, p64);
 
+TEST_STX (vst2q, int8x16x2_t, int8_t*, s8);
+TEST_STX (vst2q, uint8x16x2_t, uint8_t*, u8);
+TEST_STX (vst2q, poly8x16x2_t, poly8_t*, p8);
+TEST_STX (vst2q, int16x8x2_t, int16_t*, s16);
+TEST_STX (vst2q, uint16x8x2_t, uint16_t*, u16);
+TEST_STX (vst2q, poly16x8x2_t, poly16_t*, p16);
+TEST_STX (vst2q, float16x8x2_t, float16_t*, f16);
+TEST_STX (vst2q, int32x4x2_t, int32_t*, s32);
+TEST_STX (vst2q, uint32x4x2_t, uint32_t*, u32);
+TEST_STX (vst2q, float32x4x2_t, float32_t*, f32);
+TEST_STX (vst2q, int64x2x2_t, int64_t*, s64);
+TEST_STX (vst2q, uint64x2x2_t, uint64_t*, u64);
+TEST_STX (vst2q, float64x2x2_t, float64_t*, f64);
+TEST_STX (vst2q, poly64x2x2_t, poly64_t*, p64);
+
 #define TEST_ST3(name, tbltype, ptrtype, ts) \
   void test_ ## name ## _ ## ts (ptrtype a, int8x8_t dummy, tbltype b) \
 	{ \
@@ -130,3 +145,4 @@ TEST_ST3 (vst3q, poly64x2x3_t, poly64_t*, p64);
 /* { dg-final { scan-assembler-times "tbx\\t" 18} }  */
 /* { dg-final { scan-assembler-times "st4\\t" 14} }  */
 /* { dg-final { scan-assembler-times "st3\\t" 14} }  */
+/* { dg-final { scan-assembler-times "st2\\t" 14} }  */


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2021-07-23 11:18 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-07-23 11:18 [gcc r12-2492] aarch64: Use memcpy to copy vector tables in vst2[q] intrinsics Jonathan Wright

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).