public inbox for gcc-patches@gcc.gnu.org
* [AARCH64] implements neon vld1_*_x2 intrinsics
@ 2017-11-07  4:56 Kugan Vivekanandarajah
  2017-11-15  0:52 ` Kugan Vivekanandarajah
  2017-11-15 10:04 ` Kyrill Tkachov
  0 siblings, 2 replies; 5+ messages in thread
From: Kugan Vivekanandarajah @ 2017-11-07  4:56 UTC (permalink / raw)
  To: gcc-patches, James Greenhalgh, Richard Earnshaw

[-- Attachment #1: Type: text/plain, Size: 1470 bytes --]

Hi,

Attached patch implements the vld1_*_x2 intrinsics as defined by the
Neon intrinsics document.
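
For example, vld1_u8_x2 loads two 8-byte vectors from consecutive
memory with a single two-register ld1.  A minimal usage sketch (the
buffer name is illustrative only):

  uint8_t buf[16];
  /* After the call, res.val[0] holds buf[0..7] and res.val[1]
     holds buf[8..15].  */
  uint8x8x2_t res = vld1_u8_x2 (buf);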

Bootstrap for the latest patch is ongoing on aarch64-linux-gnu. Is
this OK for trunk if no regressions?

Thanks,
Kugan

gcc/ChangeLog:

2017-11-06  Kugan Vivekanandarajah  <kuganv@linaro.org>

    * config/aarch64/aarch64-simd.md (aarch64_ld1x2<VQ:mode>): New.
    (aarch64_ld1x2<VDC:mode>): Likewise.
    (aarch64_simd_ld1<mode>_x2): Likewise.
    (aarch64_simd_ld1<mode>_x2): Likewise.
    * config/aarch64/arm_neon.h (vld1_u8_x2): New.
    (vld1_s8_x2): Likewise.
    (vld1_u16_x2): Likewise.
    (vld1_s16_x2): Likewise.
    (vld1_u32_x2): Likewise.
    (vld1_s32_x2): Likewise.
    (vld1_u64_x2): Likewise.
    (vld1_s64_x2): Likewise.
    (vld1_f16_x2): Likewise.
    (vld1_f32_x2): Likewise.
    (vld1_f64_x2): Likewise.
    (vld1_p8_x2): Likewise.
    (vld1_p16_x2): Likewise.
    (vld1_p64_x2): Likewise.
    (vld1q_u8_x2): Likewise.
    (vld1q_s8_x2): Likewise.
    (vld1q_u16_x2): Likewise.
    (vld1q_s16_x2): Likewise.
    (vld1q_u32_x2): Likewise.
    (vld1q_s32_x2): Likewise.
    (vld1q_u64_x2): Likewise.
    (vld1q_s64_x2): Likewise.
    (vld1q_f16_x2): Likewise.
    (vld1q_f32_x2): Likewise.
    (vld1q_f64_x2): Likewise.
    (vld1q_p8_x2): Likewise.
    (vld1q_p16_x2): Likewise.
    (vld1q_p64_x2): Likewise.

gcc/testsuite/ChangeLog:

2017-11-06  Kugan Vivekanandarajah  <kuganv@linaro.org>

    * gcc.target/aarch64/advsimd-intrinsics/vld1x2.c: New test.

[-- Attachment #2: 0001-add-missing-ld1-x2-builtins.patch --]
[-- Type: text/x-patch, Size: 18731 bytes --]

From dfdd8eba9fb49a776cdf8d82c0e34db0fb30d1b5 Mon Sep 17 00:00:00 2001
From: Kugan Vivekanandarajah <kugan.vivekanandarajah@linaro.org>
Date: Sat, 30 Sep 2017 04:51:08 +1000
Subject: [PATCH] add missing ld1 x2 builtins

---
 gcc/config/aarch64/aarch64-simd-builtins.def       |   6 +-
 gcc/config/aarch64/aarch64-simd.md                 |  48 +++
 gcc/config/aarch64/arm_neon.h                      | 336 +++++++++++++++++++++
 .../gcc.target/aarch64/advsimd-intrinsics/vld1x2.c |  71 +++++
 4 files changed, 460 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1x2.c

diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
index d713d5d..90736ba 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -86,6 +86,10 @@
   VAR1 (SETREGP, set_qregoi, 0, v2di)
   VAR1 (SETREGP, set_qregci, 0, v2di)
   VAR1 (SETREGP, set_qregxi, 0, v2di)
+  /* Implemented by aarch64_ld1x2<VQ:mode>. */
+  BUILTIN_VQ (LOADSTRUCT, ld1x2, 0)
+  /* Implemented by aarch64_ld1x2<VDC:mode>. */
+  BUILTIN_VDC (LOADSTRUCT, ld1x2, 0)
   /* Implemented by aarch64_ld<VSTRUCT:nregs><VDC:mode>.  */
   BUILTIN_VDC (LOADSTRUCT, ld2, 0)
   BUILTIN_VDC (LOADSTRUCT, ld3, 0)
@@ -563,4 +567,4 @@
   BUILTIN_GPI (UNOP, fix_truncdf, 2)
   BUILTIN_GPI_I16 (UNOPUS, fixuns_trunchf, 2)
   BUILTIN_GPI (UNOPUS, fixuns_truncsf, 2)
-  BUILTIN_GPI (UNOPUS, fixuns_truncdf, 2)
\ No newline at end of file
+  BUILTIN_GPI (UNOPUS, fixuns_truncdf, 2)
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 70e9339..a7ed594 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -5071,6 +5071,33 @@
   DONE;
 })
 
+(define_expand "aarch64_ld1x2<VQ:mode>"
+ [(match_operand:OI 0 "register_operand" "=w")
+  (match_operand:DI 1 "register_operand" "r")
+  (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+  "TARGET_SIMD"
+{
+  machine_mode mode = OImode;
+  rtx mem = gen_rtx_MEM (mode, operands[1]);
+
+  emit_insn (gen_aarch64_simd_ld1<VQ:mode>_x2 (operands[0], mem));
+  DONE;
+})
+
+(define_expand "aarch64_ld1x2<VDC:mode>"
+ [(match_operand:OI 0 "register_operand" "=w")
+  (match_operand:DI 1 "register_operand" "r")
+  (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+  "TARGET_SIMD"
+{
+  machine_mode mode = OImode;
+  rtx mem = gen_rtx_MEM (mode, operands[1]);
+
+  emit_insn (gen_aarch64_simd_ld1<VDC:mode>_x2 (operands[0], mem));
+  DONE;
+})
+
+
 (define_expand "aarch64_ld<VSTRUCT:nregs>_lane<VALLDIF:mode>"
   [(match_operand:VSTRUCT 0 "register_operand" "=w")
 	(match_operand:DI 1 "register_operand" "w")
@@ -5458,6 +5485,27 @@
   [(set_attr "type" "neon_load1_all_lanes")]
 )
 
+(define_insn "aarch64_simd_ld1<mode>_x2"
+  [(set (match_operand:OI 0 "register_operand" "=w")
+	(unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
+		    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+		   UNSPEC_LD1))]
+  "TARGET_SIMD"
+  "ld1\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
+  [(set_attr "type" "neon_load1_2reg<q>")]
+)
+
+(define_insn "aarch64_simd_ld1<mode>_x2"
+  [(set (match_operand:OI 0 "register_operand" "=w")
+	(unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
+		    (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+		   UNSPEC_LD1))]
+  "TARGET_SIMD"
+  "ld1\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
+  [(set_attr "type" "neon_load1_2reg<q>")]
+)
+
+
 (define_insn "aarch64_frecpe<mode>"
   [(set (match_operand:VHSDF 0 "register_operand" "=w")
 	(unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index d7b30b0..0f49cfd 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -17228,6 +17228,342 @@ vld1q_u8 (const uint8_t *a)
     __builtin_aarch64_ld1v16qi ((const __builtin_aarch64_simd_qi *) a);
 }
 
+__extension__ extern __inline uint8x8x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vld1_u8_x2 (const uint8_t *__a)
+{
+  uint8x8x2_t ret;
+  __builtin_aarch64_simd_oi __o;
+  __o = __builtin_aarch64_ld1x2v8qi ((const __builtin_aarch64_simd_qi *) __a);
+  ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0);
+  ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1);
+  return ret;
+}
+
+__extension__ extern __inline int8x8x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vld1_s8_x2 (const uint8_t *__a)
+{
+  int8x8x2_t ret;
+  __builtin_aarch64_simd_oi __o;
+  __o = __builtin_aarch64_ld1x2v8qi ((const __builtin_aarch64_simd_qi *) __a);
+  ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0);
+  ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1);
+  return ret;
+}
+
+__extension__ extern __inline uint16x4x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vld1_u16_x2 (const uint16_t *__a)
+{
+  uint16x4x2_t ret;
+  __builtin_aarch64_simd_oi __o;
+  __o = __builtin_aarch64_ld1x2v4hi ((const __builtin_aarch64_simd_hi *) __a);
+  ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0);
+  ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1);
+  return ret;
+}
+
+__extension__ extern __inline int16x4x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vld1_s16_x2 (const int16_t *__a)
+{
+  int16x4x2_t ret;
+  __builtin_aarch64_simd_oi __o;
+  __o = __builtin_aarch64_ld1x2v4hi ((const __builtin_aarch64_simd_hi *) __a);
+  ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0);
+  ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1);
+  return ret;
+}
+
+__extension__ extern __inline uint32x2x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vld1_u32_x2 (const uint32_t *__a)
+{
+  uint32x2x2_t ret;
+  __builtin_aarch64_simd_oi __o;
+  __o = __builtin_aarch64_ld1x2v2si ((const __builtin_aarch64_simd_si *) __a);
+  ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 0);
+  ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 1);
+  return ret;
+}
+
+__extension__ extern __inline int32x2x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vld1_s32_x2 (const uint32_t *__a)
+{
+  int32x2x2_t ret;
+  __builtin_aarch64_simd_oi __o;
+  __o = __builtin_aarch64_ld1x2v2si ((const __builtin_aarch64_simd_si *) __a);
+  ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 0);
+  ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 1);
+  return ret;
+}
+
+__extension__ extern __inline uint64x1x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vld1_u64_x2 (const uint64_t *__a)
+{
+  uint64x1x2_t ret;
+  __builtin_aarch64_simd_oi __o;
+  __o = __builtin_aarch64_ld1x2di ((const __builtin_aarch64_simd_di *) __a);
+  ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregoidi (__o, 0);
+  ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregoidi (__o, 1);
+  return ret;
+}
+
+__extension__ extern __inline int64x1x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vld1_s64_x2 (const int64_t *__a)
+{
+  int64x1x2_t ret;
+  __builtin_aarch64_simd_oi __o;
+  __o = __builtin_aarch64_ld1x2di ((const __builtin_aarch64_simd_di *) __a);
+  ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregoidi (__o, 0);
+  ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregoidi (__o, 1);
+  return ret;
+}
+
+__extension__ extern __inline float16x4x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vld1_f16_x2 (const float16_t *__a)
+{
+  float16x4x2_t ret;
+  __builtin_aarch64_simd_oi __o;
+  __o = __builtin_aarch64_ld1x2v4hf ((const __builtin_aarch64_simd_hf *) __a);
+  ret.val[0] = (float16x4_t) __builtin_aarch64_get_dregoiv4hf (__o, 0);
+  ret.val[1] = (float16x4_t) __builtin_aarch64_get_dregoiv4hf (__o, 1);
+  return ret;
+}
+
+__extension__ extern __inline float32x2x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vld1_f32_x2 (const float32_t *__a)
+{
+  float32x2x2_t ret;
+  __builtin_aarch64_simd_oi __o;
+  __o = __builtin_aarch64_ld1x2v2sf ((const __builtin_aarch64_simd_sf *) __a);
+  ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregoiv2sf (__o, 0);
+  ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregoiv2sf (__o, 1);
+  return ret;
+}
+
+__extension__ extern __inline float64x1x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vld1_f64_x2 (const float64_t *__a)
+{
+  float64x1x2_t ret;
+  __builtin_aarch64_simd_oi __o;
+  __o = __builtin_aarch64_ld1x2df ((const __builtin_aarch64_simd_df *) __a);
+  ret.val[0] = (float64x1_t) {__builtin_aarch64_get_dregoidf (__o, 0)};
+  ret.val[1] = (float64x1_t) {__builtin_aarch64_get_dregoidf (__o, 1)};
+  return ret;
+}
+
+__extension__ extern __inline poly8x8x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vld1_p8_x2 (const poly8_t *__a)
+{
+  poly8x8x2_t ret;
+  __builtin_aarch64_simd_oi __o;
+  __o = __builtin_aarch64_ld1x2v8qi ((const __builtin_aarch64_simd_qi *) __a);
+  ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0);
+  ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1);
+  return ret;
+}
+
+__extension__ extern __inline poly16x4x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vld1_p16_x2 (const poly16_t *__a)
+{
+  poly16x4x2_t ret;
+  __builtin_aarch64_simd_oi __o;
+  __o = __builtin_aarch64_ld1x2v4hi ((const __builtin_aarch64_simd_hi *) __a);
+  ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0);
+  ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1);
+  return ret;
+}
+
+__extension__ extern __inline poly64x1x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vld1_p64_x2 (const poly64_t *__a)
+{
+  poly64x1x2_t ret;
+  __builtin_aarch64_simd_oi __o;
+  __o = __builtin_aarch64_ld1x2di ((const __builtin_aarch64_simd_di *) __a);
+  ret.val[0] = (poly64x1_t) __builtin_aarch64_get_dregoidi (__o, 0);
+  ret.val[1] = (poly64x1_t) __builtin_aarch64_get_dregoidi (__o, 1);
+  return ret;
+}
+
+__extension__ extern __inline uint8x16x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vld1q_u8_x2 (const uint8_t *__a)
+{
+  uint8x16x2_t ret;
+  __builtin_aarch64_simd_oi __o;
+  __o = __builtin_aarch64_ld1x2v16qi ((const __builtin_aarch64_simd_qi *) __a);
+  ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0);
+  ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1);
+  return ret;
+}
+
+__extension__ extern __inline int8x16x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vld1q_s8_x2 (const int8_t *__a)
+{
+  int8x16x2_t ret;
+  __builtin_aarch64_simd_oi __o;
+  __o = __builtin_aarch64_ld1x2v16qi ((const __builtin_aarch64_simd_qi *) __a);
+  ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0);
+  ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1);
+  return ret;
+}
+
+__extension__ extern __inline uint16x8x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vld1q_u16_x2 (const uint16_t *__a)
+{
+  uint16x8x2_t ret;
+  __builtin_aarch64_simd_oi __o;
+  __o = __builtin_aarch64_ld1x2v8hi ((const __builtin_aarch64_simd_hi *) __a);
+  ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0);
+  ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1);
+  return ret;
+}
+
+__extension__ extern __inline int16x8x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vld1q_s16_x2 (const int16_t *__a)
+{
+  int16x8x2_t ret;
+  __builtin_aarch64_simd_oi __o;
+  __o = __builtin_aarch64_ld1x2v8hi ((const __builtin_aarch64_simd_hi *) __a);
+  ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0);
+  ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1);
+  return ret;
+}
+
+__extension__ extern __inline uint32x4x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vld1q_u32_x2 (const uint32_t *__a)
+{
+  uint32x4x2_t ret;
+  __builtin_aarch64_simd_oi __o;
+  __o = __builtin_aarch64_ld1x2v4si ((const __builtin_aarch64_simd_si *) __a);
+  ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 0);
+  ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 1);
+  return ret;
+}
+
+__extension__ extern __inline int32x4x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vld1q_s32_x2 (const int32_t *__a)
+{
+  int32x4x2_t ret;
+  __builtin_aarch64_simd_oi __o;
+  __o = __builtin_aarch64_ld1x2v4si ((const __builtin_aarch64_simd_si *) __a);
+  ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 0);
+  ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 1);
+  return ret;
+}
+
+__extension__ extern __inline uint64x2x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vld1q_u64_x2 (const uint64_t *__a)
+{
+  uint64x2x2_t ret;
+  __builtin_aarch64_simd_oi __o;
+  __o = __builtin_aarch64_ld1x2v2di ((const __builtin_aarch64_simd_di *) __a);
+  ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 0);
+  ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 1);
+  return ret;
+}
+
+__extension__ extern __inline int64x2x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vld1q_s64_x2 (const int64_t *__a)
+{
+  int64x2x2_t ret;
+  __builtin_aarch64_simd_oi __o;
+  __o = __builtin_aarch64_ld1x2v2di ((const __builtin_aarch64_simd_di *) __a);
+  ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 0);
+  ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 1);
+  return ret;
+}
+
+__extension__ extern __inline float16x8x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vld1q_f16_x2 (const float16_t *__a)
+{
+  float16x8x2_t ret;
+  __builtin_aarch64_simd_oi __o;
+  __o = __builtin_aarch64_ld1x2v8hf ((const __builtin_aarch64_simd_hf *) __a);
+  ret.val[0] = (float16x8_t) __builtin_aarch64_get_qregoiv8hf (__o, 0);
+  ret.val[1] = (float16x8_t) __builtin_aarch64_get_qregoiv8hf (__o, 1);
+  return ret;
+}
+
+__extension__ extern __inline float32x4x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vld1q_f32_x2 (const float32_t *__a)
+{
+  float32x4x2_t ret;
+  __builtin_aarch64_simd_oi __o;
+  __o = __builtin_aarch64_ld1x2v4sf ((const __builtin_aarch64_simd_sf *) __a);
+  ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregoiv4sf (__o, 0);
+  ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregoiv4sf (__o, 1);
+  return ret;
+}
+
+__extension__ extern __inline float64x2x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vld1q_f64_x2 (const float64_t *__a)
+{
+  float64x2x2_t ret;
+  __builtin_aarch64_simd_oi __o;
+  __o = __builtin_aarch64_ld1x2v2df ((const __builtin_aarch64_simd_df *) __a);
+  ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregoiv2df (__o, 0);
+  ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregoiv2df (__o, 1);
+  return ret;
+}
+
+__extension__ extern __inline poly8x16x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vld1q_p8_x2 (const poly8_t *__a)
+{
+  poly8x16x2_t ret;
+  __builtin_aarch64_simd_oi __o;
+  __o = __builtin_aarch64_ld1x2v16qi ((const __builtin_aarch64_simd_qi *) __a);
+  ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0);
+  ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1);
+  return ret;
+}
+
+__extension__ extern __inline poly16x8x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vld1q_p16_x2 (const poly16_t *__a)
+{
+  poly16x8x2_t ret;
+  __builtin_aarch64_simd_oi __o;
+  __o = __builtin_aarch64_ld1x2v8hi ((const __builtin_aarch64_simd_hi *) __a);
+  ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0);
+  ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1);
+  return ret;
+}
+
+__extension__ extern __inline poly64x2x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vld1q_p64_x2 (const poly64_t *__a)
+{
+  poly64x2x2_t ret;
+  __builtin_aarch64_simd_oi __o;
+  __o = __builtin_aarch64_ld1x2v2di ((const __builtin_aarch64_simd_di *) __a);
+  ret.val[0] = (poly64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 0);
+  ret.val[1] = (poly64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 1);
+  return ret;
+}
+
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld1q_u16 (const uint16_t *a)
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1x2.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1x2.c
new file mode 100644
index 0000000..0a43d0d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1x2.c
@@ -0,0 +1,71 @@
+/* { dg-do run } */
+/* { dg-options "-O3" } */
+
+#include <arm_neon.h>
+
+extern void abort (void);
+
+#define TESTMETH(BASE, ELTS, SUFFIX)	\
+int __attribute__ ((noinline))			\
+test_vld##SUFFIX##_x2 ()			\
+{						\
+  BASE##_t data[ELTS * 2];			\
+  BASE##_t temp[ELTS * 2];			\
+  BASE##x##ELTS##x##2##_t vectors;		\
+  int i, j;					\
+  for (i = 0; i < ELTS * 2; i++)		\
+    data[i] = (BASE##_t) (2 * i + 1);		\
+  asm volatile ("" : : : "memory");		\
+  vectors = vld1##SUFFIX##_x2 (data);		\
+  vst1##SUFFIX (temp, vectors.val[0]);		\
+  vst1##SUFFIX (&temp[ELTS], vectors.val[1]);	\
+  asm volatile ("" : : : "memory");		\
+  for (j = 0; j < ELTS * 2; j++)		\
+    if (temp[j] != data[j])			\
+      return 1;					\
+  return 0;					\
+}
+
+#define VARIANTS(VARIANT)	\
+VARIANT (uint8, 8, _u8)		\
+VARIANT (uint16, 4, _u16)	\
+VARIANT (uint32, 2, _u32)	\
+VARIANT (uint64, 1, _u64)	\
+VARIANT (int8, 8, _s8)		\
+VARIANT (int16, 4, _s16)	\
+VARIANT (int32, 2, _s32)	\
+VARIANT (int64, 1, _s64)	\
+VARIANT (poly8, 8, _p8)		\
+VARIANT (poly16, 4, _p16)	\
+VARIANT (float16, 4, _f16)	\
+VARIANT (float32, 2, _f32)	\
+VARIANT (float64, 1, _f64)	\
+VARIANT (uint8, 16, q_u8)	\
+VARIANT (uint16, 8, q_u16)	\
+VARIANT (uint32, 4, q_u32)	\
+VARIANT (uint64, 2, q_u64)	\
+VARIANT (int8, 16, q_s8)	\
+VARIANT (int16, 8, q_s16)	\
+VARIANT (int32, 4, q_s32)	\
+VARIANT (int64, 2, q_s64)	\
+VARIANT (poly8, 16, q_p8)	\
+VARIANT (poly16, 8, q_p16)	\
+VARIANT (float16, 8, q_f16)	\
+VARIANT (float32, 4, q_f32)	\
+VARIANT (float64, 2, q_f64)
+
+/* Tests of vld1_x2 and vld1q_x2.  */
+VARIANTS (TESTMETH)
+
+#define CHECK(BASE, ELTS, SUFFIX)	\
+  if (test_vld##SUFFIX##_x2 () != 0)	\
+    abort ();
+
+int
+main (int argc, char **argv)
+{
+  VARIANTS (CHECK)
+
+  return 0;
+}
+
-- 
2.7.4

* Re: [AARCH64] implements neon vld1_*_x2 intrinsics
  2017-11-07  4:56 [AARCH64] implements neon vld1_*_x2 intrinsics Kugan Vivekanandarajah
@ 2017-11-15  0:52 ` Kugan Vivekanandarajah
  2017-11-15 10:04 ` Kyrill Tkachov
  1 sibling, 0 replies; 5+ messages in thread
From: Kugan Vivekanandarajah @ 2017-11-15  0:52 UTC (permalink / raw)
  To: gcc-patches, James Greenhalgh, Richard Earnshaw

Ping?

Thanks,
Kugan

On 7 November 2017 at 15:10, Kugan Vivekanandarajah
<kugan.vivekanandarajah@linaro.org> wrote:
> Hi,
>
> Attached patch implements the vld1_*_x2 intrinsics as defined by the
> Neon intrinsics document.
>
> Bootstrap for the latest patch is ongoing on aarch64-linux-gnu. Is
> this OK for trunk if no regressions?
>
> Thanks,
> Kugan
>
> gcc/ChangeLog:
>
> 2017-11-06  Kugan Vivekanandarajah  <kuganv@linaro.org>
>
>     * config/aarch64/aarch64-simd.md (aarch64_ld1x2<VQ:mode>): New.
>     (aarch64_ld1x2<VDC:mode>): Likewise.
>     (aarch64_simd_ld1<mode>_x2): Likewise.
>     (aarch64_simd_ld1<mode>_x2): Likewise.
>     * config/aarch64/arm_neon.h (vld1_u8_x2): New.
>     (vld1_s8_x2): Likewise.
>     (vld1_u16_x2): Likewise.
>     (vld1_s16_x2): Likewise.
>     (vld1_u32_x2): Likewise.
>     (vld1_s32_x2): Likewise.
>     (vld1_u64_x2): Likewise.
>     (vld1_s64_x2): Likewise.
>     (vld1_f16_x2): Likewise.
>     (vld1_f32_x2): Likewise.
>     (vld1_f64_x2): Likewise.
>     (vld1_p8_x2): Likewise.
>     (vld1_p16_x2): Likewise.
>     (vld1_p64_x2): Likewise.
>     (vld1q_u8_x2): Likewise.
>     (vld1q_s8_x2): Likewise.
>     (vld1q_u16_x2): Likewise.
>     (vld1q_s16_x2): Likewise.
>     (vld1q_u32_x2): Likewise.
>     (vld1q_s32_x2): Likewise.
>     (vld1q_u64_x2): Likewise.
>     (vld1q_s64_x2): Likewise.
>     (vld1q_f16_x2): Likewise.
>     (vld1q_f32_x2): Likewise.
>     (vld1q_f64_x2): Likewise.
>     (vld1q_p8_x2): Likewise.
>     (vld1q_p16_x2): Likewise.
>     (vld1q_p64_x2): Likewise.
>
> gcc/testsuite/ChangeLog:
>
> 2017-11-06  Kugan Vivekanandarajah  <kuganv@linaro.org>
>
>     * gcc.target/aarch64/advsimd-intrinsics/vld1x2.c: New test.


* Re: [AARCH64] implements neon vld1_*_x2 intrinsics
  2017-11-07  4:56 [AARCH64] implements neon vld1_*_x2 intrinsics Kugan Vivekanandarajah
  2017-11-15  0:52 ` Kugan Vivekanandarajah
@ 2017-11-15 10:04 ` Kyrill Tkachov
  2017-11-15 11:41   ` James Greenhalgh
  1 sibling, 1 reply; 5+ messages in thread
From: Kyrill Tkachov @ 2017-11-15 10:04 UTC (permalink / raw)
  To: Kugan Vivekanandarajah, gcc-patches, James Greenhalgh, Richard Earnshaw

Hi Kugan,

On 07/11/17 04:10, Kugan Vivekanandarajah wrote:
> Hi,
>
> > Attached patch implements the vld1_*_x2 intrinsics as defined by the
> > Neon intrinsics document.
>
> Bootstrap for the latest patch is ongoing on aarch64-linux-gnu. Is
> this OK for trunk if no regressions?
>

This looks mostly ok to me (though I cannot approve) modulo a couple of 
minor type issues below.

Thanks,
Kyrill

> Thanks,
> Kugan
>
> gcc/ChangeLog:
>
> 2017-11-06  Kugan Vivekanandarajah <kuganv@linaro.org>
>
>     * config/aarch64/aarch64-simd.md (aarch64_ld1x2<VQ:mode>): New.
>     (aarch64_ld1x2<VDC:mode>): Likewise.
>     (aarch64_simd_ld1<mode>_x2): Likewise.
>     (aarch64_simd_ld1<mode>_x2): Likewise.
>     * config/aarch64/arm_neon.h (vld1_u8_x2): New.
>     (vld1_s8_x2): Likewise.
>     (vld1_u16_x2): Likewise.
>     (vld1_s16_x2): Likewise.
>     (vld1_u32_x2): Likewise.
>     (vld1_s32_x2): Likewise.
>     (vld1_u64_x2): Likewise.
>     (vld1_s64_x2): Likewise.
>     (vld1_f16_x2): Likewise.
>     (vld1_f32_x2): Likewise.
>     (vld1_f64_x2): Likewise.
>     (vld1_p8_x2): Likewise.
>     (vld1_p16_x2): Likewise.
>     (vld1_p64_x2): Likewise.
>     (vld1q_u8_x2): Likewise.
>     (vld1q_s8_x2): Likewise.
>     (vld1q_u16_x2): Likewise.
>     (vld1q_s16_x2): Likewise.
>     (vld1q_u32_x2): Likewise.
>     (vld1q_s32_x2): Likewise.
>     (vld1q_u64_x2): Likewise.
>     (vld1q_s64_x2): Likewise.
>     (vld1q_f16_x2): Likewise.
>     (vld1q_f32_x2): Likewise.
>     (vld1q_f64_x2): Likewise.
>     (vld1q_p8_x2): Likewise.
>     (vld1q_p16_x2): Likewise.
>     (vld1q_p64_x2): Likewise.
>
> gcc/testsuite/ChangeLog:
>
> 2017-11-06  Kugan Vivekanandarajah <kuganv@linaro.org>
>
>     * gcc.target/aarch64/advsimd-intrinsics/vld1x2.c: New test.

+__extension__ extern __inline int8x8x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vld1_s8_x2 (const uint8_t *__a)

This should be "const int8_t *"

+{
+  int8x8x2_t ret;
+  __builtin_aarch64_simd_oi __o;
+  __o = __builtin_aarch64_ld1x2v8qi ((const __builtin_aarch64_simd_qi *) __a);
+  ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0);
+  ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1);
+  return ret;
+}

...

+__extension__ extern __inline int32x2x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vld1_s32_x2 (const uint32_t *__a)

Likewise, this should be "const int32_t *"

+{
+  int32x2x2_t ret;
+  __builtin_aarch64_simd_oi __o;
+  __o = __builtin_aarch64_ld1x2v2si ((const __builtin_aarch64_simd_si *) __a);
+  ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 0);
+  ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 1);
+  return ret;
+}
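
With those two fixes the prototypes would read (sketch of the
corrected lines only):

  vld1_s8_x2 (const int8_t *__a)
  vld1_s32_x2 (const int32_t *__a)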

* Re: [AARCH64] implements neon vld1_*_x2 intrinsics
  2017-11-15 10:04 ` Kyrill Tkachov
@ 2017-11-15 11:41   ` James Greenhalgh
  2018-01-03 10:20     ` Christophe Lyon
  0 siblings, 1 reply; 5+ messages in thread
From: James Greenhalgh @ 2017-11-15 11:41 UTC (permalink / raw)
  To: Kyrill Tkachov; +Cc: Kugan Vivekanandarajah, gcc-patches, Richard Earnshaw, nd

On Wed, Nov 15, 2017 at 09:58:28AM +0000, Kyrill Tkachov wrote:
> Hi Kugan,
> 
> On 07/11/17 04:10, Kugan Vivekanandarajah wrote:
> > Hi,
> >
> > Attached patch implements the vld1_*_x2 intrinsics as defined by the
> > Neon intrinsics document.
> >
> > Bootstrap for the latest patch is ongoing on aarch64-linux-gnu. Is
> > this OK for trunk if no regressions?
> >
> 
> This looks mostly ok to me (though I cannot approve) modulo a couple of 
> minor type issues below.

Thanks for the review Kyrill!

I'm happy to trust Kyrill's knowledge of the back-end here, so the patch
is OK with the changes Kyrill requested.

Thanks for the patch!

James

> > gcc/ChangeLog:
> >
> > 2017-11-06  Kugan Vivekanandarajah <kuganv@linaro.org>
> >
> >     * config/aarch64/aarch64-simd.md (aarch64_ld1x2<VQ:mode>): New.
> >     (aarch64_ld1x2<VDC:mode>): Likewise.
> >     (aarch64_simd_ld1<mode>_x2): Likewise.
> >     (aarch64_simd_ld1<mode>_x2): Likewise.
> >     * config/aarch64/arm_neon.h (vld1_u8_x2): New.
> >     (vld1_s8_x2): Likewise.
> >     (vld1_u16_x2): Likewise.
> >     (vld1_s16_x2): Likewise.
> >     (vld1_u32_x2): Likewise.
> >     (vld1_s32_x2): Likewise.
> >     (vld1_u64_x2): Likewise.
> >     (vld1_s64_x2): Likewise.
> >     (vld1_f16_x2): Likewise.
> >     (vld1_f32_x2): Likewise.
> >     (vld1_f64_x2): Likewise.
> >     (vld1_p8_x2): Likewise.
> >     (vld1_p16_x2): Likewise.
> >     (vld1_p64_x2): Likewise.
> >     (vld1q_u8_x2): Likewise.
> >     (vld1q_s8_x2): Likewise.
> >     (vld1q_u16_x2): Likewise.
> >     (vld1q_s16_x2): Likewise.
> >     (vld1q_u32_x2): Likewise.
> >     (vld1q_s32_x2): Likewise.
> >     (vld1q_u64_x2): Likewise.
> >     (vld1q_s64_x2): Likewise.
> >     (vld1q_f16_x2): Likewise.
> >     (vld1q_f32_x2): Likewise.
> >     (vld1q_f64_x2): Likewise.
> >     (vld1q_p8_x2): Likewise.
> >     (vld1q_p16_x2): Likewise.
> >     (vld1q_p64_x2): Likewise.
> >
> > gcc/testsuite/ChangeLog:
> >
> > 2017-11-06  Kugan Vivekanandarajah <kuganv@linaro.org>
> >
> >     * gcc.target/aarch64/advsimd-intrinsics/vld1x2.c: New test.
> 
> +__extension__ extern __inline int8x8x2_t
> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> +vld1_s8_x2 (const uint8_t *__a)
> 
> This should be "const int8_t *"
> 
> +{
> +  int8x8x2_t ret;
> +  __builtin_aarch64_simd_oi __o;
> +  __o = __builtin_aarch64_ld1x2v8qi ((const __builtin_aarch64_simd_qi *) __a);
> +  ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0);
> +  ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1);
> +  return ret;
> +}
> 
> ...
> 
> +__extension__ extern __inline int32x2x2_t
> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> +vld1_s32_x2 (const uint32_t *__a)
> 
> Likewise, this should be "const int32_t *"
> 
> +{
> +  int32x2x2_t ret;
> +  __builtin_aarch64_simd_oi __o;
> +  __o = __builtin_aarch64_ld1x2v2si ((const __builtin_aarch64_simd_si *) __a);
> +  ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 0);
> +  ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 1);
> +  return ret;
> +}

* Re: [AARCH64] implements neon vld1_*_x2 intrinsics
  2017-11-15 11:41   ` James Greenhalgh
@ 2018-01-03 10:20     ` Christophe Lyon
  0 siblings, 0 replies; 5+ messages in thread
From: Christophe Lyon @ 2018-01-03 10:20 UTC (permalink / raw)
  To: James Greenhalgh
  Cc: Kyrill Tkachov, Kugan Vivekanandarajah, gcc-patches,
	Richard Earnshaw, nd

Hi Kugan,


On 15 November 2017 at 12:23, James Greenhalgh <james.greenhalgh@arm.com> wrote:
> On Wed, Nov 15, 2017 at 09:58:28AM +0000, Kyrill Tkachov wrote:
>> Hi Kugan,
>>
>> On 07/11/17 04:10, Kugan Vivekanandarajah wrote:
>> > Hi,
>> >
>> > Attached patch implements the vld1_*_x2 intrinsics as defined by the
>> > Neon intrinsics document.
>> >
>> > Bootstrap for the latest patch is ongoing on aarch64-linux-gnu. Is
>> > this OK for trunk if no regressions?
>> >
>>
>> This looks mostly ok to me (though I cannot approve) modulo a couple of
>> minor type issues below.
>
> Thanks for the review Kyrill!
>
> I'm happy to trust Kyrill's knowledge of the back-end here, so the patch
> is OK with the changes Kyrill requested.
>
> Thanks for the patch!
>
> James
>
>> > gcc/ChangeLog:
>> >
>> > 2017-11-06  Kugan Vivekanandarajah <kuganv@linaro.org>
>> >
>> >     * config/aarch64/aarch64-simd.md (aarch64_ld1x2<VQ:mode>): New.
>> >     (aarch64_ld1x2<VDC:mode>): Likewise.
>> >     (aarch64_simd_ld1<mode>_x2): Likewise.
>> >     (aarch64_simd_ld1<mode>_x2): Likewise.
>> >     * config/aarch64/arm_neon.h (vld1_u8_x2): New.
>> >     (vld1_s8_x2): Likewise.
>> >     (vld1_u16_x2): Likewise.
>> >     (vld1_s16_x2): Likewise.
>> >     (vld1_u32_x2): Likewise.
>> >     (vld1_s32_x2): Likewise.
>> >     (vld1_u64_x2): Likewise.
>> >     (vld1_s64_x2): Likewise.
>> >     (vld1_f16_x2): Likewise.
>> >     (vld1_f32_x2): Likewise.
>> >     (vld1_f64_x2): Likewise.
>> >     (vld1_p8_x2): Likewise.
>> >     (vld1_p16_x2): Likewise.
>> >     (vld1_p64_x2): Likewise.
>> >     (vld1q_u8_x2): Likewise.
>> >     (vld1q_s8_x2): Likewise.
>> >     (vld1q_u16_x2): Likewise.
>> >     (vld1q_s16_x2): Likewise.
>> >     (vld1q_u32_x2): Likewise.
>> >     (vld1q_s32_x2): Likewise.
>> >     (vld1q_u64_x2): Likewise.
>> >     (vld1q_s64_x2): Likewise.
>> >     (vld1q_f16_x2): Likewise.
>> >     (vld1q_f32_x2): Likewise.
>> >     (vld1q_f64_x2): Likewise.
>> >     (vld1q_p8_x2): Likewise.
>> >     (vld1q_p16_x2): Likewise.
>> >     (vld1q_p64_x2): Likewise.
>> >
>> > gcc/testsuite/ChangeLog:
>> >
>> > 2017-11-06  Kugan Vivekanandarajah <kuganv@linaro.org>
>> >
>> >     * gcc.target/aarch64/advsimd-intrinsics/vld1x2.c: New test.
>>

Sorry for not seeing this before you committed this patch, but the new
test fails to compile on arm targets.
Can you add the proper guard, as there is in other tests in the same dir?
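
Something along these lines near the top of the test should do it
(sketch only -- best to copy the exact directive from the
neighbouring tests):

  /* { dg-skip-if "unsupported" { arm*-*-* } } */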

Other question: why do you force -O3? The harness already iterates
over -O0, -O1, ...

Thanks,

Christophe


>> +__extension__ extern __inline int8x8x2_t
>> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>> +vld1_s8_x2 (const uint8_t *__a)
>>
>> This should be "const int8_t *"
>>
>> +{
>> +  int8x8x2_t ret;
>> +  __builtin_aarch64_simd_oi __o;
>> +  __o = __builtin_aarch64_ld1x2v8qi ((const __builtin_aarch64_simd_qi *) __a);
>> +  ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0);
>> +  ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1);
>> +  return ret;
>> +}
>>
>> ...
>>
>> +__extension__ extern __inline int32x2x2_t
>> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>> +vld1_s32_x2 (const uint32_t *__a)
>>
>> Likewise, this should be "const int32_t *"
>>
>> +{
>> +  int32x2x2_t ret;
>> +  __builtin_aarch64_simd_oi __o;
>> +  __o = __builtin_aarch64_ld1x2v2si ((const __builtin_aarch64_simd_si *) __a);
>> +  ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 0);
>> +  ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 1);
>> +  return ret;
>> +}