public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] aarch64: Reimplement vget_high* intrinsics
@ 2021-02-05  9:17 Kyrylo Tkachov
  0 siblings, 0 replies; only message in thread
From: Kyrylo Tkachov @ 2021-02-05  9:17 UTC (permalink / raw)
  To: gcc-patches

[-- Attachment #1: Type: text/plain, Size: 882 bytes --]

Hi all,

Similar to the vget_low* intrinsics we should just use a proper vec_select rather than
going through V2DI subregs.

Bootstrapped and tested on aarch64-none-linux-gnu and aarch64_be-none-elf.

Pushing to trunk.
Thanks,
Kyrill

gcc/ChangeLog:

	* config/aarch64/aarch64-simd-builtins.def (get_high): Define builtin.
	* config/aarch64/aarch64-simd.md (aarch64_get_high<mode>): Define.
	* config/aarch64/arm_neon.h (__GET_HIGH): Delete.
	(vget_high_f16): Reimplement using new builtin.
	(vget_high_f32): Likewise.
	(vget_high_f64): Likewise.
	(vget_high_p8): Likewise.
	(vget_high_p16): Likewise.
	(vget_high_p64): Likewise.
	(vget_high_s8): Likewise.
	(vget_high_s16): Likewise.
	(vget_high_s32): Likewise.
	(vget_high_s64): Likewise.
	(vget_high_u8): Likewise.
	(vget_high_u16): Likewise.
	(vget_high_u32): Likewise.
	(vget_high_u64): Likewise.

[-- Attachment #2: get-high.patch --]
[-- Type: application/octet-stream, Size: 5482 bytes --]

diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
index 66420cf4f4b84b210c2ba7a9919d49d012cfc59f..b885bd5b38bf7ad83eb9d801284bf9b34db17210 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -57,6 +57,8 @@
 
   /* Implemented by aarch64_get_low<mode>.  */
   BUILTIN_VQMOV (UNOP, get_low, 0, AUTO_FP)
+  /* Implemented by aarch64_get_high<mode>.  */
+  BUILTIN_VQMOV (UNOP, get_high, 0, AUTO_FP)
 
   /* Implemented by aarch64_<sur>q<r>shl<mode>.  */
   BUILTIN_VSDQ_I (BINOP, sqshl, 0, NONE)
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 033c6ef4b6c0faceecb359d33196bb5ad653642f..7a5e7b9d8cae8a2bbbf2b9d2f97a8c6e15f47716 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -308,6 +308,17 @@ (define_expand "aarch64_get_low<mode>"
   }
 )
 
+(define_expand "aarch64_get_high<mode>"
+  [(match_operand:<VHALF> 0 "register_operand")
+   (match_operand:VQMOV 1 "register_operand")]
+  "TARGET_SIMD"
+  {
+    rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
+    emit_insn (gen_aarch64_get_half<mode> (operands[0], operands[1], hi));
+    DONE;
+  }
+)
+
 (define_insn_and_split "aarch64_simd_mov_from_<mode>low"
   [(set (match_operand:<VHALF> 0 "register_operand" "=w,?r")
         (vec_select:<VHALF>
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index 67c7f2493893c22e571b6e9107f01fda72168399..baa30bd5a9d96c1bf04a37fb105091ea56a6444a 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -6400,111 +6400,105 @@ vget_low_u64 (uint64x2_t __a)
   return (uint64x1_t) {__builtin_aarch64_get_lowv2di ((int64x2_t) __a)};
 }
 
-#define __GET_HIGH(__TYPE)					\
-  uint64x2_t tmp = vreinterpretq_u64_##__TYPE (__a);		\
-  uint64x1_t hi = vcreate_u64 (vgetq_lane_u64 (tmp, 1));	\
-  return vreinterpret_##__TYPE##_u64 (hi);
-
 __extension__ extern __inline float16x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vget_high_f16 (float16x8_t __a)
 {
-  __GET_HIGH (f16);
+  return __builtin_aarch64_get_highv8hf (__a);
 }
 
 __extension__ extern __inline float32x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vget_high_f32 (float32x4_t __a)
 {
-  __GET_HIGH (f32);
+  return __builtin_aarch64_get_highv4sf (__a);
 }
 
 __extension__ extern __inline float64x1_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vget_high_f64 (float64x2_t __a)
 {
-  __GET_HIGH (f64);
+  return (float64x1_t) {__builtin_aarch64_get_highv2df (__a)};
 }
 
 __extension__ extern __inline poly8x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vget_high_p8 (poly8x16_t __a)
 {
-  __GET_HIGH (p8);
+  return (poly8x8_t) __builtin_aarch64_get_highv16qi ((int8x16_t) __a);
 }
 
 __extension__ extern __inline poly16x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vget_high_p16 (poly16x8_t __a)
 {
-  __GET_HIGH (p16);
+  return (poly16x4_t) __builtin_aarch64_get_highv8hi ((int16x8_t) __a);
 }
 
 __extension__ extern __inline poly64x1_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vget_high_p64 (poly64x2_t __a)
 {
-  __GET_HIGH (p64);
+  return (poly64x1_t) __builtin_aarch64_get_highv2di ((int64x2_t) __a);
 }
 
 __extension__ extern __inline int8x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vget_high_s8 (int8x16_t __a)
 {
-  __GET_HIGH (s8);
+  return  __builtin_aarch64_get_highv16qi (__a);
 }
 
 __extension__ extern __inline int16x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vget_high_s16 (int16x8_t __a)
 {
-  __GET_HIGH (s16);
+  return  __builtin_aarch64_get_highv8hi (__a);
 }
 
 __extension__ extern __inline int32x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vget_high_s32 (int32x4_t __a)
 {
-  __GET_HIGH (s32);
+  return  __builtin_aarch64_get_highv4si (__a);
 }
 
 __extension__ extern __inline int64x1_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vget_high_s64 (int64x2_t __a)
 {
-  __GET_HIGH (s64);
+  return  (int64x1_t) {__builtin_aarch64_get_highv2di (__a)};
 }
 
 __extension__ extern __inline uint8x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vget_high_u8 (uint8x16_t __a)
 {
-  __GET_HIGH (u8);
+  return (uint8x8_t) __builtin_aarch64_get_highv16qi ((int8x16_t) __a);
 }
 
 __extension__ extern __inline uint16x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vget_high_u16 (uint16x8_t __a)
 {
-  __GET_HIGH (u16);
+  return (uint16x4_t) __builtin_aarch64_get_highv8hi ((int16x8_t) __a);
 }
 
 __extension__ extern __inline uint32x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vget_high_u32 (uint32x4_t __a)
 {
-  __GET_HIGH (u32);
+  return (uint32x2_t) __builtin_aarch64_get_highv4si ((int32x4_t) __a);
 }
 
-#undef __GET_HIGH
-
 __extension__ extern __inline uint64x1_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vget_high_u64 (uint64x2_t __a)
 {
-  return vcreate_u64 (vgetq_lane_u64 (__a, 1));
+  return (uint64x1_t) {__builtin_aarch64_get_highv2di ((int64x2_t) __a)};
 }
 
+
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vcombine_s8 (int8x8_t __a, int8x8_t __b)

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2021-02-05  9:17 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-02-05  9:17 [PATCH] aarch64: Reimplement vget_high* intrinsics Kyrylo Tkachov

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).