[PATCH][GCC 9][AArch64] Implement ACLE intrinsics for FRINT[32,64][Z,X]

public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed

* [PATCH][GCC 9][AArch64] Implement ACLE intrinsics for FRINT[32,64][Z,X]
@ 2020-10-08 17:27 Kyrylo Tkachov
  0 siblings, 0 replies; only message in thread
From: Kyrylo Tkachov @ 2020-10-08 17:27 UTC (permalink / raw)
  To: gcc-patches

[-- Attachment #1: Type: text/plain, Size: 1568 bytes --]

Hi all,

I'd like to backport this patch to the GCC 9 branch to implement these Armv8.5-a intrinsics that should have been there.
The backport is fairly simple.

Bootstrapped and tested on aarch64-none-linux-gnu.
Pushing to GCC 9 branch.

This patch implements the ACLE intrinsics to access the FRINT[32,64][Z,X] scalar[1] and vector[2][3] instructions
from Armv8.5-a. These are enabled when the __ARM_FEATURE_FRINT macro is defined.

They're added in a fairly standard way through builtins and unspecs at the RTL level.

	* config/aarch64/aarch64.md ("unspec"): Add UNSPEC_FRINT32Z,
	UNSPEC_FRINT32X, UNSPEC_FRINT64Z, UNSPEC_FRINT64X.
	(aarch64_<frintnzs_op><mode>): New define_insn.
	* config/aarch64/aarch64.h (TARGET_FRINT): Define.
	* config/aarch64/aarch64-c.c (aarch64_update_cpp_builtins): Define
	__ARM_FEATURE_FRINT when appropriate.
	* config/aarch64/aarch64-simd-builtins.def: Add builtins for frint32z,
	frint32x, frint64z, frint64x.
	* config/aarch64/arm_acle.h (__rint32zf, __rint32z, __rint64zf,
	__rint64z, __rint32xf, __rint32x, __rint64xf, __rint64x): Define.
	* config/aarch64/arm_neon.h (vrnd32z_f32, vrnd32zq_f32, vrnd32z_f64,
	vrnd32zq_f64, vrnd32x_f32, vrnd32xq_f32, vrnd32x_f64, vrnd32xq_f64,
	vrnd64z_f32, vrnd64zq_f32, vrnd64z_f64, vrnd64zq_f64, vrnd64x_f32,
	vrnd64xq_f32, vrnd64x_f64, vrnd64xq_f64): Define.
	* config/aarch64/iterators.md (VSFDF): Define.
	(FRINTNZX): Likewise.
	(frintnzs_op): Likewise.

	* gcc.target/aarch64/acle/rintnzx_1.c: New test.
	* gcc.target/aarch64/simd/vrndnzx_1.c: Likewise.

[-- Attachment #2: frint-9.patch --]
[-- Type: application/octet-stream, Size: 14580 bytes --]

diff --git a/gcc/config/aarch64/aarch64-c.c b/gcc/config/aarch64/aarch64-c.c
index 1812387dee67abf456584b8ec124201a859a6565..73527ac960f32fb44f55ce94d042448a9a763dc4 100644
--- a/gcc/config/aarch64/aarch64-c.c
+++ b/gcc/config/aarch64/aarch64-c.c
@@ -170,6 +170,7 @@ aarch64_update_cpp_builtins (cpp_reader *pfile)
     }
 
   aarch64_def_or_undef (TARGET_RNG, "__ARM_FEATURE_RNG", pfile);
+  aarch64_def_or_undef (TARGET_FRINT, "__ARM_FEATURE_FRINT", pfile);
 
   /* Not for ACLE, but required to keep "float.h" correct if we switch
      target between implementations that do or do not support ARMv8.2-A
diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
index 20af5ebb5c7d81f625d8e74696c279411678998e..542de32c4385ff6ccc126b012390c906b4553e66 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -675,3 +675,9 @@
   /* Implemented by aarch64_fml<f16mac1>lq_laneq_highv4sf.  */
   VAR1 (QUADOP_LANE, fmlalq_laneq_high, 0, v4sf)
   VAR1 (QUADOP_LANE, fmlslq_laneq_high, 0, v4sf)
+
+  /* Implemented by aarch64_<frintnzs_op><mode>.  */
+  BUILTIN_VSFDF (UNOP, frint32z, 0)
+  BUILTIN_VSFDF (UNOP, frint32x, 0)
+  BUILTIN_VSFDF (UNOP, frint64z, 0)
+  BUILTIN_VSFDF (UNOP, frint64x, 0)
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index ace80ac4cefaf0ebc439a68d4ce16ac13ede291e..036f3cc176f19c772b4a91c349185efd10ece83e 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -284,6 +284,9 @@ extern unsigned aarch64_architecture_version;
 /* Random number instructions from Armv8.5-a.  */
 #define TARGET_RNG (AARCH64_ISA_RNG)
 
+/* Floating-point rounding instructions from Armv8.5-a.  */
+#define TARGET_FRINT (AARCH64_ISA_V8_5 && TARGET_FLOAT)
+
 /* Make sure this is always defined so we don't have to check for ifdefs
    but rather use normal ifs.  */
 #ifndef TARGET_FIX_ERR_A53_835769_DEFAULT
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 8d71994611df0a8393111104c501490e13a46fea..78288abfd9279cfcb12fdc349a0c87a8b9e6a235 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -139,6 +139,10 @@
     UNSPEC_FCVTZS
     UNSPEC_FCVTZU
     UNSPEC_FJCVTZS
+    UNSPEC_FRINT32Z
+    UNSPEC_FRINT32X
+    UNSPEC_FRINT64Z
+    UNSPEC_FRINT64X
     UNSPEC_URECPE
     UNSPEC_FRECPE
     UNSPEC_FRECPS
@@ -7272,6 +7276,16 @@
    (set_attr "speculation_barrier" "true")]
 )
 
+(define_insn "aarch64_<frintnzs_op><mode>"
+  [(set (match_operand:VSFDF 0 "register_operand" "=w")
+	(unspec:VSFDF [(match_operand:VSFDF 1 "register_operand" "w")]
+		      FRINTNZX))]
+  "TARGET_FRINT && TARGET_FLOAT
+   && !(VECTOR_MODE_P (<MODE>mode) && !TARGET_SIMD)"
+  "<frintnzs_op>\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
+  [(set_attr "type" "f_rint<stype>")]
+)
+
 (define_insn "aarch64_rndr"
   [(set (match_operand:DI 0 "register_operand" "=r")
 	(unspec_volatile:DI [(const_int 0)] UNSPEC_RNDR))
diff --git a/gcc/config/aarch64/arm_acle.h b/gcc/config/aarch64/arm_acle.h
index 423136ecefd5d3d72bc3c1f567b14b38fd843186..56147352c234f4ace851e1b6c7356d348a242300 100644
--- a/gcc/config/aarch64/arm_acle.h
+++ b/gcc/config/aarch64/arm_acle.h
@@ -41,6 +41,57 @@ __jcvt (double __a)
   return __builtin_aarch64_jcvtzs (__a);
 }
 
+#pragma GCC push_options
+#pragma GCC target ("arch=armv8.5-a")
+__extension__ static __inline float __attribute__ ((__always_inline__))
+__rint32zf (float __a)
+{
+  return __builtin_aarch64_frint32zsf (__a);
+}
+
+__extension__ static __inline double __attribute__ ((__always_inline__))
+__rint32z (double __a)
+{
+  return __builtin_aarch64_frint32zdf (__a);
+}
+
+__extension__ static __inline float __attribute__ ((__always_inline__))
+__rint64zf (float __a)
+{
+  return __builtin_aarch64_frint64zsf (__a);
+}
+
+__extension__ static __inline double __attribute__ ((__always_inline__))
+__rint64z (double __a)
+{
+  return __builtin_aarch64_frint64zdf (__a);
+}
+
+__extension__ static __inline float __attribute__ ((__always_inline__))
+__rint32xf (float __a)
+{
+  return __builtin_aarch64_frint32xsf (__a);
+}
+
+__extension__ static __inline double __attribute__ ((__always_inline__))
+__rint32x (double __a)
+{
+  return __builtin_aarch64_frint32xdf (__a);
+}
+
+__extension__ static __inline float __attribute__ ((__always_inline__))
+__rint64xf (float __a)
+{
+  return __builtin_aarch64_frint64xsf (__a);
+}
+
+__extension__ static __inline double __attribute__ ((__always_inline__))
+__rint64x (double __a)
+{
+  return __builtin_aarch64_frint64xdf (__a);
+}
+#pragma GCC pop_options
+
 #pragma GCC pop_options
 
 #pragma GCC push_options
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index 1fd8faae80fbebb8fb0d220679d47b6b4c38a39a..15ed96564c6f1fafd6f0e0984e2e0990933d06b9 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -34683,6 +34683,124 @@ vaddq_p128 (poly128_t __a, poly128_t __b)
   return __a ^ __b;
 }
 
+#pragma GCC push_options
+#pragma GCC target ("arch=armv8.5-a")
+
+__extension__ extern __inline float32x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vrnd32z_f32 (float32x2_t __a)
+{
+  return __builtin_aarch64_frint32zv2sf (__a);
+}
+
+__extension__ extern __inline float32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vrnd32zq_f32 (float32x4_t __a)
+{
+  return __builtin_aarch64_frint32zv4sf (__a);
+}
+
+__extension__ extern __inline float64x1_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vrnd32z_f64 (float64x1_t __a)
+{
+  return (float64x1_t)
+	   {__builtin_aarch64_frint32zdf (vget_lane_f64 (__a, 0))};
+}
+
+__extension__ extern __inline float64x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vrnd32zq_f64 (float64x2_t __a)
+{
+  return __builtin_aarch64_frint32zv2df (__a);
+}
+
+__extension__ extern __inline float32x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vrnd32x_f32 (float32x2_t __a)
+{
+  return __builtin_aarch64_frint32xv2sf (__a);
+}
+
+__extension__ extern __inline float32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vrnd32xq_f32 (float32x4_t __a)
+{
+  return __builtin_aarch64_frint32xv4sf (__a);
+}
+
+__extension__ extern __inline float64x1_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vrnd32x_f64 (float64x1_t __a)
+{
+  return (float64x1_t) {__builtin_aarch64_frint32xdf (vget_lane_f64 (__a, 0))};
+}
+
+__extension__ extern __inline float64x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vrnd32xq_f64 (float64x2_t __a)
+{
+  return __builtin_aarch64_frint32xv2df (__a);
+}
+
+__extension__ extern __inline float32x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vrnd64z_f32 (float32x2_t __a)
+{
+  return __builtin_aarch64_frint64zv2sf (__a);
+}
+
+__extension__ extern __inline float32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vrnd64zq_f32 (float32x4_t __a)
+{
+  return __builtin_aarch64_frint64zv4sf (__a);
+}
+
+__extension__ extern __inline float64x1_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vrnd64z_f64 (float64x1_t __a)
+{
+  return (float64x1_t) {__builtin_aarch64_frint64zdf (vget_lane_f64 (__a, 0))};
+}
+
+__extension__ extern __inline float64x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vrnd64zq_f64 (float64x2_t __a)
+{
+  return __builtin_aarch64_frint64zv2df (__a);
+}
+
+__extension__ extern __inline float32x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vrnd64x_f32 (float32x2_t __a)
+{
+  return __builtin_aarch64_frint64xv2sf (__a);
+}
+
+__extension__ extern __inline float32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vrnd64xq_f32 (float32x4_t __a)
+{
+  return __builtin_aarch64_frint64xv4sf (__a);
+}
+
+__extension__ extern __inline float64x1_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vrnd64x_f64 (float64x1_t __a)
+{
+  return (float64x1_t) {__builtin_aarch64_frint64xdf (vget_lane_f64 (__a, 0))};
+}
+
+__extension__ extern __inline float64x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vrnd64xq_f64 (float64x2_t __a)
+{
+  return __builtin_aarch64_frint64xv2df (__a);
+}
+
+#pragma GCC pop_options
+
 #undef __aarch64_vget_lane_any
 
 #undef __aarch64_vdup_lane_any
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 3bc49ea023853ca027fb3566619a03dbfa445d76..527e64c561a2e082235d06acee435c4110234506 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -131,6 +131,9 @@
 				  (HF "TARGET_SIMD_F16INST")
 				  SF DF])
 
+;; Scalar and vetor modes for SF, DF.
+(define_mode_iterator VSFDF [V2SF V4SF V2DF DF SF])
+
 ;; Advanced SIMD single Float modes.
 (define_mode_iterator VDQSF [V2SF V4SF])
 
@@ -1617,6 +1620,9 @@
 			    UNSPEC_FCMLA180
 			    UNSPEC_FCMLA270])
 
+(define_int_iterator FRINTNZX [UNSPEC_FRINT32Z UNSPEC_FRINT32X
+			       UNSPEC_FRINT64Z UNSPEC_FRINT64X])
+
 ;; Iterators for atomic operations.
 
 (define_int_iterator ATOMIC_LDOP
@@ -1856,6 +1862,9 @@
 (define_int_attr f16mac1 [(UNSPEC_FMLAL "a") (UNSPEC_FMLSL "s")
 			  (UNSPEC_FMLAL2 "a") (UNSPEC_FMLSL2 "s")])
 
+(define_int_attr frintnzs_op [(UNSPEC_FRINT32Z "frint32z") (UNSPEC_FRINT32X "frint32x")
+			      (UNSPEC_FRINT64Z "frint64z") (UNSPEC_FRINT64X "frint64x")])
+
 ;; The condition associated with an UNSPEC_COND_<xx>.
 (define_int_attr cmp_op [(UNSPEC_COND_LT "lt")
 			 (UNSPEC_COND_LE "le")
diff --git a/gcc/testsuite/gcc.target/aarch64/acle/rintnzx_1.c b/gcc/testsuite/gcc.target/aarch64/acle/rintnzx_1.c
new file mode 100644
index 0000000000000000000000000000000000000000..12572084856d8d77c9df4971605d237e1831c8e7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/acle/rintnzx_1.c
@@ -0,0 +1,73 @@
+/* Test the __rint[32,64][z,x] intrinsics.  */
+
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=armv8.5-a" } */
+
+#include <arm_acle.h>
+
+#ifdef __ARM_FEATURE_FRINT
+float
+foo_32z_f32_scal (float a)
+{
+  return __rint32zf (a);
+}
+
+/* { dg-final { scan-assembler-times "frint32z\ts\[0-9\]+, s\[0-9\]+\n" 1 } } */
+
+double
+foo_32z_f64_scal (double a)
+{
+  return __rint32z (a);
+}
+
+/* { dg-final { scan-assembler-times "frint32z\td\[0-9\]+, d\[0-9\]+\n" 1 } } */
+
+float
+foo_32x_f32_scal (float a)
+{
+  return __rint32xf (a);
+}
+
+/* { dg-final { scan-assembler-times "frint32x\ts\[0-9\]+, s\[0-9\]+\n" 1 } } */
+
+double
+foo_32x_f64_scal (double a)
+{
+  return __rint32x (a);
+}
+
+/* { dg-final { scan-assembler-times "frint32x\td\[0-9\]+, d\[0-9\]+\n" 1 } } */
+
+float
+foo_64z_f32_scal (float a)
+{
+  return __rint64zf (a);
+}
+
+/* { dg-final { scan-assembler-times "frint64z\ts\[0-9\]+, s\[0-9\]+\n" 1 } } */
+
+double
+foo_64z_f64_scal (double a)
+{
+  return __rint64z (a);
+}
+
+/* { dg-final { scan-assembler-times "frint64z\td\[0-9\]+, d\[0-9\]+\n" 1 } } */
+
+float
+foo_64x_f32_scal (float a)
+{
+  return __rint64xf (a);
+}
+
+/* { dg-final { scan-assembler-times "frint64x\ts\[0-9\]+, s\[0-9\]+\n" 1 } } */
+
+double
+foo_64x_f64_scal (double a)
+{
+  return __rint64x (a);
+}
+
+/* { dg-final { scan-assembler-times "frint64x\td\[0-9\]+, d\[0-9\]+\n" 1 } } */
+
+#endif
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vrndnzx_1.c b/gcc/testsuite/gcc.target/aarch64/simd/vrndnzx_1.c
new file mode 100644
index 0000000000000000000000000000000000000000..0399b838df92d7f6d78dcd9421461bfab8bcf5b2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/simd/vrndnzx_1.c
@@ -0,0 +1,137 @@
+/* Test the vrnd[32,64][z,x] intrinsics.  */
+
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=armv8.5-a" } */
+
+#include "arm_neon.h"
+
+#ifdef __ARM_FEATURE_FRINT
+
+float32x2_t
+foo_32z (float32x2_t a)
+{
+  return vrnd32z_f32 (a);
+}
+
+/* { dg-final { scan-assembler-times "frint32z\tv\[0-9\]+\.2s, v\[0-9\]+\.2s\n" 1 } } */
+
+float32x4_t
+foo_32z_q (float32x4_t a)
+{
+  return vrnd32zq_f32 (a);
+}
+
+/* { dg-final { scan-assembler-times "frint32z\tv\[0-9\]+\.4s, v\[0-9\]+\.4s\n" 1 } } */
+
+float64x1_t
+foo_32z_f64 (float64x1_t a)
+{
+  return vrnd32z_f64 (a);
+}
+
+/* { dg-final { scan-assembler-times "frint32z\td\[0-9\]+, d\[0-9\]+\n" 1 } } */
+
+float64x2_t
+foo_32z_q_f64 (float64x2_t a)
+{
+  return vrnd32zq_f64 (a);
+}
+
+/* { dg-final { scan-assembler-times "frint32z\tv\[0-9\]+\.2d, v\[0-9\]+\.2d\n" 1 } } */
+
+float32x2_t
+foo_32x (float32x2_t a)
+{
+  return vrnd32x_f32 (a);
+}
+
+/* { dg-final { scan-assembler-times "frint32x\tv\[0-9\]+\.2s, v\[0-9\]+\.2s\n" 1 } } */
+
+float32x4_t
+foo_32x_q (float32x4_t a)
+{
+  return vrnd32xq_f32 (a);
+}
+
+/* { dg-final { scan-assembler-times "frint32x\tv\[0-9\]+\.4s, v\[0-9\]+\.4s\n" 1 } } */
+
+float64x1_t
+foo_32x_f64 (float64x1_t a)
+{
+  return vrnd32x_f64 (a);
+}
+
+/* { dg-final { scan-assembler-times "frint32x\td\[0-9\]+, d\[0-9\]+\n" 1 } } */
+
+float64x2_t
+foo_32x_q_f64 (float64x2_t a)
+{
+  return vrnd32xq_f64 (a);
+}
+
+/* { dg-final { scan-assembler-times "frint32x\tv\[0-9\]+\.2d, v\[0-9\]+\.2d\n" 1 } } */
+
+float32x2_t
+foo_64z (float32x2_t a)
+{
+  return vrnd64z_f32 (a);
+}
+
+/* { dg-final { scan-assembler-times "frint64z\tv\[0-9\]+\.2s, v\[0-9\]+\.2s\n" 1 } } */
+
+float32x4_t
+foo_64z_q (float32x4_t a)
+{
+  return vrnd64zq_f32 (a);
+}
+
+/* { dg-final { scan-assembler-times "frint64z\tv\[0-9\]+\.4s, v\[0-9\]+\.4s\n" 1 } } */
+
+float64x1_t
+foo_64z_f64 (float64x1_t a)
+{
+  return vrnd64z_f64 (a);
+}
+
+/* { dg-final { scan-assembler-times "frint64z\td\[0-9\]+, d\[0-9\]+\n" 1 } } */
+
+float64x2_t
+foo_64z_q_f64 (float64x2_t a)
+{
+  return vrnd64zq_f64 (a);
+}
+
+/* { dg-final { scan-assembler-times "frint64z\tv\[0-9\]+\.2d, v\[0-9\]+\.2d\n" 1 } } */
+
+float32x2_t
+foo_64x (float32x2_t a)
+{
+  return vrnd64x_f32 (a);
+}
+
+/* { dg-final { scan-assembler-times "frint64x\tv\[0-9\]+\.2s, v\[0-9\]+\.2s\n" 1 } } */
+
+float32x4_t
+foo_64x_q (float32x4_t a)
+{
+  return vrnd64xq_f32 (a);
+}
+
+/* { dg-final { scan-assembler-times "frint64x\tv\[0-9\]+\.4s, v\[0-9\]+\.4s\n" 1 } } */
+
+float64x1_t
+foo_64x_f64 (float64x1_t a)
+{
+  return vrnd64x_f64 (a);
+}
+
+/* { dg-final { scan-assembler-times "frint64x\td\[0-9\]+, d\[0-9\]+\n" 1 } } */
+
+float64x2_t
+foo_64x_q_f64 (float64x2_t a)
+{
+  return vrnd64xq_f64 (a);
+}
+
+/* { dg-final { scan-assembler-times "frint64x\tv\[0-9\]+\.2d, v\[0-9\]+\.2d\n" 1 } } */
+#endif

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2020-10-08 17:28 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-10-08 17:27 [PATCH][GCC 9][AArch64] Implement ACLE intrinsics for FRINT[32,64][Z,X] Kyrylo Tkachov

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).