* [Patch AArch64] Add some more missing intrinsics
@ 2016-06-13 16:32 James Greenhalgh
2016-06-14 14:23 ` Kyrill Tkachov
` (2 more replies)
0 siblings, 3 replies; 4+ messages in thread
From: James Greenhalgh @ 2016-06-13 16:32 UTC (permalink / raw)
To: gcc-patches; +Cc: nd, marcus.shawcroft, richard.earnshaw
[-- Attachment #1: Type: text/plain, Size: 1050 bytes --]
Hi,
Inspired by Jiong's recent work, here are some more missing intrinsics,
and a smoke test for each of them.
This patch covers:
vcvt_n_f64_s64
vcvt_n_f64_u64
vcvt_n_s64_f64
vcvt_n_u64_f64
vcvt_f64_s64
vrecpe_f64
vcvt_f64_u64
vrecps_f64
Tested on aarch64-none-elf, and on an internal testsuite for Neon
intrinsics.
Note that the new tests will hit an internal compiler error (ICE) without the fixups in
https://gcc.gnu.org/ml/gcc-patches/2016-06/msg00805.html
OK?
Thanks,
James
---
gcc/ChangeLog
2016-06-10 James Greenhalgh <james.greenhalgh@arm.com>
* config/aarch64/arm_neon.h (vcvt_n_f64_s64): New.
(vcvt_n_f64_u64): Likewise.
(vcvt_n_s64_f64): Likewise.
(vcvt_n_u64_f64): Likewise.
(vcvt_f64_s64): Likewise.
(vrecpe_f64): Likewise.
(vcvt_f64_u64): Likewise.
(vrecps_f64): Likewise.
gcc/testsuite/ChangeLog
2016-06-10 James Greenhalgh <james.greenhalgh@arm.com>
* gcc.target/aarch64/vcvt_f64_1.c: New.
* gcc.target/aarch64/vcvt_n_f64_1.c: New.
* gcc.target/aarch64/vrecp_f64_1.c: New.
[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: 0001-Patch-AArch64-Add-some-more-missing-intrinsics.patch --]
[-- Type: text/x-patch; name=0001-Patch-AArch64-Add-some-more-missing-intrinsics.patch, Size: 8209 bytes --]
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index f70b6d3..2f90938 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -12447,6 +12447,20 @@ vcvt_n_f32_u32 (uint32x2_t __a, const int __b)
return __builtin_aarch64_ucvtfv2si_sus (__a, __b);
}
+__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
+vcvt_n_f64_s64 (int64x1_t __a, const int __b)
+{
+ return (float64x1_t)
+ { __builtin_aarch64_scvtfdi (vget_lane_s64 (__a, 0), __b) };
+}
+
+__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
+vcvt_n_f64_u64 (uint64x1_t __a, const int __b)
+{
+ return (float64x1_t)
+ { __builtin_aarch64_ucvtfdi_sus (vget_lane_u64 (__a, 0), __b) };
+}
+
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vcvtq_n_f32_s32 (int32x4_t __a, const int __b)
{
@@ -12509,6 +12523,20 @@ vcvt_n_u32_f32 (float32x2_t __a, const int __b)
return __builtin_aarch64_fcvtzuv2sf_uss (__a, __b);
}
+__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
+vcvt_n_s64_f64 (float64x1_t __a, const int __b)
+{
+ return (int64x1_t)
+ { __builtin_aarch64_fcvtzsdf (vget_lane_f64 (__a, 0), __b) };
+}
+
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
+vcvt_n_u64_f64 (float64x1_t __a, const int __b)
+{
+ return (uint64x1_t)
+ { __builtin_aarch64_fcvtzudf_uss (vget_lane_f64 (__a, 0), __b) };
+}
+
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vcvtq_n_s32_f32 (float32x4_t __a, const int __b)
{
@@ -12571,6 +12599,18 @@ vcvt_f32_u32 (uint32x2_t __a)
return __builtin_aarch64_floatunsv2siv2sf ((int32x2_t) __a);
}
+__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
+vcvt_f64_s64 (int64x1_t __a)
+{
+ return (float64x1_t) { vget_lane_s64 (__a, 0) };
+}
+
+__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
+vcvt_f64_u64 (uint64x1_t __a)
+{
+ return (float64x1_t) { vget_lane_u64 (__a, 0) };
+}
+
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vcvtq_f32_s32 (int32x4_t __a)
{
@@ -20659,6 +20699,12 @@ vrecpe_f32 (float32x2_t __a)
return __builtin_aarch64_frecpev2sf (__a);
}
+__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
+vrecpe_f64 (float64x1_t __a)
+{
+ return (float64x1_t) { vrecped_f64 (vget_lane_f64 (__a, 0)) };
+}
+
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vrecpeq_f32 (float32x4_t __a)
{
@@ -20691,6 +20737,13 @@ vrecps_f32 (float32x2_t __a, float32x2_t __b)
return __builtin_aarch64_frecpsv2sf (__a, __b);
}
+__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
+vrecps_f64 (float64x1_t __a, float64x1_t __b)
+{
+ return (float64x1_t) { vrecpsd_f64 (vget_lane_f64 (__a, 0),
+ vget_lane_f64 (__b, 0)) };
+}
+
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vrecpsq_f32 (float32x4_t __a, float32x4_t __b)
{
diff --git a/gcc/testsuite/gcc.target/aarch64/vcvt_f64_1.c b/gcc/testsuite/gcc.target/aarch64/vcvt_f64_1.c
new file mode 100644
index 0000000..b7ee7af
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vcvt_f64_1.c
@@ -0,0 +1,48 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+#include "arm_neon.h"
+
+/* For each of these intrinsics, we're mapping to a simple C cast.
+ While the compiler has some freedom in terms of choice of instruction,
+ we'd hope that for this simple case it would always pick the single
+ instruction form given in these tests. Anything else is likely a
+ regression, so check for an exact instruction pattern and
+ register allocation decision. */
+
+/* Test that if we have a value already in Advanced-SIMD registers, we use
+ the scalar register forms. */
+
+float64x1_t
+test_vcvt_f64_s64_fpr (int64x1_t a)
+{
+ /* { dg-final { scan-assembler-times "scvtf\td0, d0" 1 } } */
+ return vcvt_f64_s64 (a);
+}
+
+float64x1_t
+test_vcvt_f64_u64_fpr (uint64x1_t a)
+{
+ /* { dg-final { scan-assembler-times "ucvtf\td0, d0" 1 } } */
+ return vcvt_f64_u64 (a);
+}
+
+/* Test that if we have an integer constructor, we use the general-purpose
+ register forms for scvtf and ucvtf. */
+
+float64x1_t
+test_vcvt_f64_s64_gpr (int64_t a)
+{
+ /* { dg-final { scan-assembler-times "scvtf\td0, x0" 1 } } */
+ int64x1_t b = (int64x1_t) { a };
+ return vcvt_f64_s64 (b);
+}
+
+float64x1_t
+test_vcvt_f64_u64_gpr (uint64_t a)
+{
+ /* { dg-final { scan-assembler-times "ucvtf\td0, x0" 1 } } */
+ uint64x1_t b = (uint64x1_t) { a };
+ return vcvt_f64_u64 (b);
+}
+
diff --git a/gcc/testsuite/gcc.target/aarch64/vcvt_n_f64_1.c b/gcc/testsuite/gcc.target/aarch64/vcvt_n_f64_1.c
new file mode 100644
index 0000000..6fe16de
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vcvt_n_f64_1.c
@@ -0,0 +1,80 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+#include "arm_neon.h"
+
+/* For each of these intrinsics, we map directly to an unspec in RTL.
+ We're just using the argument directly and returning the result, so we
+ can precisely specify the exact instruction pattern and register
+ allocations we expect. */
+
+/* Test that if we have a value already in Advanced-SIMD registers, we use
+ the scalar register forms. */
+
+float64x1_t
+test_vcvt_n_f64_s64_fpr (int64x1_t a)
+{
+ /* { dg-final { scan-assembler-times "scvtf\td0, d0, #3" 1 } } */
+ return vcvt_n_f64_s64 (a, 3);
+}
+
+float64x1_t
+test_vcvt_n_f64_u64_fpr (uint64x1_t a)
+{
+ /* { dg-final { scan-assembler-times "ucvtf\td0, d0, #3" 1 } } */
+ return vcvt_n_f64_u64 (a, 3);
+}
+
+/* Test that if we have an integer constructor, we use the general-purpose
+ register forms for scvtf and ucvtf. */
+
+float64x1_t
+test_vcvt_n_f64_s64_gpr (int64_t a)
+{
+ /* { dg-final { scan-assembler-times "scvtf\td0, x0, #3" 1 } } */
+ int64x1_t b = (int64x1_t) { a };
+ return vcvt_n_f64_s64 (b, 3);
+}
+
+float64x1_t
+test_vcvt_n_f64_u64_gpr (uint64_t a)
+{
+ /* { dg-final { scan-assembler-times "ucvtf\td0, x0, #3" 1 } } */
+ uint64x1_t b = (uint64x1_t) { a };
+ return vcvt_n_f64_u64 (b, 3);
+}
+
+/* Test that a normal return through the Advanced-SIMD registers uses
+ the scalar register form. */
+
+int64x1_t
+test_vcvt_n_s64_f64_fpr (float64x1_t a)
+{
+ /* { dg-final { scan-assembler-times "fcvtzs\td0, d0, #3" 1 } } */
+ return vcvt_n_s64_f64 (a, 3);
+}
+
+uint64x1_t
+test_vcvt_n_u64_f64_fpr (float64x1_t a)
+{
+ /* { dg-final { scan-assembler-times "fcvtzu\td0, d0, #3" 1 } } */
+ return vcvt_n_u64_f64 (a, 3);
+}
+
+/* Test that extracting the lane and returning it as a plain [u]int64_t
+ uses the general-purpose register forms of fcvtzs and fcvtzu. */
+
+int64_t
+test_vcvt_n_s64_f64_gpr (float64x1_t a)
+{
+ /* { dg-final { scan-assembler-times "fcvtzs\tx0, d0, #3" 1 } } */
+ return vget_lane_s64 (vcvt_n_s64_f64 (a, 3), 0);
+}
+
+uint64_t
+test_vcvt_n_u64_f64_gpr (float64x1_t a)
+{
+ /* { dg-final { scan-assembler-times "fcvtzu\tx0, d0, #3" 1 } } */
+ return vget_lane_u64 (vcvt_n_u64_f64 (a, 3), 0);
+}
+
diff --git a/gcc/testsuite/gcc.target/aarch64/vrecp_f64_1.c b/gcc/testsuite/gcc.target/aarch64/vrecp_f64_1.c
new file mode 100644
index 0000000..c61b2f1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vrecp_f64_1.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+#include "arm_neon.h"
+
+/* For each of these intrinsics, we're mapping to an unspec in RTL.
+ We therefore know the expected instruction choice and register pattern,
+ so we can look for it exactly. */
+
+float64x1_t
+test_vrecpe_f64 (float64x1_t a)
+{
+ /* { dg-final { scan-assembler-times "frecpe\td0, d0" 1 } } */
+ return vrecpe_f64 (a);
+}
+
+float64x1_t
+test_vrecps_f64 (float64x1_t a, float64x1_t b)
+{
+ /* { dg-final { scan-assembler-times "frecps\td0, d0, d1" 1 } } */
+ return vrecps_f64 (a, b);
+}
+
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [Patch AArch64] Add some more missing intrinsics
2016-06-13 16:32 [Patch AArch64] Add some more missing intrinsics James Greenhalgh
@ 2016-06-14 14:23 ` Kyrill Tkachov
2016-06-20 11:09 ` James Greenhalgh
2016-06-20 13:34 ` Richard Earnshaw (lists)
2 siblings, 0 replies; 4+ messages in thread
From: Kyrill Tkachov @ 2016-06-14 14:23 UTC (permalink / raw)
To: James Greenhalgh, gcc-patches; +Cc: nd, marcus.shawcroft, richard.earnshaw
Hi James,
On 13/06/16 17:31, James Greenhalgh wrote:
> Hi,
>
> Inspired by Jiong's recent work, here are some more missing intrinsics,
> and a smoke test for each of them.
>
> This patch covers:
>
> vcvt_n_f64_s64
> vcvt_n_f64_u64
> vcvt_n_s64_f64
> vcvt_n_u64_f64
> vcvt_f64_s64
> vrecpe_f64
> vcvt_f64_u64
> vrecps_f64
>
> Tested on aarch64-none-elf, and on an internal testsuite for Neon
> intrinsics.
>
> Note that the new tests will ICE without the fixups in
> https://gcc.gnu.org/ml/gcc-patches/2016-06/msg00805.html
>
> OK?
>
> Thanks,
> James
>
> ---
> gcc/ChangeLog
>
> 2016-06-10 James Greenhalgh <james.greenhalgh@arm.com>
>
> * config/aarch64/arm_neon.h (vcvt_n_f64_s64): New.
> (vcvt_n_f64_u64): Likewise.
> (vcvt_n_s64_f64): Likewise.
> (vcvt_n_u64_f64): Likewise.
> (vcvt_f64_s64): Likewise.
> (vrecpe_f64): Likewise.
> (vcvt_f64_u64): Likewise.
> (vrecps_f64): Likewise.
>
> gcc/testsuite/ChangeLog
>
> 2016-06-10 James Greenhalgh <james.greenhalgh@arm.com>
>
> * gcc.target/aarch64/vcvt_f64_1.c: New.
> * gcc.target/aarch64/vcvt_n_f64_1.c: New.
> * gcc.target/aarch64/vrecp_f64_1.c: New.
Looks ok to me (but I can't approve) with a trivial nit below:
+__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
+vrecps_f64 (float64x1_t __a, float64x1_t __b)
+{
+ return (float64x1_t) { vrecpsd_f64 (vget_lane_f64 (__a, 0),
+ vget_lane_f64 (__b, 0)) };
+}
+
There should be only one space between vrecpsd_f64 and '('.
Thanks,
Kyrill
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [Patch AArch64] Add some more missing intrinsics
2016-06-13 16:32 [Patch AArch64] Add some more missing intrinsics James Greenhalgh
2016-06-14 14:23 ` Kyrill Tkachov
@ 2016-06-20 11:09 ` James Greenhalgh
2016-06-20 13:34 ` Richard Earnshaw (lists)
2 siblings, 0 replies; 4+ messages in thread
From: James Greenhalgh @ 2016-06-20 11:09 UTC (permalink / raw)
To: gcc-patches; +Cc: nd, marcus.shawcroft, richard.earnshaw
On Mon, Jun 13, 2016 at 05:31:40PM +0100, James Greenhalgh wrote:
>
> Hi,
>
> Inspired by Jiong's recent work, here are some more missing intrinsics,
> and a smoke test for each of them.
>
> This patch covers:
>
> vcvt_n_f64_s64
> vcvt_n_f64_u64
> vcvt_n_s64_f64
> vcvt_n_u64_f64
> vcvt_f64_s64
> vrecpe_f64
> vcvt_f64_u64
> vrecps_f64
>
> Tested on aarch64-none-elf, and on an internal testsuite for Neon
> intrinsics.
>
> Note that the new tests will ICE without the fixups in
> https://gcc.gnu.org/ml/gcc-patches/2016-06/msg00805.html
>
> OK?
*ping*
https://gcc.gnu.org/ml/gcc-patches/2016-06/msg00977.html
Thanks,
James
> gcc/ChangeLog
>
> 2016-06-10 James Greenhalgh <james.greenhalgh@arm.com>
>
> * config/aarch64/arm_neon.h (vcvt_n_f64_s64): New.
> (vcvt_n_f64_u64): Likewise.
> (vcvt_n_s64_f64): Likewise.
> (vcvt_n_u64_f64): Likewise.
> (vcvt_f64_s64): Likewise.
> (vrecpe_f64): Likewise.
> (vcvt_f64_u64): Likewise.
> (vrecps_f64): Likewise.
>
> gcc/testsuite/ChangeLog
>
> 2016-06-10 James Greenhalgh <james.greenhalgh@arm.com>
>
> * gcc.target/aarch64/vcvt_f64_1.c: New.
> * gcc.target/aarch64/vcvt_n_f64_1.c: New.
> * gcc.target/aarch64/vrecp_f64_1.c: New.
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [Patch AArch64] Add some more missing intrinsics
2016-06-13 16:32 [Patch AArch64] Add some more missing intrinsics James Greenhalgh
2016-06-14 14:23 ` Kyrill Tkachov
2016-06-20 11:09 ` James Greenhalgh
@ 2016-06-20 13:34 ` Richard Earnshaw (lists)
2 siblings, 0 replies; 4+ messages in thread
From: Richard Earnshaw (lists) @ 2016-06-20 13:34 UTC (permalink / raw)
To: James Greenhalgh, gcc-patches; +Cc: nd, marcus.shawcroft
On 13/06/16 17:31, James Greenhalgh wrote:
>
> Hi,
>
> Inspired by Jiong's recent work, here are some more missing intrinsics,
> and a smoke test for each of them.
>
> This patch covers:
>
> vcvt_n_f64_s64
> vcvt_n_f64_u64
> vcvt_n_s64_f64
> vcvt_n_u64_f64
> vcvt_f64_s64
> vrecpe_f64
> vcvt_f64_u64
> vrecps_f64
>
> Tested on aarch64-none-elf, and on an internal testsuite for Neon
> intrinsics.
>
> Note that the new tests will ICE without the fixups in
> https://gcc.gnu.org/ml/gcc-patches/2016-06/msg00805.html
>
> OK?
>
OK, but please fix the nit that Kyrill highlighted.
R.
> Thanks,
> James
>
> ---
> gcc/ChangeLog
>
> 2016-06-10 James Greenhalgh <james.greenhalgh@arm.com>
>
> * config/aarch64/arm_neon.h (vcvt_n_f64_s64): New.
> (vcvt_n_f64_u64): Likewise.
> (vcvt_n_s64_f64): Likewise.
> (vcvt_n_u64_f64): Likewise.
> (vcvt_f64_s64): Likewise.
> (vrecpe_f64): Likewise.
> (vcvt_f64_u64): Likewise.
> (vrecps_f64): Likewise.
>
> gcc/testsuite/ChangeLog
>
> 2016-06-10 James Greenhalgh <james.greenhalgh@arm.com>
>
> * gcc.target/aarch64/vcvt_f64_1.c: New.
> * gcc.target/aarch64/vcvt_n_f64_1.c: New.
> * gcc.target/aarch64/vrecp_f64_1.c: New.
>
>
> 0001-Patch-AArch64-Add-some-more-missing-intrinsics.patch
>
>
> diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
> index f70b6d3..2f90938 100644
> --- a/gcc/config/aarch64/arm_neon.h
> +++ b/gcc/config/aarch64/arm_neon.h
> @@ -12447,6 +12447,20 @@ vcvt_n_f32_u32 (uint32x2_t __a, const int __b)
> return __builtin_aarch64_ucvtfv2si_sus (__a, __b);
> }
>
> +__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
> +vcvt_n_f64_s64 (int64x1_t __a, const int __b)
> +{
> + return (float64x1_t)
> + { __builtin_aarch64_scvtfdi (vget_lane_s64 (__a, 0), __b) };
> +}
> +
> +__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
> +vcvt_n_f64_u64 (uint64x1_t __a, const int __b)
> +{
> + return (float64x1_t)
> + { __builtin_aarch64_ucvtfdi_sus (vget_lane_u64 (__a, 0), __b) };
> +}
> +
> __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
> vcvtq_n_f32_s32 (int32x4_t __a, const int __b)
> {
> @@ -12509,6 +12523,20 @@ vcvt_n_u32_f32 (float32x2_t __a, const int __b)
> return __builtin_aarch64_fcvtzuv2sf_uss (__a, __b);
> }
>
> +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
> +vcvt_n_s64_f64 (float64x1_t __a, const int __b)
> +{
> + return (int64x1_t)
> + { __builtin_aarch64_fcvtzsdf (vget_lane_f64 (__a, 0), __b) };
> +}
> +
> +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
> +vcvt_n_u64_f64 (float64x1_t __a, const int __b)
> +{
> + return (uint64x1_t)
> + { __builtin_aarch64_fcvtzudf_uss (vget_lane_f64 (__a, 0), __b) };
> +}
> +
> __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
> vcvtq_n_s32_f32 (float32x4_t __a, const int __b)
> {
> @@ -12571,6 +12599,18 @@ vcvt_f32_u32 (uint32x2_t __a)
> return __builtin_aarch64_floatunsv2siv2sf ((int32x2_t) __a);
> }
>
> +__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
> +vcvt_f64_s64 (int64x1_t __a)
> +{
> + return (float64x1_t) { vget_lane_s64 (__a, 0) };
> +}
> +
> +__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
> +vcvt_f64_u64 (uint64x1_t __a)
> +{
> + return (float64x1_t) { vget_lane_u64 (__a, 0) };
> +}
> +
> __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
> vcvtq_f32_s32 (int32x4_t __a)
> {
> @@ -20659,6 +20699,12 @@ vrecpe_f32 (float32x2_t __a)
> return __builtin_aarch64_frecpev2sf (__a);
> }
>
> +__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
> +vrecpe_f64 (float64x1_t __a)
> +{
> + return (float64x1_t) { vrecped_f64 (vget_lane_f64 (__a, 0)) };
> +}
> +
> __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
> vrecpeq_f32 (float32x4_t __a)
> {
> @@ -20691,6 +20737,13 @@ vrecps_f32 (float32x2_t __a, float32x2_t __b)
> return __builtin_aarch64_frecpsv2sf (__a, __b);
> }
>
> +__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
> +vrecps_f64 (float64x1_t __a, float64x1_t __b)
> +{
> + return (float64x1_t) { vrecpsd_f64 (vget_lane_f64 (__a, 0),
> + vget_lane_f64 (__b, 0)) };
> +}
> +
> __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
> vrecpsq_f32 (float32x4_t __a, float32x4_t __b)
> {
> diff --git a/gcc/testsuite/gcc.target/aarch64/vcvt_f64_1.c b/gcc/testsuite/gcc.target/aarch64/vcvt_f64_1.c
> new file mode 100644
> index 0000000..b7ee7af
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/vcvt_f64_1.c
> @@ -0,0 +1,48 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +
> +#include "arm_neon.h"
> +
> +/* For each of these intrinsics, we're mapping to a simple C cast.
> + While the compiler has some freedom in terms of choice of instruction,
> + we'd hope that for this simple case it would always pick the single
> + instruction form given in these tests. Anything else is likely a
> + regression, so check for an exact instruction pattern and
> + register allocation decision. */
> +
> +/* Test that if we have a value already in Advanced-SIMD registers, we use
> + the scalar register forms. */
> +
> +float64x1_t
> +test_vcvt_f64_s64_fpr (int64x1_t a)
> +{
> + /* { dg-final { scan-assembler-times "scvtf\td0, d0" 1 } } */
> + return vcvt_f64_s64 (a);
> +}
> +
> +float64x1_t
> +test_vcvt_f64_u64_fpr (uint64x1_t a)
> +{
> + /* { dg-final { scan-assembler-times "ucvtf\td0, d0" 1 } } */
> + return vcvt_f64_u64 (a);
> +}
> +
> +/* Test that if we have an integer constructor, we use the general-purpose
> + register forms for scvtf and ucvtf. */
> +
> +float64x1_t
> +test_vcvt_f64_s64_gpr (int64_t a)
> +{
> + /* { dg-final { scan-assembler-times "scvtf\td0, x0" 1 } } */
> + int64x1_t b = (int64x1_t) { a };
> + return vcvt_f64_s64 (b);
> +}
> +
> +float64x1_t
> +test_vcvt_f64_u64_gpr (uint64_t a)
> +{
> + /* { dg-final { scan-assembler-times "ucvtf\td0, x0" 1 } } */
> + uint64x1_t b = (uint64x1_t) { a };
> + return vcvt_f64_u64 (b);
> +}
> +
> diff --git a/gcc/testsuite/gcc.target/aarch64/vcvt_n_f64_1.c b/gcc/testsuite/gcc.target/aarch64/vcvt_n_f64_1.c
> new file mode 100644
> index 0000000..6fe16de
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/vcvt_n_f64_1.c
> @@ -0,0 +1,80 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +
> +#include "arm_neon.h"
> +
> +/* For each of these intrinsics, we map directly to an unspec in RTL.
> + We're just using the argument directly and returning the result, so we
> + can precisely specify the exact instruction pattern and register
> + allocations we expect. */
> +
> +/* Test that if we have a value already in Advanced-SIMD registers, we use
> + the scalar register forms. */
> +
> +float64x1_t
> +test_vcvt_n_f64_s64_fpr (int64x1_t a)
> +{
> + /* { dg-final { scan-assembler-times "scvtf\td0, d0, #3" 1 } } */
> + return vcvt_n_f64_s64 (a, 3);
> +}
> +
> +float64x1_t
> +test_vcvt_n_f64_u64_fpr (uint64x1_t a)
> +{
> + /* { dg-final { scan-assembler-times "ucvtf\td0, d0, #3" 1 } } */
> + return vcvt_n_f64_u64 (a, 3);
> +}
> +
> +/* Test that if we have an integer constructor, we use the general-purpose
> + register forms for scvtf and ucvtf. */
> +
> +float64x1_t
> +test_vcvt_n_f64_s64_gpr (int64_t a)
> +{
> + /* { dg-final { scan-assembler-times "scvtf\td0, x0, #3" 1 } } */
> + int64x1_t b = (int64x1_t) { a };
> + return vcvt_n_f64_s64 (b, 3);
> +}
> +
> +float64x1_t
> +test_vcvt_n_f64_u64_gpr (uint64_t a)
> +{
> + /* { dg-final { scan-assembler-times "ucvtf\td0, x0, #3" 1 } } */
> + uint64x1_t b = (uint64x1_t) { a };
> + return vcvt_n_f64_u64 (b, 3);
> +}
> +
> +/* Test that a normal return through the Advanced-SIMD registers uses
> + the scalar register form. */
> +
> +int64x1_t
> +test_vcvt_n_s64_f64_fpr (float64x1_t a)
> +{
> + /* { dg-final { scan-assembler-times "fcvtzs\td0, d0, #3" 1 } } */
> + return vcvt_n_s64_f64 (a, 3);
> +}
> +
> +uint64x1_t
> +test_vcvt_n_u64_f64_fpr (float64x1_t a)
> +{
> + /* { dg-final { scan-assembler-times "fcvtzu\td0, d0, #3" 1 } } */
> + return vcvt_n_u64_f64 (a, 3);
> +}
> +
> +/* Test that a lane extracted return as a plain [u]int64_t uses
> + the general-register forms of fcvtzs and fcvtzu. */
> +
> +int64_t
> +test_vcvt_n_s64_f64_gpr (float64x1_t a)
> +{
> + /* { dg-final { scan-assembler-times "fcvtzs\tx0, d0, #3" 1 } } */
> + return vget_lane_s64 (vcvt_n_s64_f64 (a, 3), 0);
> +}
> +
> +uint64_t
> +test_vcvt_n_u64_f64_gpr (float64x1_t a)
> +{
> + /* { dg-final { scan-assembler-times "fcvtzu\tx0, d0, #3" 1 } } */
> + return vget_lane_u64 (vcvt_n_u64_f64 (a, 3), 0);
> +}
> +
> diff --git a/gcc/testsuite/gcc.target/aarch64/vrecp_f64_1.c b/gcc/testsuite/gcc.target/aarch64/vrecp_f64_1.c
> new file mode 100644
> index 0000000..c61b2f1
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/vrecp_f64_1.c
> @@ -0,0 +1,23 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +
> +#include "arm_neon.h"
> +
> +/* For each of these intrinsics, we're mapping to an unspec in RTL.
> + We therefore know the expected instruction choice and register pattern,
> + so we can look for it exactly. */
> +
> +float64x1_t
> +test_vrecpe_f64 (float64x1_t a)
> +{
> + /* { dg-final { scan-assembler-times "frecpe\td0, d0" 1 } } */
> + return vrecpe_f64 (a);
> +}
> +
> +float64x1_t
> +test_vrecps_f64 (float64x1_t a, float64x1_t b)
> +{
> + /* { dg-final { scan-assembler-times "frecps\td0, d0, d1" 1 } } */
> + return vrecps_f64 (a, b);
> +}
> +
>
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2016-06-20 13:34 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-06-13 16:32 [Patch AArch64] Add some more missing intrinsics James Greenhalgh
2016-06-14 14:23 ` Kyrill Tkachov
2016-06-20 11:09 ` James Greenhalgh
2016-06-20 13:34 ` Richard Earnshaw (lists)
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).