public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH][AArch64] vrnd<*>_f64 patch for stage-1
@ 2014-02-13 11:17 Alex Velenko
  2014-02-13 17:43 ` Richard Henderson
  0 siblings, 1 reply; 4+ messages in thread
From: Alex Velenko @ 2014-02-13 11:17 UTC (permalink / raw)
  To: gcc-patches; +Cc: Marcus Shawcroft

[-- Attachment #1: Type: text/plain, Size: 770 bytes --]

Hi,
This patch adds vrnd<*>_f64 aarch64 intrinsics. A testcase for those
intrinsics is added. Run a complete LE and BE regression run with no 
regressions.

Is patch OK for stage-1?

2014-02-13  Alex Velenko  <Alex.Velenko@arm.com>

gcc/

	* config/aarch64/aarch64-builtins.c (BUILTIN_VDQF_DF): Macro
	added.
	* config/aarch64/aarch64-simd-builtins.def (frintn): Use added
	macro.
	* config/aarch64/aarch64-simd.md (<frint_pattern>): Comment
	corrected.
	* config/aarch64/aarch64.md (<frint_pattern>): Likewise.
	* config/aarch64/arm_neon.h (vrnd_f64): Added.
	(vrnda_f64): Likewise.
	(vrndi_f64): Likewise.
	(vrndm_f64): Likewise.
	(vrndn_f64): Likewise.
	(vrndp_f64): Likewise.
	(vrndx_f64): Likewise.

gcc/testsuite/

	gcc.target/aarch64/vrnd_f64_1.c : New testcase.

[-- Attachment #2: vrnd_f64.patch --]
[-- Type: text/x-patch, Size: 9411 bytes --]

diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c
index ebab2ce8347a4425977c5cbd0f285c3ff1d9f2f1..7adc5fb96b6473ecde5c4f76973aff68af0ca7d4 100644
--- a/gcc/config/aarch64/aarch64-builtins.c
+++ b/gcc/config/aarch64/aarch64-builtins.c
@@ -307,6 +307,8 @@ aarch64_types_store1_qualifiers[SIMD_MAX_BUILTIN_ARGS]
   VAR7 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2di)
 #define BUILTIN_VDQF(T, N, MAP) \
   VAR3 (T, N, MAP, v2sf, v4sf, v2df)
+#define BUILTIN_VDQF_DF(T, N, MAP) \
+  VAR4 (T, N, MAP, v2sf, v4sf, v2df, df)
 #define BUILTIN_VDQH(T, N, MAP) \
   VAR2 (T, N, MAP, v4hi, v8hi)
 #define BUILTIN_VDQHS(T, N, MAP) \
diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
index e5f71b479ccfd1a9cbf84aed0f96b49762053f59..09e230c56683a0225f8760472d7137b7bac98297 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -264,7 +264,7 @@
   BUILTIN_VDQF (UNOP, nearbyint, 2)
   BUILTIN_VDQF (UNOP, rint, 2)
   BUILTIN_VDQF (UNOP, round, 2)
-  BUILTIN_VDQF (UNOP, frintn, 2)
+  BUILTIN_VDQF_DF (UNOP, frintn, 2)
 
   /* Implemented by l<fcvt_pattern><su_optab><VQDF:mode><vcvt_target>2.  */
   VAR1 (UNOP, lbtruncv2sf, 2, v2si)
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 4dffb59e856aeaafb79007255d3b91a73ef1ef13..0c1d7de5b3f4fb0fa8fa226b81ec690d8112b849 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -1427,7 +1427,7 @@
 )
 
 ;; Vector versions of the floating-point frint patterns.
-;; Expands to btrunc, ceil, floor, nearbyint, rint, round.
+;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn.
 (define_insn "<frint_pattern><mode>2"
   [(set (match_operand:VDQF 0 "register_operand" "=w")
 	(unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")]
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 99a6ac8fcbdcd24a0ea18cc037bef9cf72070281..577aa9fe08bb445e66734bc404e94e13dc1fa65b 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -3187,7 +3187,7 @@
 ;; -------------------------------------------------------------------
 
 ;; frint floating-point round to integral standard patterns.
-;; Expands to btrunc, ceil, floor, nearbyint, rint, round.
+;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn.
 
 (define_insn "<frint_pattern><mode>2"
   [(set (match_operand:GPF 0 "register_operand" "=w")
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index 6af99361b8e265f66026dc506cfc23f044d153b4..797e37ad638648312ef34bcd63c463e5873c30c4 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -22481,6 +22481,12 @@ vrnd_f32 (float32x2_t __a)
   return __builtin_aarch64_btruncv2sf (__a);
 }
 
+__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
+vrnd_f64 (float64x1_t __a)
+{
+  return vset_lane_f64 (__builtin_trunc (vget_lane_f64 (__a, 0)), __a, 0);
+}
+
 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
 vrndq_f32 (float32x4_t __a)
 {
@@ -22501,6 +22507,12 @@ vrnda_f32 (float32x2_t __a)
   return __builtin_aarch64_roundv2sf (__a);
 }
 
+__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
+vrnda_f64 (float64x1_t __a)
+{
+  return vset_lane_f64 (__builtin_round (vget_lane_f64 (__a, 0)), __a, 0);
+}
+
 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
 vrndaq_f32 (float32x4_t __a)
 {
@@ -22521,6 +22533,12 @@ vrndi_f32 (float32x2_t __a)
   return __builtin_aarch64_nearbyintv2sf (__a);
 }
 
+__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
+vrndi_f64 (float64x1_t __a)
+{
+  return vset_lane_f64 (__builtin_nearbyint (vget_lane_f64 (__a, 0)), __a, 0);
+}
+
 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
 vrndiq_f32 (float32x4_t __a)
 {
@@ -22541,6 +22559,12 @@ vrndm_f32 (float32x2_t __a)
   return __builtin_aarch64_floorv2sf (__a);
 }
 
+__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
+vrndm_f64 (float64x1_t __a)
+{
+  return vset_lane_f64 (__builtin_floor (vget_lane_f64 (__a, 0)), __a, 0);
+}
+
 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
 vrndmq_f32 (float32x4_t __a)
 {
@@ -22560,6 +22584,13 @@ vrndn_f32 (float32x2_t __a)
 {
   return __builtin_aarch64_frintnv2sf (__a);
 }
+
+__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
+vrndn_f64 (float64x1_t __a)
+{
+  return __builtin_aarch64_frintndf (__a);
+}
+
 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
 vrndnq_f32 (float32x4_t __a)
 {
@@ -22580,6 +22611,12 @@ vrndp_f32 (float32x2_t __a)
   return __builtin_aarch64_ceilv2sf (__a);
 }
 
+__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
+vrndp_f64 (float64x1_t __a)
+{
+  return vset_lane_f64 (__builtin_ceil (vget_lane_f64 (__a, 0)), __a, 0);
+}
+
 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
 vrndpq_f32 (float32x4_t __a)
 {
@@ -22600,6 +22637,12 @@ vrndx_f32 (float32x2_t __a)
   return __builtin_aarch64_rintv2sf (__a);
 }
 
+__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
+vrndx_f64 (float64x1_t __a)
+{
+  return vset_lane_f64 (__builtin_rint (vget_lane_f64 (__a, 0)), __a, 0);
+}
+
 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
 vrndxq_f32 (float32x4_t __a)
 {
diff --git a/gcc/testsuite/gcc.target/aarch64/vrnd_f64_1.c b/gcc/testsuite/gcc.target/aarch64/vrnd_f64_1.c
new file mode 100644
index 0000000000000000000000000000000000000000..b8cce636af74cbf3111f7cfd1e43460dae674d8e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vrnd_f64_1.c
@@ -0,0 +1,105 @@
+/* Test vrnd_f64 works correctly.  */
+/* { dg-do run } */
+/* { dg-options "--save-temps" } */
+
+#include "arm_neon.h"
+
+extern void abort (void);
+
+/* Bit offset to round mode field in FPCR.  */
+#define RMODE_START 22
+
+#define FPROUNDING_ZERO 3
+
+/* Sets "rmode" field of "FPCR" control register to
+   "FPROUNDING_ZERO".  */
+void __inline __attribute__ ((__always_inline__))
+set_rounding_mode (uint32_t mode)
+{
+  uint32_t r;
+
+  /* Read current FPCR.  */
+  asm volatile ("mrs %[r], fpcr" : [r] "=r" (r) : :);
+
+  /* Clear rmode.  */
+  r &= 3 << RMODE_START;
+  /* Calculate desired FPCR.  */
+  r |= mode << RMODE_START;
+
+  /* Write desired FPCR back.  */
+  asm volatile ("msr fpcr, %[r]" : : [r] "r" (r) :);
+}
+
+float64x1_t __attribute__ ((noinline))
+compare_f64 (float64x1_t passed, float64_t expected)
+{
+  return (__builtin_fabs (vget_lane_f64 (passed, 0) - expected)
+	  > __DBL_EPSILON__);
+}
+
+void __attribute__ ((noinline))
+run_round_tests (float64x1_t *tests,
+		 float64_t expectations[][6])
+{
+  int i;
+
+  for (i = 0; i < 6; i++)
+    {
+      if (compare_f64 (vrnd_f64 (tests[i]), expectations[0][i]))
+	abort ();
+      if (compare_f64 (vrndx_f64 (tests[i]), expectations[1][i]))
+	abort ();
+      if (compare_f64 (vrndp_f64 (tests[i]), expectations[2][i]))
+	abort ();
+      if (compare_f64 (vrndn_f64 (tests[i]), expectations[3][i]))
+	abort ();
+      if (compare_f64 (vrndm_f64 (tests[i]), expectations[4][i]))
+	abort ();
+      if (compare_f64 (vrndi_f64 (tests[i]), expectations[5][i]))
+	abort ();
+      if (compare_f64 (vrnda_f64 (tests[i]), expectations[6][i]))
+	abort ();
+    }
+}
+
+int
+main (int argc, char **argv)
+{
+  float64x1_t tests[6] =
+    {
+      vcreate_f64 (0x3FE0000000000000), /* Hex for: 0.5.  */
+      vcreate_f64 (0x3FD999999999999A), /* Hex for: 0.4.  */
+      vcreate_f64 (0x3FE3333333333333), /* Hex for: 0.6.  */
+      vcreate_f64 (0xBFE0000000000000), /* Hex for: -0.5.  */
+      vcreate_f64 (0xBFD999999999999A), /* Hex for: -0.4.  */
+      vcreate_f64 (0xBFE3333333333333), /* Hex for: -0.6.  */
+    };
+
+  float64_t expectations[7][6] =
+  {
+    { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 },    /* vrnd - round towards zero.  */
+    { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 },    /* vrndx - round using FPCR mode.  */
+    { 1.0, 1.0, 1.0, 0.0, 0.0, 0.0 },    /* vrndp - round to plus infinity.  */
+    { 0.0, 0.0, 1.0, 0.0, 0.0, -1.0 },   /* vrndn - round ties to even.  */
+    { 0.0, 0.0, 0.0, -1.0, -1.0, -1.0 }, /* vrndm - round to minus infinity.  */
+    { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 },    /* vrndi - round using FPCR mode.  */
+    { 1.0, 0.0, 1.0, -1.0, 0.0, -1.0 },  /* vrnda - round ties away from 0.  */
+  };
+
+  /* Fix floating point control register
+     to have predictable vrndx and vrndi behaviour.  */
+  set_rounding_mode (FPROUNDING_ZERO);
+
+  run_round_tests (tests, expectations);
+
+  return 0;
+}
+
+/* { dg-final { scan-assembler-times "frintz\\td\[0-9\]+, d\[0-9\]+" 1 } } */
+/* { dg-final { scan-assembler-times "frintx\\td\[0-9\]+, d\[0-9\]+" 1 } } */
+/* { dg-final { scan-assembler-times "frintp\\td\[0-9\]+, d\[0-9\]+" 1 } } */
+/* { dg-final { scan-assembler-times "frintn\\td\[0-9\]+, d\[0-9\]+" 1 } } */
+/* { dg-final { scan-assembler-times "frintm\\td\[0-9\]+, d\[0-9\]+" 1 } } */
+/* { dg-final { scan-assembler-times "frinti\\td\[0-9\]+, d\[0-9\]+" 1 } } */
+/* { dg-final { scan-assembler-times "frinta\\td\[0-9\]+, d\[0-9\]+" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH][AArch64] vrnd<*>_f64 patch for stage-1
  2014-02-13 11:17 [PATCH][AArch64] vrnd<*>_f64 patch for stage-1 Alex Velenko
@ 2014-02-13 17:43 ` Richard Henderson
  2014-02-21 12:45   ` Alex Velenko
  0 siblings, 1 reply; 4+ messages in thread
From: Richard Henderson @ 2014-02-13 17:43 UTC (permalink / raw)
  To: Alex Velenko, gcc-patches; +Cc: Marcus Shawcroft

On 02/13/2014 03:17 AM, Alex Velenko wrote:
> +/* Sets "rmode" field of "FPCR" control register to
> +   "FPROUNDING_ZERO".  */

Comment is wrong, or at least misleading.

> +void __inline __attribute__ ((__always_inline__))
> +set_rounding_mode (uint32_t mode)
> +{
> +  uint32_t r;
> +
> +  /* Read current FPCR.  */
> +  asm volatile ("mrs %[r], fpcr" : [r] "=r" (r) : :);
> +
> +  /* Clear rmode.  */
> +  r &= 3 << RMODE_START;

  ~(3 << RMODE_START)

> +  /* Calculate desired FPCR.  */
> +  r |= mode << RMODE_START;
> +
> +  /* Write desired FPCR back.  */
> +  asm volatile ("msr fpcr, %[r]" : : [r] "r" (r) :);
> +}

Fortunately for this testcase, you do always use FPROUNDING_ZERO == 3 when
calling this function, so the bugs are hidden.


r~

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH][AArch64] vrnd<*>_f64 patch for stage-1
  2014-02-13 17:43 ` Richard Henderson
@ 2014-02-21 12:45   ` Alex Velenko
  2014-02-26 16:00     ` Marcus Shawcroft
  0 siblings, 1 reply; 4+ messages in thread
From: Alex Velenko @ 2014-02-21 12:45 UTC (permalink / raw)
  To: Richard Henderson; +Cc: gcc-patches, Marcus Shawcroft

[-- Attachment #1: Type: text/plain, Size: 1880 bytes --]

On 13/02/14 17:43, Richard Henderson wrote:
> On 02/13/2014 03:17 AM, Alex Velenko wrote:
>> +/* Sets "rmode" field of "FPCR" control register to
>> +   "FPROUNDING_ZERO".  */
>
> Comment is wrong, or at least misleading.
>
>> +void __inline __attribute__ ((__always_inline__))
>> +set_rounding_mode (uint32_t mode)
>> +{
>> +  uint32_t r;
>> +
>> +  /* Read current FPCR.  */
>> +  asm volatile ("mrs %[r], fpcr" : [r] "=r" (r) : :);
>> +
>> +  /* Clear rmode.  */
>> +  r &= 3 << RMODE_START;
>
>    ~(3 << RMODE_START)
>
>> +  /* Calculate desired FPCR.  */
>> +  r |= mode << RMODE_START;
>> +
>> +  /* Write desired FPCR back.  */
>> +  asm volatile ("msr fpcr, %[r]" : : [r] "r" (r) :);
>> +}
>
> Fortunately for this testcase, you do always use FPROUNDING_ZERO == 3 when
> calling this function, so the bugs are hidden.
>
>
> r~
>

Hi Richard,
Thank you for pointing those issue out. here is a respin of the same 
patch with indecated issues fixed. the description of the patch is as 
follows:

This patch adds vrnd<*>_f64 aarch64 intrinsics. A testcase for those
intrinsics is added. Run a complete LE and BE regression run with no 
regressions.

Is patch OK for stage-1?

gcc/

2014-02-21  Alex Velenko  <Alex.Velenko@arm.com>

     * config/aarch64/aarch64-builtins.c (BUILTIN_VDQF_DF): Macro
     added.
     * config/aarch64/aarch64-simd-builtins.def (frintn): Use added
     macro.
     * config/aarch64/aarch64-simd.md (<frint_pattern>): Comment
     corrected.
     * config/aarch64/aarch64.md (<frint_pattern>): Likewise.
     * config/aarch64/arm_neon.h (vrnd_f64): Added.
     (vrnda_f64): Likewise.
     (vrndi_f64): Likewise.
     (vrndm_f64): Likewise.
     (vrndn_f64): Likewise.
     (vrndp_f64): Likewise.
     (vrndx_f64): Likewise.

gcc/testsuite/

2014-02-21  Alex Velenko  <Alex.Velenko@arm.com>

     gcc.target/aarch64/vrnd_f64_1.c : New testcase.



[-- Attachment #2: vrnd_f64.patch --]
[-- Type: text/x-patch, Size: 9412 bytes --]

diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c
index ebab2ce8347a4425977c5cbd0f285c3ff1d9f2f1..7adc5fb96b6473ecde5c4f76973aff68af0ca7d4 100644
--- a/gcc/config/aarch64/aarch64-builtins.c
+++ b/gcc/config/aarch64/aarch64-builtins.c
@@ -307,6 +307,8 @@ aarch64_types_store1_qualifiers[SIMD_MAX_BUILTIN_ARGS]
   VAR7 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2di)
 #define BUILTIN_VDQF(T, N, MAP) \
   VAR3 (T, N, MAP, v2sf, v4sf, v2df)
+#define BUILTIN_VDQF_DF(T, N, MAP) \
+  VAR4 (T, N, MAP, v2sf, v4sf, v2df, df)
 #define BUILTIN_VDQH(T, N, MAP) \
   VAR2 (T, N, MAP, v4hi, v8hi)
 #define BUILTIN_VDQHS(T, N, MAP) \
diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
index e5f71b479ccfd1a9cbf84aed0f96b49762053f59..09e230c56683a0225f8760472d7137b7bac98297 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -264,7 +264,7 @@
   BUILTIN_VDQF (UNOP, nearbyint, 2)
   BUILTIN_VDQF (UNOP, rint, 2)
   BUILTIN_VDQF (UNOP, round, 2)
-  BUILTIN_VDQF (UNOP, frintn, 2)
+  BUILTIN_VDQF_DF (UNOP, frintn, 2)
 
   /* Implemented by l<fcvt_pattern><su_optab><VQDF:mode><vcvt_target>2.  */
   VAR1 (UNOP, lbtruncv2sf, 2, v2si)
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 4dffb59e856aeaafb79007255d3b91a73ef1ef13..0c1d7de5b3f4fb0fa8fa226b81ec690d8112b849 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -1427,7 +1427,7 @@
 )
 
 ;; Vector versions of the floating-point frint patterns.
-;; Expands to btrunc, ceil, floor, nearbyint, rint, round.
+;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn.
 (define_insn "<frint_pattern><mode>2"
   [(set (match_operand:VDQF 0 "register_operand" "=w")
 	(unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")]
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 99a6ac8fcbdcd24a0ea18cc037bef9cf72070281..577aa9fe08bb445e66734bc404e94e13dc1fa65b 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -3187,7 +3187,7 @@
 ;; -------------------------------------------------------------------
 
 ;; frint floating-point round to integral standard patterns.
-;; Expands to btrunc, ceil, floor, nearbyint, rint, round.
+;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn.
 
 (define_insn "<frint_pattern><mode>2"
   [(set (match_operand:GPF 0 "register_operand" "=w")
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index 6af99361b8e265f66026dc506cfc23f044d153b4..797e37ad638648312ef34bcd63c463e5873c30c4 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -22481,6 +22481,12 @@ vrnd_f32 (float32x2_t __a)
   return __builtin_aarch64_btruncv2sf (__a);
 }
 
+__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
+vrnd_f64 (float64x1_t __a)
+{
+  return vset_lane_f64 (__builtin_trunc (vget_lane_f64 (__a, 0)), __a, 0);
+}
+
 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
 vrndq_f32 (float32x4_t __a)
 {
@@ -22501,6 +22507,12 @@ vrnda_f32 (float32x2_t __a)
   return __builtin_aarch64_roundv2sf (__a);
 }
 
+__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
+vrnda_f64 (float64x1_t __a)
+{
+  return vset_lane_f64 (__builtin_round (vget_lane_f64 (__a, 0)), __a, 0);
+}
+
 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
 vrndaq_f32 (float32x4_t __a)
 {
@@ -22521,6 +22533,12 @@ vrndi_f32 (float32x2_t __a)
   return __builtin_aarch64_nearbyintv2sf (__a);
 }
 
+__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
+vrndi_f64 (float64x1_t __a)
+{
+  return vset_lane_f64 (__builtin_nearbyint (vget_lane_f64 (__a, 0)), __a, 0);
+}
+
 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
 vrndiq_f32 (float32x4_t __a)
 {
@@ -22541,6 +22559,12 @@ vrndm_f32 (float32x2_t __a)
   return __builtin_aarch64_floorv2sf (__a);
 }
 
+__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
+vrndm_f64 (float64x1_t __a)
+{
+  return vset_lane_f64 (__builtin_floor (vget_lane_f64 (__a, 0)), __a, 0);
+}
+
 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
 vrndmq_f32 (float32x4_t __a)
 {
@@ -22560,6 +22584,13 @@ vrndn_f32 (float32x2_t __a)
 {
   return __builtin_aarch64_frintnv2sf (__a);
 }
+
+__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
+vrndn_f64 (float64x1_t __a)
+{
+  return __builtin_aarch64_frintndf (__a);
+}
+
 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
 vrndnq_f32 (float32x4_t __a)
 {
@@ -22580,6 +22611,12 @@ vrndp_f32 (float32x2_t __a)
   return __builtin_aarch64_ceilv2sf (__a);
 }
 
+__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
+vrndp_f64 (float64x1_t __a)
+{
+  return vset_lane_f64 (__builtin_ceil (vget_lane_f64 (__a, 0)), __a, 0);
+}
+
 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
 vrndpq_f32 (float32x4_t __a)
 {
@@ -22600,6 +22637,12 @@ vrndx_f32 (float32x2_t __a)
   return __builtin_aarch64_rintv2sf (__a);
 }
 
+__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
+vrndx_f64 (float64x1_t __a)
+{
+  return vset_lane_f64 (__builtin_rint (vget_lane_f64 (__a, 0)), __a, 0);
+}
+
 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
 vrndxq_f32 (float32x4_t __a)
 {
diff --git a/gcc/testsuite/gcc.target/aarch64/vrnd_f64_1.c b/gcc/testsuite/gcc.target/aarch64/vrnd_f64_1.c
new file mode 100644
index 0000000000000000000000000000000000000000..2451ecdcfb6440c100675d34342ee1f5d517c2d5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vrnd_f64_1.c
@@ -0,0 +1,105 @@
+/* Test vrnd_f64 works correctly.  */
+/* { dg-do run } */
+/* { dg-options "--save-temps" } */
+
+#include "arm_neon.h"
+
+extern void abort (void);
+
+/* Bit offset to round mode field in FPCR.  */
+#define RMODE_START 22
+
+#define FPROUNDING_ZERO 3
+
+/* Set RMODE field of FPCR control register
+   to rounding mode passed.  */
+void __inline __attribute__ ((__always_inline__))
+set_rounding_mode (uint32_t mode)
+{
+  uint32_t r;
+
+  /* Read current FPCR.  */
+  asm volatile ("mrs %[r], fpcr" : [r] "=r" (r) : :);
+
+  /* Clear rmode.  */
+  r &= ~(3 << RMODE_START);
+  /* Calculate desired FPCR.  */
+  r |= mode << RMODE_START;
+
+  /* Write desired FPCR back.  */
+  asm volatile ("msr fpcr, %[r]" : : [r] "r" (r) :);
+}
+
+float64x1_t __attribute__ ((noinline))
+compare_f64 (float64x1_t passed, float64_t expected)
+{
+  return (__builtin_fabs (vget_lane_f64 (passed, 0) - expected)
+	  > __DBL_EPSILON__);
+}
+
+void __attribute__ ((noinline))
+run_round_tests (float64x1_t *tests,
+		 float64_t expectations[][6])
+{
+  int i;
+
+  for (i = 0; i < 6; i++)
+    {
+      if (compare_f64 (vrnd_f64 (tests[i]), expectations[0][i]))
+	abort ();
+      if (compare_f64 (vrndx_f64 (tests[i]), expectations[1][i]))
+	abort ();
+      if (compare_f64 (vrndp_f64 (tests[i]), expectations[2][i]))
+	abort ();
+      if (compare_f64 (vrndn_f64 (tests[i]), expectations[3][i]))
+	abort ();
+      if (compare_f64 (vrndm_f64 (tests[i]), expectations[4][i]))
+	abort ();
+      if (compare_f64 (vrndi_f64 (tests[i]), expectations[5][i]))
+	abort ();
+      if (compare_f64 (vrnda_f64 (tests[i]), expectations[6][i]))
+	abort ();
+    }
+}
+
+int
+main (int argc, char **argv)
+{
+  float64x1_t tests[6] =
+    {
+      vcreate_f64 (0x3FE0000000000000), /* Hex for: 0.5.  */
+      vcreate_f64 (0x3FD999999999999A), /* Hex for: 0.4.  */
+      vcreate_f64 (0x3FE3333333333333), /* Hex for: 0.6.  */
+      vcreate_f64 (0xBFE0000000000000), /* Hex for: -0.5.  */
+      vcreate_f64 (0xBFD999999999999A), /* Hex for: -0.4.  */
+      vcreate_f64 (0xBFE3333333333333), /* Hex for: -0.6.  */
+    };
+
+  float64_t expectations[7][6] =
+  {
+    { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 },    /* vrnd - round towards zero.  */
+    { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 },    /* vrndx - round using FPCR mode.  */
+    { 1.0, 1.0, 1.0, 0.0, 0.0, 0.0 },    /* vrndp - round to plus infinity.  */
+    { 0.0, 0.0, 1.0, 0.0, 0.0, -1.0 },   /* vrndn - round ties to even.  */
+    { 0.0, 0.0, 0.0, -1.0, -1.0, -1.0 }, /* vrndm - round to minus infinity.  */
+    { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 },    /* vrndi - round using FPCR mode.  */
+    { 1.0, 0.0, 1.0, -1.0, 0.0, -1.0 },  /* vrnda - round ties away from 0.  */
+  };
+
+  /* Set floating point control register
+     to have predictable vrndx and vrndi behaviour.  */
+  set_rounding_mode (FPROUNDING_ZERO);
+
+  run_round_tests (tests, expectations);
+
+  return 0;
+}
+
+/* { dg-final { scan-assembler-times "frintz\\td\[0-9\]+, d\[0-9\]+" 1 } } */
+/* { dg-final { scan-assembler-times "frintx\\td\[0-9\]+, d\[0-9\]+" 1 } } */
+/* { dg-final { scan-assembler-times "frintp\\td\[0-9\]+, d\[0-9\]+" 1 } } */
+/* { dg-final { scan-assembler-times "frintn\\td\[0-9\]+, d\[0-9\]+" 1 } } */
+/* { dg-final { scan-assembler-times "frintm\\td\[0-9\]+, d\[0-9\]+" 1 } } */
+/* { dg-final { scan-assembler-times "frinti\\td\[0-9\]+, d\[0-9\]+" 1 } } */
+/* { dg-final { scan-assembler-times "frinta\\td\[0-9\]+, d\[0-9\]+" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH][AArch64] vrnd<*>_f64 patch for stage-1
  2014-02-21 12:45   ` Alex Velenko
@ 2014-02-26 16:00     ` Marcus Shawcroft
  0 siblings, 0 replies; 4+ messages in thread
From: Marcus Shawcroft @ 2014-02-26 16:00 UTC (permalink / raw)
  To: Alex Velenko; +Cc: gcc-patches

On 21 February 2014 12:44, Alex Velenko <Alex.Velenko@arm.com> wrote:

> This patch adds vrnd<*>_f64 aarch64 intrinsics. A testcase for those
> intrinsics is added. Run a complete LE and BE regression run with no
> regressions.
>
> Is patch OK for stage-1?
>
> gcc/
>
> 2014-02-21  Alex Velenko  <Alex.Velenko@arm.com>
>
>
>     * config/aarch64/aarch64-builtins.c (BUILTIN_VDQF_DF): Macro
>     added.
>     * config/aarch64/aarch64-simd-builtins.def (frintn): Use added
>     macro.
>     * config/aarch64/aarch64-simd.md (<frint_pattern>): Comment
>     corrected.
>     * config/aarch64/aarch64.md (<frint_pattern>): Likewise.
>     * config/aarch64/arm_neon.h (vrnd_f64): Added.
>     (vrnda_f64): Likewise.
>     (vrndi_f64): Likewise.
>     (vrndm_f64): Likewise.
>     (vrndn_f64): Likewise.
>     (vrndp_f64): Likewise.
>     (vrndx_f64): Likewise.
>
> gcc/testsuite/
>
> 2014-02-21  Alex Velenko  <Alex.Velenko@arm.com>
>
>     gcc.target/aarch64/vrnd_f64_1.c : New testcase.
>
>

Ok for stage-1

/Marcus

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2014-02-26 16:00 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-02-13 11:17 [PATCH][AArch64] vrnd<*>_f64 patch for stage-1 Alex Velenko
2014-02-13 17:43 ` Richard Henderson
2014-02-21 12:45   ` Alex Velenko
2014-02-26 16:00     ` Marcus Shawcroft

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).