public inbox for libc-alpha@sourceware.org
 help / color / mirror / Atom feed
* [PATCH] AArch64: Remove -0.0 check from vector sin
@ 2023-09-21 16:34 Wilco Dijkstra
  2023-09-25 14:42 ` Szabolcs Nagy
  0 siblings, 1 reply; 2+ messages in thread
From: Wilco Dijkstra @ 2023-09-21 16:34 UTC (permalink / raw)
  To: 'GNU C Library'; +Cc: Szabolcs Nagy


Remove the unnecessary extra checks for sin (-0.0) from vector sin/sinf, improving performance.

Passes regression testing, OK for commit?

---

diff --git a/sysdeps/aarch64/fpu/sin_advsimd.c b/sysdeps/aarch64/fpu/sin_advsimd.c
index ddc41425990f3d3d5fa422f09271e62d2958a094..0389b334cc79fadad495a35a1acb3122860cdbcd 100644
--- a/sysdeps/aarch64/fpu/sin_advsimd.c
+++ b/sysdeps/aarch64/fpu/sin_advsimd.c
@@ -56,7 +56,7 @@ float64x2_t VPCS_ATTR V_NAME_D1 (sin) (float64x2_t x)
 {
   const struct data *d = ptr_barrier (&data);
   float64x2_t n, r, r2, r3, r4, y, t1, t2, t3;
-  uint64x2_t odd, cmp, eqz;
+  uint64x2_t odd, cmp;
 
 #if WANT_SIMD_EXCEPT
   /* Detect |x| <= TinyBound or |x| >= RangeVal. If fenv exceptions are to be
@@ -70,7 +70,6 @@ float64x2_t VPCS_ATTR V_NAME_D1 (sin) (float64x2_t x)
   cmp = vcageq_f64 (d->range_val, x);
   cmp = vceqzq_u64 (cmp); /* cmp = ~cmp.  */
 #endif
-  eqz = vceqzq_f64 (x);
 
   /* n = rint(|x|/pi).  */
   n = vfmaq_f64 (d->shift, d->inv_pi, r);
@@ -96,10 +95,6 @@ float64x2_t VPCS_ATTR V_NAME_D1 (sin) (float64x2_t x)
   y = vfmaq_f64 (t3, y, r4);
   y = vfmaq_f64 (r, y, r3);
 
-  /* Sign of 0 is discarded by polynomial, so copy it back here.  */
-  if (__glibc_unlikely (v_any_u64 (eqz)))
-    y = vbslq_f64 (eqz, x, y);
-
   if (__glibc_unlikely (v_any_u64 (cmp)))
     return special_case (x, y, odd, cmp);
   return vreinterpretq_f64_u64 (veorq_u64 (vreinterpretq_u64_f64 (y), odd));
diff --git a/sysdeps/aarch64/fpu/sinf_advsimd.c b/sysdeps/aarch64/fpu/sinf_advsimd.c
index b67d37f2fde745f717d6eb85a9ef7f20cf5cb271..0e78cf55f03cc2836de49ed8aba50befacdc10cb 100644
--- a/sysdeps/aarch64/fpu/sinf_advsimd.c
+++ b/sysdeps/aarch64/fpu/sinf_advsimd.c
@@ -56,7 +56,7 @@ float32x4_t VPCS_ATTR V_NAME_F1 (sin) (float32x4_t x)
 {
   const struct data *d = ptr_barrier (&data);
   float32x4_t n, r, r2, y;
-  uint32x4_t odd, cmp, eqz;
+  uint32x4_t odd, cmp;
 
 #if WANT_SIMD_EXCEPT
   uint32x4_t ir = vreinterpretq_u32_f32 (vabsq_f32 (x));
@@ -70,7 +70,6 @@ float32x4_t VPCS_ATTR V_NAME_F1 (sin) (float32x4_t x)
   cmp = vcageq_f32 (d->range_val, x);
   cmp = vceqzq_u32 (cmp); /* cmp = ~cmp.  */
 #endif
-  eqz = vceqzq_f32 (x);
 
   /* n = rint(|x|/pi) */
   n = vfmaq_f32 (d->shift, d->inv_pi, r);
@@ -89,10 +88,6 @@ float32x4_t VPCS_ATTR V_NAME_F1 (sin) (float32x4_t x)
   y = vfmaq_f32 (C (0), y, r2);
   y = vfmaq_f32 (r, vmulq_f32 (y, r2), r);
 
-  /* Sign of 0 is discarded by polynomial, so copy it back here.  */
-  if (__glibc_unlikely (v_any_u32 (eqz)))
-    y = vbslq_f32 (eqz, x, y);
-
   if (__glibc_unlikely (v_any_u32 (cmp)))
     return special_case (x, y, odd, cmp);
   return vreinterpretq_f32_u32 (veorq_u32 (vreinterpretq_u32_f32 (y), odd));


^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: [PATCH] AArch64: Remove -0.0 check from vector sin
  2023-09-21 16:34 [PATCH] AArch64: Remove -0.0 check from vector sin Wilco Dijkstra
@ 2023-09-25 14:42 ` Szabolcs Nagy
  0 siblings, 0 replies; 2+ messages in thread
From: Szabolcs Nagy @ 2023-09-25 14:42 UTC (permalink / raw)
  To: Wilco Dijkstra, 'GNU C Library'

The 09/21/2023 17:34, Wilco Dijkstra wrote:
> 
> Remove the unnecessary extra checks for sin (-0.0) from vector sin/sinf, improving performance.
> 
> Passes regress, OK for commit?

OK.

Reviewed-by: Szabolcs Nagy <szabolcs.nagy@arm.com>

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2023-09-25 14:42 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-09-21 16:34 [PATCH] AArch64: Remove -0.0 check from vector sin Wilco Dijkstra
2023-09-25 14:42 ` Szabolcs Nagy

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).