[PATCH 1/4] Use libc_fe* macros in ldbl-128/s

public inbox for libc-alpha@sourceware.org
 help / color / mirror / Atom feed

* [PATCH 1/4] Use libc_fe* macros in ldbl-128/s_nearbyintl.c.
@ 2020-03-25 10:06 Stefan Liebler
  2020-03-25 10:06 ` [PATCH 2/4] Use libc_fe* macros in ldbl-128/s_fmal.c Stefan Liebler
                   ` (2 more replies)
  0 siblings, 3 replies; 14+ messages in thread
From: Stefan Liebler @ 2020-03-25 10:06 UTC (permalink / raw)
  To: libc-alpha; +Cc: Stefan Liebler

The calls to feholdexcept and fesetenv are replaced
by the libc_fe* macros as it is also done in nearbyintf
and nearbyint.
---
 sysdeps/ieee754/float128/float128_private.h | 10 ++++++++++
 sysdeps/ieee754/ldbl-128/s_nearbyintl.c     |  9 +++++----
 sysdeps/x86/fpu/fenv_private.h              |  4 ++++
 3 files changed, 19 insertions(+), 4 deletions(-)

diff --git a/sysdeps/ieee754/float128/float128_private.h b/sysdeps/ieee754/float128/float128_private.h
index f97463d9dc..af1ed8f3c0 100644
--- a/sysdeps/ieee754/float128/float128_private.h
+++ b/sysdeps/ieee754/float128/float128_private.h
@@ -66,6 +66,16 @@
 # define libc_feupdateenv_testl(ENV, EX) libc_feupdateenv_testf128 (ENV, EX)
 #endif
 
+#ifdef libc_feholdexceptf128
+# undef libc_feholdexceptl
+# define libc_feholdexceptl(ENV) libc_feholdexceptf128 (ENV)
+#endif
+
+#ifdef libc_fesetenvf128
+# undef libc_fesetenvl
+# define libc_fesetenvl(ENV) libc_fesetenvf128 (ENV)
+#endif
+
 /* misc macros from the header below.  */
 #include <fix-fp-int-convert-overflow.h>
 #undef FIX_LDBL_LONG_CONVERT_OVERFLOW
diff --git a/sysdeps/ieee754/ldbl-128/s_nearbyintl.c b/sysdeps/ieee754/ldbl-128/s_nearbyintl.c
index 8d26786f78..c0bc6fbd6d 100644
--- a/sysdeps/ieee754/ldbl-128/s_nearbyintl.c
+++ b/sysdeps/ieee754/ldbl-128/s_nearbyintl.c
@@ -27,6 +27,7 @@
 #include <math.h>
 #include <math-barriers.h>
 #include <math_private.h>
+#include <fenv_private.h>
 #include <libm-alias-ldouble.h>
 #include <math-use-builtins.h>
 
@@ -53,11 +54,11 @@ __nearbyintl (_Float128 x)
     {
       if (j0 < 0)
 	{
-	  feholdexcept (&env);
+	  libc_feholdexceptl (&env);
 	  w = TWO112[sx] + math_opt_barrier (x);
 	  t = w - TWO112[sx];
 	  math_force_eval (t);
-	  fesetenv (&env);
+	  libc_fesetenvl (&env);
 	  GET_LDOUBLE_MSW64 (i0, t);
 	  SET_LDOUBLE_MSW64 (t, (i0 & 0x7fffffffffffffffLL) | (sx << 63));
 	  return t;
@@ -70,11 +71,11 @@ __nearbyintl (_Float128 x)
       else
 	return x;		/* x is integral  */
     }
-  feholdexcept (&env);
+  libc_feholdexceptl (&env);
   w = TWO112[sx] + math_opt_barrier (x);
   t = w - TWO112[sx];
   math_force_eval (t);
-  fesetenv (&env);
+  libc_fesetenvl (&env);
   return t;
 #endif /* ! USE_NEARBYINTL_BUILTIN  */
 }
diff --git a/sysdeps/x86/fpu/fenv_private.h b/sysdeps/x86/fpu/fenv_private.h
index 4b081e015b..23a430362a 100644
--- a/sysdeps/x86/fpu/fenv_private.h
+++ b/sysdeps/x86/fpu/fenv_private.h
@@ -300,12 +300,16 @@ libc_feresetround_387 (fenv_t *e)
   SET_RESTORE_ROUND_GENERIC (RM, libc_feholdsetround_sse, libc_feresetround_sse)
 # define libc_feholdexcept_setroundf128	libc_feholdexcept_setround_sse
 # define libc_feupdateenv_testf128	libc_feupdateenv_test_sse
+# define libc_feholdexceptf128	libc_feholdexcept_sse
+# define libc_fesetenvf128	libc_fesetenv_sse
 #else
 /* The 387 rounding mode is used by soft-fp for 32-bit, but whether
    387 or SSE exceptions are used depends on whether libgcc was built
    for SSE math, which is not known when glibc is being built.  */
 # define libc_feholdexcept_setroundf128	default_libc_feholdexcept_setround
 # define libc_feupdateenv_testf128	default_libc_feupdateenv_test
+# define libc_feholdexceptf128	default_libc_feholdexcept
+# define libc_fesetenvf128	default_libc_fesetenv
 #endif
 
 /* We have support for rounding mode context.  */
-- 
2.23.0


^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH 2/4] Use libc_fe* macros in ldbl-128/s_fmal.c.
  2020-03-25 10:06 [PATCH 1/4] Use libc_fe* macros in ldbl-128/s_nearbyintl.c Stefan Liebler
@ 2020-03-25 10:06 ` Stefan Liebler
  2020-03-25 10:06 ` [PATCH 3/4] Use libc_fe* macros in ldbl-128/e_expl.c Stefan Liebler
  2020-03-25 10:06 ` [PATCH 4/4] Use libc_fe* macros in k_standardl.c Stefan Liebler
  2 siblings, 0 replies; 14+ messages in thread
From: Stefan Liebler @ 2020-03-25 10:06 UTC (permalink / raw)
  To: libc-alpha; +Cc: Stefan Liebler

The calls to feholdexcept, fesetround, feupdateenv, fetestexcept
are replaced by the libc_fe* macros as it is also done in dbl-64/s_fma.c.
---
 sysdeps/ieee754/float128/float128_private.h | 10 +++++++
 sysdeps/ieee754/ldbl-128/s_fmal.c           | 33 +++++++++++----------
 sysdeps/x86/fpu/fenv_private.h              |  4 +++
 3 files changed, 32 insertions(+), 15 deletions(-)

diff --git a/sysdeps/ieee754/float128/float128_private.h b/sysdeps/ieee754/float128/float128_private.h
index af1ed8f3c0..671323035d 100644
--- a/sysdeps/ieee754/float128/float128_private.h
+++ b/sysdeps/ieee754/float128/float128_private.h
@@ -76,6 +76,16 @@
 # define libc_fesetenvl(ENV) libc_fesetenvf128 (ENV)
 #endif
 
+#ifdef libc_feupdateenvf128
+# undef libc_feupdateenvl
+# define libc_feupdateenvl(ENV) libc_feupdateenvf128 (ENV)
+#endif
+
+#ifdef libc_fesetroundf128
+# undef libc_fesetroundl
+# define libc_fesetroundl(RM) libc_fesetroundf128 (RM)
+#endif
+
 /* misc macros from the header below.  */
 #include <fix-fp-int-convert-overflow.h>
 #undef FIX_LDBL_LONG_CONVERT_OVERFLOW
diff --git a/sysdeps/ieee754/ldbl-128/s_fmal.c b/sysdeps/ieee754/ldbl-128/s_fmal.c
index 7475015bce..f5791b6a8a 100644
--- a/sysdeps/ieee754/ldbl-128/s_fmal.c
+++ b/sysdeps/ieee754/ldbl-128/s_fmal.c
@@ -23,6 +23,7 @@
 #include <ieee754.h>
 #include <math-barriers.h>
 #include <math_private.h>
+#include <fenv_private.h>
 #include <libm-alias-ldouble.h>
 #include <tininess.h>
 
@@ -187,8 +188,7 @@ __fmal (_Float128 x, _Float128 y, _Float128 z)
     }
 
   fenv_t env;
-  feholdexcept (&env);
-  fesetround (FE_TONEAREST);
+  libc_feholdexcept_setroundl (&env, FE_TONEAREST);
 
   /* Multiplication m1 + m2 = x * y using Dekker's algorithm.  */
 #define C ((1LL << (LDBL_MANT_DIG + 1) / 2) + 1)
@@ -216,41 +216,44 @@ __fmal (_Float128 x, _Float128 y, _Float128 z)
   /* If the result is an exact zero, ensure it has the correct sign.  */
   if (a1 == 0 && m2 == 0)
     {
-      feupdateenv (&env);
+      libc_feupdateenvl (&env);
       /* Ensure that round-to-nearest value of z + m1 is not reused.  */
       z = math_opt_barrier (z);
       return z + m1;
     }
 
-  fesetround (FE_TOWARDZERO);
+  libc_fesetroundl (FE_TOWARDZERO);
   /* Perform m2 + a2 addition with round to odd.  */
   u.d = a2 + m2;
 
+  if (__glibc_unlikely (adjust < 0))
+    {
+      if ((u.ieee.mantissa3 & 1) == 0)
+	u.ieee.mantissa3 |= libc_fetestexceptl (FE_INEXACT) != 0;
+      v.d = a1 + u.d;
+      /* Ensure the addition is not scheduled after fetestexcept call.  */
+      math_force_eval (v.d);
+    }
+
+  /* Reset rounding mode and test for inexact simultaneously.  */
+  int j = libc_feupdateenv_testl (&env, FE_INEXACT) != 0;
+
   if (__glibc_likely (adjust == 0))
     {
       if ((u.ieee.mantissa3 & 1) == 0 && u.ieee.exponent != 0x7fff)
-	u.ieee.mantissa3 |= fetestexcept (FE_INEXACT) != 0;
-      feupdateenv (&env);
+	u.ieee.mantissa3 |= j;
       /* Result is a1 + u.d.  */
       return a1 + u.d;
     }
   else if (__glibc_likely (adjust > 0))
     {
       if ((u.ieee.mantissa3 & 1) == 0 && u.ieee.exponent != 0x7fff)
-	u.ieee.mantissa3 |= fetestexcept (FE_INEXACT) != 0;
-      feupdateenv (&env);
+	u.ieee.mantissa3 |= j;
       /* Result is a1 + u.d, scaled up.  */
       return (a1 + u.d) * L(0x1p113);
     }
   else
     {
-      if ((u.ieee.mantissa3 & 1) == 0)
-	u.ieee.mantissa3 |= fetestexcept (FE_INEXACT) != 0;
-      v.d = a1 + u.d;
-      /* Ensure the addition is not scheduled after fetestexcept call.  */
-      math_force_eval (v.d);
-      int j = fetestexcept (FE_INEXACT) != 0;
-      feupdateenv (&env);
       /* Ensure the following computations are performed in default rounding
 	 mode instead of just reusing the round to zero computation.  */
       asm volatile ("" : "=m" (u) : "m" (u));
diff --git a/sysdeps/x86/fpu/fenv_private.h b/sysdeps/x86/fpu/fenv_private.h
index 23a430362a..8453aaa270 100644
--- a/sysdeps/x86/fpu/fenv_private.h
+++ b/sysdeps/x86/fpu/fenv_private.h
@@ -302,6 +302,8 @@ libc_feresetround_387 (fenv_t *e)
 # define libc_feupdateenv_testf128	libc_feupdateenv_test_sse
 # define libc_feholdexceptf128	libc_feholdexcept_sse
 # define libc_fesetenvf128	libc_fesetenv_sse
+# define libc_feupdateenvf128	libc_feupdateenv_sse
+# define libc_fesetroundf128	libc_fesetround_sse
 #else
 /* The 387 rounding mode is used by soft-fp for 32-bit, but whether
    387 or SSE exceptions are used depends on whether libgcc was built
@@ -310,6 +312,8 @@ libc_feresetround_387 (fenv_t *e)
 # define libc_feupdateenv_testf128	default_libc_feupdateenv_test
 # define libc_feholdexceptf128	default_libc_feholdexcept
 # define libc_fesetenvf128	default_libc_fesetenv
+# define libc_feupdateenvf128	default_libc_feupdateenv
+# define libc_fesetroundf128	default_libc_fesetround
 #endif
 
 /* We have support for rounding mode context.  */
-- 
2.23.0


^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH 3/4] Use libc_fe* macros in ldbl-128/e_expl.c.
  2020-03-25 10:06 [PATCH 1/4] Use libc_fe* macros in ldbl-128/s_nearbyintl.c Stefan Liebler
  2020-03-25 10:06 ` [PATCH 2/4] Use libc_fe* macros in ldbl-128/s_fmal.c Stefan Liebler
@ 2020-03-25 10:06 ` Stefan Liebler
  2020-03-25 10:13   ` Stefan Liebler
  2020-03-25 10:06 ` [PATCH 4/4] Use libc_fe* macros in k_standardl.c Stefan Liebler
  2 siblings, 1 reply; 14+ messages in thread
From: Stefan Liebler @ 2020-03-25 10:06 UTC (permalink / raw)
  To: libc-alpha; +Cc: Stefan Liebler

The calls to feholdexcept, fesetround and fesetenv are replaced
by the libc_fe* macros.
---
 sysdeps/ieee754/ldbl-128/e_expl.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/sysdeps/ieee754/ldbl-128/e_expl.c b/sysdeps/ieee754/ldbl-128/e_expl.c
index 37c1538c08..104ace1690 100644
--- a/sysdeps/ieee754/ldbl-128/e_expl.c
+++ b/sysdeps/ieee754/ldbl-128/e_expl.c
@@ -66,6 +66,7 @@
 #include <inttypes.h>
 #include <math-barriers.h>
 #include <math_private.h>
+#include <fenv_private.h>
 #include <math-underflow.h>
 #include <stdlib.h>
 #include "t_expl.h"
@@ -146,9 +147,10 @@ __ieee754_expl (_Float128 x)
       union ieee854_long_double ex2_u, scale_u;
       fenv_t oldenv;
 
-      feholdexcept (&oldenv);
 #ifdef FE_TONEAREST
-      fesetround (FE_TONEAREST);
+      libc_feholdexcept_setroundl (&oldenv, FE_TONEAREST);
+#else
+      libc_feholdexceptl (&oldenv);
 #endif
 
       /* Calculate n.  */
@@ -198,7 +200,7 @@ __ieee754_expl (_Float128 x)
       math_force_eval (x22);
 
       /* Return result.  */
-      fesetenv (&oldenv);
+      libc_fesetenvl (&oldenv);
 
       result = x22 * ex2_u.d + ex2_u.d;
 
-- 
2.23.0


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 3/4] Use libc_fe* macros in ldbl-128/e_expl.c.
  2020-03-25 10:06 ` [PATCH 3/4] Use libc_fe* macros in ldbl-128/e_expl.c Stefan Liebler
@ 2020-03-25 10:13   ` Stefan Liebler
  2020-03-25 15:00     ` Adhemerval Zanella
  0 siblings, 1 reply; 14+ messages in thread
From: Stefan Liebler @ 2020-03-25 10:13 UTC (permalink / raw)
  To: GNU C Library; +Cc: Stefan Liebler

Unfortunately, this patch is responsible for test failures on x86_64:

math/test-float128-exp.out:
Failure: exp (-0x1p-10000): Exception "Underflow" set
Failure: exp (-0x2p-16384): Exception "Underflow" set
...

math/test-float128-cexp.out:
Failure: Real part of: cexp (0x2p-16384 - 0x4p-1076 i): Exception 
"Underflow" set
Failure: Real part of: cexp (0x2p-16384 - 0x8p-152 i): Exception 
"Underflow" set
...

I've stepped through "expf128 (0x1p-10000)" in 
sysdeps/ieee754/float128/../ldbl-128/e_expl.c:
151: libc_feholdexcept_setroundl (&oldenv, FE_TONEAREST);    // before 
this patch: feholdexcept (&oldenv); fesetround (FE_TONEAREST);
199: x22 = x + x*x*(P1+x*(P2+x*(P3+x*(P4+x*(P5+x*P6)))));
203: libc_fesetenvl (&oldenv);    // before this patch: fesetenv (&oldenv);

During the evaluation of x22, the underflow exception occurs in:
<__ieee754_expf128+920>  callq  0x7ffff7f2a7c0 <__multf3>
which calls __sfp_handle_exceptions():
ae8be:       40 f6 c7 10             test   $0x10,%dil
ae8c2:       74 0f                   je     ae8d3 
<__sfp_handle_exceptions+0x73>
ae8c4:       d9 74 24 d8             fnstenv -0x28(%rsp)
ae8c8:       66 83 4c 24 dc 10       orw    $0x10,-0x24(%rsp)
ae8ce:       d9 64 24 d8             fldenv -0x28(%rsp)
ae8d2:       9b                      fwait

According to sysdeps/x86/fpu/fenv_private.h:
#ifdef __x86_64__
# define libc_feholdexcept_setroundf128 libc_feholdexcept_setround_sse
# define libc_fesetenvf128	libc_fesetenv_sse
#else
# define libc_feholdexcept_setroundf128	default_libc_feholdexcept_setround
# define libc_fesetenvf128	default_libc_fesetenv
#endif

// On my machine:
# define STMXCSR "stmxcsr"
# define LDMXCSR "ldmxcsr"

static __always_inline void
libc_feholdexcept_setround_sse (fenv_t *e, int r)
{
   unsigned int mxcsr;
   asm (STMXCSR " %0" : "=m" (*&mxcsr));
   e->__mxcsr = mxcsr;
   mxcsr = ((mxcsr | 0x1f80) & ~0x603f) | (r << 3);
   asm volatile (LDMXCSR " %0" : : "m" (*&mxcsr));
}

Whereas the feholdexcept() function is implemented in
./sysdeps/x86_64/fpu/feholdexcpt.c as:
int
__feholdexcept (fenv_t *envp)
{
   unsigned int mxcsr;

   /* Store the environment.  Recall that fnstenv has a side effect of
      masking all exceptions.  Then clear all exceptions.  */
   __asm__ ("fnstenv %0\n\t"
	   "stmxcsr %1\n\t"
	   "fnclex"
	   : "=m" (*envp), "=m" (envp->__mxcsr));

   /* Set the SSE MXCSR register.  */
   mxcsr = (envp->__mxcsr | 0x1f80) & ~0x3f;
   __asm__ ("ldmxcsr %0" : : "m" (*&mxcsr));

   return 0;
}

I assume that the underflow exception stays set because the 
fnstenv / fldenv pair is missing when libc_feholdexcept_setroundf128 / 
libc_fesetenvf128 are used instead of feholdexcept, fesetround and fesetenv.
As I'm not familiar with float128 on x86_64, can anybody please help?

Bye,
Stefan

On 3/25/20 11:06 AM, Stefan Liebler wrote:
> The calls to feholdexcept, fesetround and fesetenv are replaced
> by the libc_fe* macros.
> ---
>   sysdeps/ieee754/ldbl-128/e_expl.c | 8 +++++---
>   1 file changed, 5 insertions(+), 3 deletions(-)
> 
> diff --git a/sysdeps/ieee754/ldbl-128/e_expl.c b/sysdeps/ieee754/ldbl-128/e_expl.c
> index 37c1538c08..104ace1690 100644
> --- a/sysdeps/ieee754/ldbl-128/e_expl.c
> +++ b/sysdeps/ieee754/ldbl-128/e_expl.c
> @@ -66,6 +66,7 @@
>   #include <inttypes.h>
>   #include <math-barriers.h>
>   #include <math_private.h>
> +#include <fenv_private.h>
>   #include <math-underflow.h>
>   #include <stdlib.h>
>   #include "t_expl.h"
> @@ -146,9 +147,10 @@ __ieee754_expl (_Float128 x)
>         union ieee854_long_double ex2_u, scale_u;
>         fenv_t oldenv;
>   
> -      feholdexcept (&oldenv);
>   #ifdef FE_TONEAREST
> -      fesetround (FE_TONEAREST);
> +      libc_feholdexcept_setroundl (&oldenv, FE_TONEAREST);
> +#else
> +      libc_feholdexceptl (&oldenv);
>   #endif
>   
>         /* Calculate n.  */
> @@ -198,7 +200,7 @@ __ieee754_expl (_Float128 x)
>         math_force_eval (x22);
>   
>         /* Return result.  */
> -      fesetenv (&oldenv);
> +      libc_fesetenvl (&oldenv);
>   
>         result = x22 * ex2_u.d + ex2_u.d;
>   
> 


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 3/4] Use libc_fe* macros in ldbl-128/e_expl.c.
  2020-03-25 10:13   ` Stefan Liebler
@ 2020-03-25 15:00     ` Adhemerval Zanella
  2020-03-25 15:07       ` Adhemerval Zanella
  0 siblings, 1 reply; 14+ messages in thread
From: Adhemerval Zanella @ 2020-03-25 15:00 UTC (permalink / raw)
  To: libc-alpha



On 25/03/2020 07:13, Stefan Liebler via Libc-alpha wrote:
> Unfortunately, this patch is responsible for testfails on x86_64:
> 
> math/test-float128-exp.out:
> Failure: exp (-0x1p-10000): Exception "Underflow" set
> Failure: exp (-0x2p-16384): Exception "Underflow" set
> ...
> 
> math/test-float128-cexp.out:
> Failure: Real part of: cexp (0x2p-16384 - 0x4p-1076 i): Exception "Underflow" set
> Failure: Real part of: cexp (0x2p-16384 - 0x8p-152 i): Exception "Underflow" set

The sysdeps/x86/fpu/fenv_private.h states:

296 #ifdef __x86_64__
297 /* The SSE rounding mode is used by soft-fp (libgcc and glibc) on
298    x86_64, so that must be set for float128 computations.  */
299 # define SET_RESTORE_ROUNDF128(RM) \
300   SET_RESTORE_ROUND_GENERIC (RM, libc_feholdsetround_sse, libc_feresetround_sse)

So 

>> diff --git a/sysdeps/ieee754/ldbl-128/e_expl.c b/sysdeps/ieee754/ldbl-128/e_expl.c
>> index 37c1538c08..104ace1690 100644
>> --- a/sysdeps/ieee754/ldbl-128/e_expl.c
>> +++ b/sysdeps/ieee754/ldbl-128/e_expl.c
>> @@ -66,6 +66,7 @@
>>   #include <inttypes.h>
>>   #include <math-barriers.h>
>>   #include <math_private.h>
>> +#include <fenv_private.h>
>>   #include <math-underflow.h>
>>   #include <stdlib.h>
>>   #include "t_expl.h"
>> @@ -146,9 +147,10 @@ __ieee754_expl (_Float128 x)
>>         union ieee854_long_double ex2_u, scale_u;
>>         fenv_t oldenv;
>>   -      feholdexcept (&oldenv);
>>   #ifdef FE_TONEAREST
>> -      fesetround (FE_TONEAREST);
>> +      libc_feholdexcept_setroundl (&oldenv, FE_TONEAREST);

Should be libc_feholdexcept_setroundf128.

>> +#else
>> +      libc_feholdexceptl (&oldenv);

And here libc_fesetenvf128.

>>   #endif
>>           /* Calculate n.  */
>> @@ -198,7 +200,7 @@ __ieee754_expl (_Float128 x)
>>         math_force_eval (x22);
>>           /* Return result.  */
>> -      fesetenv (&oldenv);
>> +      libc_fesetenvl (&oldenv);
>>           result = x22 * ex2_u.d + ex2_u.d;

It might require extending the libc_*f128 macros to other architectures
(not sure).

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 3/4] Use libc_fe* macros in ldbl-128/e_expl.c.
  2020-03-25 15:00     ` Adhemerval Zanella
@ 2020-03-25 15:07       ` Adhemerval Zanella
  2020-03-25 15:42         ` Adhemerval Zanella
  2020-03-26  9:08         ` Stefan Liebler
  0 siblings, 2 replies; 14+ messages in thread
From: Adhemerval Zanella @ 2020-03-25 15:07 UTC (permalink / raw)
  To: libc-alpha



On 25/03/2020 12:00, Adhemerval Zanella wrote:
> 
> 
> On 25/03/2020 07:13, Stefan Liebler via Libc-alpha wrote:
>> Unfortunately, this patch is responsible for testfails on x86_64:
>>
>> math/test-float128-exp.out:
>> Failure: exp (-0x1p-10000): Exception "Underflow" set
>> Failure: exp (-0x2p-16384): Exception "Underflow" set
>> ...
>>
>> math/test-float128-cexp.out:
>> Failure: Real part of: cexp (0x2p-16384 - 0x4p-1076 i): Exception "Underflow" set
>> Failure: Real part of: cexp (0x2p-16384 - 0x8p-152 i): Exception "Underflow" set
> 
> The sysdeps/x86/fpu/fenv_private.h states:
> 
> 296 #ifdef __x86_64__
> 297 /* The SSE rounding mode is used by soft-fp (libgcc and glibc) on
> 298    x86_64, so that must be set for float128 computations.  */
> 299 # define SET_RESTORE_ROUNDF128(RM) \
> 300   SET_RESTORE_ROUND_GENERIC (RM, libc_feholdsetround_sse, libc_feresetround_sse)
> 
> So 
> 
>>> diff --git a/sysdeps/ieee754/ldbl-128/e_expl.c b/sysdeps/ieee754/ldbl-128/e_expl.c
>>> index 37c1538c08..104ace1690 100644
>>> --- a/sysdeps/ieee754/ldbl-128/e_expl.c
>>> +++ b/sysdeps/ieee754/ldbl-128/e_expl.c
>>> @@ -66,6 +66,7 @@
>>>   #include <inttypes.h>
>>>   #include <math-barriers.h>
>>>   #include <math_private.h>
>>> +#include <fenv_private.h>
>>>   #include <math-underflow.h>
>>>   #include <stdlib.h>
>>>   #include "t_expl.h"
>>> @@ -146,9 +147,10 @@ __ieee754_expl (_Float128 x)
>>>         union ieee854_long_double ex2_u, scale_u;
>>>         fenv_t oldenv;
>>>   -      feholdexcept (&oldenv);
>>>   #ifdef FE_TONEAREST
>>> -      fesetround (FE_TONEAREST);
>>> +      libc_feholdexcept_setroundl (&oldenv, FE_TONEAREST);
> 
> Should be libc_feholdexcept_setroundf128.

But that does not seem to help here, so I don't know what is failing either.

> 
>>> +#else
>>> +      libc_feholdexceptl (&oldenv);
> 
> And here libc_fesetenvf128.
> 
>>>   #endif
>>>           /* Calculate n.  */
>>> @@ -198,7 +200,7 @@ __ieee754_expl (_Float128 x)
>>>         math_force_eval (x22);
>>>           /* Return result.  */
>>> -      fesetenv (&oldenv);
>>> +      libc_fesetenvl (&oldenv);
>>>           result = x22 * ex2_u.d + ex2_u.d;
> 
> It might require extend the libc_*f128 macros to other architectures
> (not sure).
> 

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 3/4] Use libc_fe* macros in ldbl-128/e_expl.c.
  2020-03-25 15:07       ` Adhemerval Zanella
@ 2020-03-25 15:42         ` Adhemerval Zanella
  2020-03-26  9:08           ` Stefan Liebler
  2020-03-26  9:08         ` Stefan Liebler
  1 sibling, 1 reply; 14+ messages in thread
From: Adhemerval Zanella @ 2020-03-25 15:42 UTC (permalink / raw)
  To: libc-alpha



On 25/03/2020 12:07, Adhemerval Zanella wrote:
> 
> 
> On 25/03/2020 12:00, Adhemerval Zanella wrote:
>>
>>
>> On 25/03/2020 07:13, Stefan Liebler via Libc-alpha wrote:
>>> Unfortunately, this patch is responsible for testfails on x86_64:
>>>
>>> math/test-float128-exp.out:
>>> Failure: exp (-0x1p-10000): Exception "Underflow" set
>>> Failure: exp (-0x2p-16384): Exception "Underflow" set
>>> ...
>>>
>>> math/test-float128-cexp.out:
>>> Failure: Real part of: cexp (0x2p-16384 - 0x4p-1076 i): Exception "Underflow" set
>>> Failure: Real part of: cexp (0x2p-16384 - 0x8p-152 i): Exception "Underflow" set
>>
>> The sysdeps/x86/fpu/fenv_private.h states:
>>
>> 296 #ifdef __x86_64__
>> 297 /* The SSE rounding mode is used by soft-fp (libgcc and glibc) on
>> 298    x86_64, so that must be set for float128 computations.  */
>> 299 # define SET_RESTORE_ROUNDF128(RM) \
>> 300   SET_RESTORE_ROUND_GENERIC (RM, libc_feholdsetround_sse, libc_feresetround_sse)
>>
>> So 
>>
>>>> diff --git a/sysdeps/ieee754/ldbl-128/e_expl.c b/sysdeps/ieee754/ldbl-128/e_expl.c
>>>> index 37c1538c08..104ace1690 100644
>>>> --- a/sysdeps/ieee754/ldbl-128/e_expl.c
>>>> +++ b/sysdeps/ieee754/ldbl-128/e_expl.c
>>>> @@ -66,6 +66,7 @@
>>>>   #include <inttypes.h>
>>>>   #include <math-barriers.h>
>>>>   #include <math_private.h>
>>>> +#include <fenv_private.h>
>>>>   #include <math-underflow.h>
>>>>   #include <stdlib.h>
>>>>   #include "t_expl.h"
>>>> @@ -146,9 +147,10 @@ __ieee754_expl (_Float128 x)
>>>>         union ieee854_long_double ex2_u, scale_u;
>>>>         fenv_t oldenv;
>>>>   -      feholdexcept (&oldenv);
>>>>   #ifdef FE_TONEAREST
>>>> -      fesetround (FE_TONEAREST);
>>>> +      libc_feholdexcept_setroundl (&oldenv, FE_TONEAREST);
>>
>> Should be libc_feholdexcept_setroundf128.
> 
> But it does not see to help here, so I don't know what is failing as well.

OK, so what is happening is that __sfp_handle_exceptions always uses the
387 exception mode for FP_EX_OVERFLOW and FP_EX_UNDERFLOW:

config/i386/sfp-exceptions.c

 79   if (_fex & FP_EX_OVERFLOW)
 80     {
 81       struct fenv temp;
 82       asm volatile ("fnstenv\t%0" : "=m" (temp));
 83       temp.__status_word |= FP_EX_OVERFLOW;
 84       asm volatile ("fldenv\t%0" : : "m" (temp));
 85       asm volatile ("fwait");
 86     }
 87   if (_fex & FP_EX_UNDERFLOW)
 88     {
 89       struct fenv temp;
 90       asm volatile ("fnstenv\t%0" : "=m" (temp));
 91       temp.__status_word |= FP_EX_UNDERFLOW;
 92       asm volatile ("fldenv\t%0" : : "m" (temp));
 93       asm volatile ("fwait");
 94     }

This differs from FP_EX_INEXACT, for instance, where __SSE_MATH__
determines whether SSE is used or not.

So I think it is not safe to use the SSE variants for libc_*_testf128,
as for i387 we should use the default_* instead.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 3/4] Use libc_fe* macros in ldbl-128/e_expl.c.
  2020-03-25 15:42         ` Adhemerval Zanella
@ 2020-03-26  9:08           ` Stefan Liebler
  2020-03-26 14:53             ` Adhemerval Zanella
  0 siblings, 1 reply; 14+ messages in thread
From: Stefan Liebler @ 2020-03-26  9:08 UTC (permalink / raw)
  To: libc-alpha

On 3/25/20 4:42 PM, Adhemerval Zanella via Libc-alpha wrote:
> 
> 
> On 25/03/2020 12:07, Adhemerval Zanella wrote:
>>
>>
>> On 25/03/2020 12:00, Adhemerval Zanella wrote:
>>>
>>>
>>> On 25/03/2020 07:13, Stefan Liebler via Libc-alpha wrote:
>>>> Unfortunately, this patch is responsible for testfails on x86_64:
>>>>
>>>> math/test-float128-exp.out:
>>>> Failure: exp (-0x1p-10000): Exception "Underflow" set
>>>> Failure: exp (-0x2p-16384): Exception "Underflow" set
>>>> ...
>>>>
>>>> math/test-float128-cexp.out:
>>>> Failure: Real part of: cexp (0x2p-16384 - 0x4p-1076 i): Exception "Underflow" set
>>>> Failure: Real part of: cexp (0x2p-16384 - 0x8p-152 i): Exception "Underflow" set
>>>
>>> The sysdeps/x86/fpu/fenv_private.h states:
>>>
>>> 296 #ifdef __x86_64__
>>> 297 /* The SSE rounding mode is used by soft-fp (libgcc and glibc) on
>>> 298    x86_64, so that must be set for float128 computations.  */
>>> 299 # define SET_RESTORE_ROUNDF128(RM) \
>>> 300   SET_RESTORE_ROUND_GENERIC (RM, libc_feholdsetround_sse, libc_feresetround_sse)
>>>
>>> So
>>>
>>>>> diff --git a/sysdeps/ieee754/ldbl-128/e_expl.c b/sysdeps/ieee754/ldbl-128/e_expl.c
>>>>> index 37c1538c08..104ace1690 100644
>>>>> --- a/sysdeps/ieee754/ldbl-128/e_expl.c
>>>>> +++ b/sysdeps/ieee754/ldbl-128/e_expl.c
>>>>> @@ -66,6 +66,7 @@
>>>>>    #include <inttypes.h>
>>>>>    #include <math-barriers.h>
>>>>>    #include <math_private.h>
>>>>> +#include <fenv_private.h>
>>>>>    #include <math-underflow.h>
>>>>>    #include <stdlib.h>
>>>>>    #include "t_expl.h"
>>>>> @@ -146,9 +147,10 @@ __ieee754_expl (_Float128 x)
>>>>>          union ieee854_long_double ex2_u, scale_u;
>>>>>          fenv_t oldenv;
>>>>>    -      feholdexcept (&oldenv);
>>>>>    #ifdef FE_TONEAREST
>>>>> -      fesetround (FE_TONEAREST);
>>>>> +      libc_feholdexcept_setroundl (&oldenv, FE_TONEAREST);
>>>
>>> Should be libc_feholdexcept_setroundf128.
>>
>> But it does not see to help here, so I don't know what is failing as well.
> 
> Ok, so what is happening __sfp_handle_exceptions always use 387 exception
> mode for FP_EX_OVERFLOW and FP_EX_UNDERFLOW:
> 
> config/i386/sfp-exceptions.c
> 
>   79   if (_fex & FP_EX_OVERFLOW)
>   80     {
>   81       struct fenv temp;
>   82       asm volatile ("fnstenv\t%0" : "=m" (temp));
>   83       temp.__status_word |= FP_EX_OVERFLOW;
>   84       asm volatile ("fldenv\t%0" : : "m" (temp));
>   85       asm volatile ("fwait");
>   86     }
>   87   if (_fex & FP_EX_UNDERFLOW)
>   88     {
>   89       struct fenv temp;
>   90       asm volatile ("fnstenv\t%0" : "=m" (temp));
>   91       temp.__status_word |= FP_EX_UNDERFLOW;
>   92       asm volatile ("fldenv\t%0" : : "m" (temp));
>   93       asm volatile ("fwait");
>   94     }
>
Yes, this looks like the disassembly mentioned above.
>
> Different that FP_EX_INEXACT, for instance, where __SSE_MATH__ sets
> whether SSE is used or not.
> 
> So I think it is not safe to use the SSE variants for libc_*_testf128,
> as for i387 we should use the default_* instead.
> 
I've just switched to default_* in sysdeps/x86/fpu/fenv_private.h:
-#ifdef __x86_64__
+#if 0
  /* The SSE rounding mode is used by soft-fp (libgcc and glibc) on
     x86_64, so that must be set for float128 computations.  */
  # define SET_RESTORE_ROUNDF128(RM) \

But now there are 7 test failures. For some of them, the reported ulp goes 
up (to over 10, exceeding max.ulp), but there are even worse cases. Here are 
shortened excerpts of the out files:
- math/test-float128-clog.out:
Failure: Test: Real part of: clog_towardzero 
(0x2.82b795e420b281a934c6dd315cb2p-4 + 
0xf.cd42a15bf9a361243a89663e81e8p-4 i)
  ulp       :  162259276829213363391578010288127.0000
  max.ulp   :  3.0000
Failure: Test: Real part of: clog_upward 
(0x2.82b795e420b281a934c6dd315cb2p-4 + 
0xf.cd42a15bf9a361243a89663e81e8p-4 i)
  ulp       :  162259276829213363391578010288128.0000
  max.ulp   :  4.0000

- math/test-float128-clog10.out:
Failure: Test: Real part of: clog10_downward (0x3.bea2bd62e35p-4 + 
0xf.8e3d619a8d11bfd30b038eep-4 i)
  ulp       :  4.0000
  max.ulp   :  3.0000
Failure: Test: Real part of: clog10_towardzero 
(0x2.82b795e420b281a934c6dd315cb2p-4 + 
0xf.cd42a15bf9a361243a89663e81e8p-4 i)
  ulp       :  140936617129079063283494433422698.0000
  max.ulp   :  4.0000
Failure: Test: Real part of: clog10_upward 
(0x2.82b795e420b281a934c6dd315cb2p-4 + 
0xf.cd42a15bf9a361243a89663e81e8p-4 i)
  ulp       :  140936617129079063283494433422698.0000
  max.ulp   :  4.0000

- math/test-float128-jn.out
- math/test-float128-lgamma.out
- math/test-float128-tgamma.out:
something like:
  ulp       :  12.0000
  max.ulp   :  4.0000

Failure: tgamma_upward (-0x6.ec00000000000008p+8): errno set to 0, 
expected 34 (ERANGE)

- math/test-float128-y1.out:
Failure: Test: y1_downward (0x2p+0)
  ulp       :  13.0000
  max.ulp   :  4.0000
Failure: Test: y1_towardzero (0x2p+0)
  ulp       :  6.0000
  max.ulp   :  2.0000
Failure: Test: y1_upward (0x2p+0)
  ulp       :  10.0000
  max.ulp   :  5.0000

- math/test-float128-yn.out


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 3/4] Use libc_fe* macros in ldbl-128/e_expl.c.
  2020-03-26  9:08           ` Stefan Liebler
@ 2020-03-26 14:53             ` Adhemerval Zanella
  2020-03-27 14:23               ` Stefan Liebler
  0 siblings, 1 reply; 14+ messages in thread
From: Adhemerval Zanella @ 2020-03-26 14:53 UTC (permalink / raw)
  To: libc-alpha, Stefan Liebler via Libc-alpha



On 26/03/2020 06:08, Stefan Liebler via Libc-alpha wrote:
> On 3/25/20 4:42 PM, Adhemerval Zanella via Libc-alpha wrote:
>>
>>
>> On 25/03/2020 12:07, Adhemerval Zanella wrote:
>>>
>>>
>>> On 25/03/2020 12:00, Adhemerval Zanella wrote:
>>>>
>>>>
>>>> On 25/03/2020 07:13, Stefan Liebler via Libc-alpha wrote:
>>>>> Unfortunately, this patch is responsible for testfails on x86_64:
>>>>>
>>>>> math/test-float128-exp.out:
>>>>> Failure: exp (-0x1p-10000): Exception "Underflow" set
>>>>> Failure: exp (-0x2p-16384): Exception "Underflow" set
>>>>> ...
>>>>>
>>>>> math/test-float128-cexp.out:
>>>>> Failure: Real part of: cexp (0x2p-16384 - 0x4p-1076 i): Exception "Underflow" set
>>>>> Failure: Real part of: cexp (0x2p-16384 - 0x8p-152 i): Exception "Underflow" set
>>>>
>>>> The sysdeps/x86/fpu/fenv_private.h states:
>>>>
>>>> 296 #ifdef __x86_64__
>>>> 297 /* The SSE rounding mode is used by soft-fp (libgcc and glibc) on
>>>> 298    x86_64, so that must be set for float128 computations.  */
>>>> 299 # define SET_RESTORE_ROUNDF128(RM) \
>>>> 300   SET_RESTORE_ROUND_GENERIC (RM, libc_feholdsetround_sse, libc_feresetround_sse)
>>>>
>>>> So
>>>>
>>>>>> diff --git a/sysdeps/ieee754/ldbl-128/e_expl.c b/sysdeps/ieee754/ldbl-128/e_expl.c
>>>>>> index 37c1538c08..104ace1690 100644
>>>>>> --- a/sysdeps/ieee754/ldbl-128/e_expl.c
>>>>>> +++ b/sysdeps/ieee754/ldbl-128/e_expl.c
>>>>>> @@ -66,6 +66,7 @@
>>>>>>    #include <inttypes.h>
>>>>>>    #include <math-barriers.h>
>>>>>>    #include <math_private.h>
>>>>>> +#include <fenv_private.h>
>>>>>>    #include <math-underflow.h>
>>>>>>    #include <stdlib.h>
>>>>>>    #include "t_expl.h"
>>>>>> @@ -146,9 +147,10 @@ __ieee754_expl (_Float128 x)
>>>>>>          union ieee854_long_double ex2_u, scale_u;
>>>>>>          fenv_t oldenv;
>>>>>>    -      feholdexcept (&oldenv);
>>>>>>    #ifdef FE_TONEAREST
>>>>>> -      fesetround (FE_TONEAREST);
>>>>>> +      libc_feholdexcept_setroundl (&oldenv, FE_TONEAREST);
>>>>
>>>> Should be libc_feholdexcept_setroundf128.
>>>
>>> But it does not see to help here, so I don't know what is failing as well.
>>
>> Ok, so what is happening __sfp_handle_exceptions always use 387 exception
>> mode for FP_EX_OVERFLOW and FP_EX_UNDERFLOW:
>>
>> config/i386/sfp-exceptions.c
>>
>>   79   if (_fex & FP_EX_OVERFLOW)
>>   80     {
>>   81       struct fenv temp;
>>   82       asm volatile ("fnstenv\t%0" : "=m" (temp));
>>   83       temp.__status_word |= FP_EX_OVERFLOW;
>>   84       asm volatile ("fldenv\t%0" : : "m" (temp));
>>   85       asm volatile ("fwait");
>>   86     }
>>   87   if (_fex & FP_EX_UNDERFLOW)
>>   88     {
>>   89       struct fenv temp;
>>   90       asm volatile ("fnstenv\t%0" : "=m" (temp));
>>   91       temp.__status_word |= FP_EX_UNDERFLOW;
>>   92       asm volatile ("fldenv\t%0" : : "m" (temp));
>>   93       asm volatile ("fwait");
>>   94     }
>> Yes this looks like the mentioned disassembly.
>> Different that FP_EX_INEXACT, for instance, where __SSE_MATH__ sets
>> whether SSE is used or not.
>>
>> So I think it is not safe to use the SSE variants for libc_*_testf128,
>> as for i387 we should use the default_* instead.
>>
> I've just switched to default_* in sysdeps/x86/fpu/fenv_private.h:
> -#ifdef __x86_64__
> +#if 0
>  /* The SSE rounding mode is used by soft-fp (libgcc and glibc) on
>     x86_64, so that must be set for float128 computations.  */
>  # define SET_RESTORE_ROUNDF128(RM) \
> 
> But now there are 7 testfails. For some of them, the max.ulp goes up (over 10), but there are even worse cases. Here are shortened excerpts of the out files:
> - math/test-float128-clog.out:
> Failure: Test: Real part of: clog_towardzero (0x2.82b795e420b281a934c6dd315cb2p-4 + 0xf.cd42a15bf9a361243a89663e81e8p-4 i)
>  ulp       :  162259276829213363391578010288127.0000
>  max.ulp   :  3.0000
> Failure: Test: Real part of: clog_upward (0x2.82b795e420b281a934c6dd315cb2p-4 + 0xf.cd42a15bf9a361243a89663e81e8p-4 i)
>  ulp       :  162259276829213363391578010288128.0000
>  max.ulp   :  4.0000
> 
> - math/test-float128-clog10.out:
> Failure: Test: Real part of: clog10_downward (0x3.bea2bd62e35p-4 + 0xf.8e3d619a8d11bfd30b038eep-4 i)
>  ulp       :  4.0000
>  max.ulp   :  3.0000
> Failure: Test: Real part of: clog10_towardzero (0x2.82b795e420b281a934c6dd315cb2p-4 + 0xf.cd42a15bf9a361243a89663e81e8p-4 i)
>  ulp       :  140936617129079063283494433422698.0000
>  max.ulp   :  4.0000
> Failure: Test: Real part of: clog10_upward (0x2.82b795e420b281a934c6dd315cb2p-4 + 0xf.cd42a15bf9a361243a89663e81e8p-4 i)
>  ulp       :  140936617129079063283494433422698.0000
>  max.ulp   :  4.0000
> 
> - math/test-float128-jn.out
> - math/test-float128-lgamma.out
> - math/test-float128-tgamma.out:
> something like:
>  ulp       :  12.0000
>  max.ulp   :  4.0000
> 
> Failure: tgamma_upward (-0x6.ec00000000000008p+8): errno set to 0, expected 34 (ERANGE)
> 
> - math/test-float128-y1.out:
> Failure: Test: y1_downward (0x2p+0)
>  ulp       :  13.0000
>  max.ulp   :  4.0000
> Failure: Test: y1_towardzero (0x2p+0)
>  ulp       :  6.0000
>  max.ulp   :  2.0000
> Failure: Test: y1_upward (0x2p+0)
>  ulp       :  10.0000
>  max.ulp   :  5.0000
> 
> - math/test-float128-yn.out
> 

So it seems the issue is the mix of how the libm fenv functions, the internal
libc_fe* macros, and libgcc handle the exception register.  The exported fenv
functions operate on both i387 and SSE (since they should work on long double
as well), and the internal libc_fe* macros will set either SSE for float,
double, and float128 or i387 for long double (as expected).

libgcc, however, will set either SSE or i387 depending on the exception.
This breaks the assumption of libc_fe* for float128 where either SSE or i387
will be used.

One option might be to force libgcc to not use its __sfp_handle_exceptions
on x86_64 and provide one that uses only SSE operations, since libgcc does
not use 'long double' on float128 operations.  The patch below does it
and, applied on top of your patches, shows no regressions.

And I think we should fix libgcc in a similar manner, since checking on
config/i386/64/sfp-machine.h it only supports SSE rounding mode.

--

diff --git a/sysdeps/x86/fpu/sfp-exceptions.c b/sysdeps/x86/fpu/sfp-exceptions.c
new file mode 100644
index 0000000000..676f396bc3
--- /dev/null
+++ b/sysdeps/x86/fpu/sfp-exceptions.c
@@ -0,0 +1,49 @@
+#include <fenv.h>
+#include <float.h>
+#include <math-barriers.h>
+
+#define FP_EX_INVALID           0x01
+#define FP_EX_DENORM            0x02
+#define FP_EX_DIVZERO           0x04
+#define FP_EX_OVERFLOW          0x08
+#define FP_EX_UNDERFLOW         0x10
+#define FP_EX_INEXACT           0x20
+#define FP_EX_ALL \
+        (FP_EX_INVALID | FP_EX_DENORM | FP_EX_DIVZERO | FP_EX_OVERFLOW \
+         | FP_EX_UNDERFLOW | FP_EX_INEXACT)
+
+void
+__sfp_handle_exceptions (int _fex) /* Raise each FP_EX_* bit set in _FEX.  */
+{
+  if (_fex & FP_EX_INVALID)
+    {
+      float f = 0.0f;
+      math_force_eval (f / f); /* 0 / 0.  */
+    }
+  if (_fex & FP_EX_DENORM)
+    {
+      float f = FLT_MIN, g = 2.0f;
+      math_force_eval (f / g); /* Result is below the normal range.  */
+    }
+  if (_fex & FP_EX_DIVZERO)
+    {
+      float f = 1.0f, g = 0.0f;
+      math_force_eval (f / g); /* Finite nonzero value / 0.  */
+    }
+  if (_fex & FP_EX_OVERFLOW)
+    {
+      float force_overflow = FLT_MAX * FLT_MAX; /* Too large for float.  */
+      math_force_eval (force_overflow);
+    }
+  if (_fex & FP_EX_UNDERFLOW)
+    {
+      float force_underflow = FLT_MIN * FLT_MIN; /* Too small for float.  */
+      math_force_eval (force_underflow);
+    }
+  if (_fex & FP_EX_INEXACT)
+    {
+      float f = 1.0f, g = 3.0f;
+      math_force_eval (f / g); /* 1 / 3 is not exactly representable.  */
+    }
+}
+strong_alias (__sfp_handle_exceptions, __wrap___sfp_handle_exceptions) /* Name used by -Wl,--wrap.  */
diff --git a/sysdeps/x86_64/fpu/Makefile b/sysdeps/x86_64/fpu/Makefile
index a4ff2723a8..5becb96fa3 100644
--- a/sysdeps/x86_64/fpu/Makefile
+++ b/sysdeps/x86_64/fpu/Makefile
@@ -25,6 +25,9 @@ endif
 
 # Variables for libmvec tests.
 ifeq ($(subdir),math)
+libm-routines += sfp-exceptions
+LDFLAGS-m.so += -Wl,--wrap=__sfp_handle_exceptions
+
 ifeq ($(build-mathvec),yes)
 libmvec-tests += double-vlen2 double-vlen4 double-vlen4-avx2 \
 		 float-vlen4 float-vlen8 float-vlen8-avx2

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 3/4] Use libc_fe* macros in ldbl-128/e_expl.c.
  2020-03-26 14:53             ` Adhemerval Zanella
@ 2020-03-27 14:23               ` Stefan Liebler
  2020-03-30 18:12                 ` Adhemerval Zanella
  0 siblings, 1 reply; 14+ messages in thread
From: Stefan Liebler @ 2020-03-27 14:23 UTC (permalink / raw)
  To: libc-alpha

On 3/26/20 3:53 PM, Adhemerval Zanella via Libc-alpha wrote:
> 
> 
> On 26/03/2020 06:08, Stefan Liebler via Libc-alpha wrote:
>> On 3/25/20 4:42 PM, Adhemerval Zanella via Libc-alpha wrote:
>>>
>>>
>>> On 25/03/2020 12:07, Adhemerval Zanella wrote:
>>>>
>>>>
>>>> On 25/03/2020 12:00, Adhemerval Zanella wrote:
>>>>>
>>>>>
>>>>> On 25/03/2020 07:13, Stefan Liebler via Libc-alpha wrote:
>>>>>> Unfortunately, this patch is responsible for testfails on x86_64:
>>>>>>
>>>>>> math/test-float128-exp.out:
>>>>>> Failure: exp (-0x1p-10000): Exception "Underflow" set
>>>>>> Failure: exp (-0x2p-16384): Exception "Underflow" set
>>>>>> ...
>>>>>>
>>>>>> math/test-float128-cexp.out:
>>>>>> Failure: Real part of: cexp (0x2p-16384 - 0x4p-1076 i): Exception "Underflow" set
>>>>>> Failure: Real part of: cexp (0x2p-16384 - 0x8p-152 i): Exception "Underflow" set
>>>>>
>>>>> The sysdeps/x86/fpu/fenv_private.h states:
>>>>>
>>>>> 296 #ifdef __x86_64__
>>>>> 297 /* The SSE rounding mode is used by soft-fp (libgcc and glibc) on
>>>>> 298    x86_64, so that must be set for float128 computations.  */
>>>>> 299 # define SET_RESTORE_ROUNDF128(RM) \
>>>>> 300   SET_RESTORE_ROUND_GENERIC (RM, libc_feholdsetround_sse, libc_feresetround_sse)
>>>>>
>>>>> So
>>>>>
>>>>>>> diff --git a/sysdeps/ieee754/ldbl-128/e_expl.c b/sysdeps/ieee754/ldbl-128/e_expl.c
>>>>>>> index 37c1538c08..104ace1690 100644
>>>>>>> --- a/sysdeps/ieee754/ldbl-128/e_expl.c
>>>>>>> +++ b/sysdeps/ieee754/ldbl-128/e_expl.c
>>>>>>> @@ -66,6 +66,7 @@
>>>>>>>     #include <inttypes.h>
>>>>>>>     #include <math-barriers.h>
>>>>>>>     #include <math_private.h>
>>>>>>> +#include <fenv_private.h>
>>>>>>>     #include <math-underflow.h>
>>>>>>>     #include <stdlib.h>
>>>>>>>     #include "t_expl.h"
>>>>>>> @@ -146,9 +147,10 @@ __ieee754_expl (_Float128 x)
>>>>>>>           union ieee854_long_double ex2_u, scale_u;
>>>>>>>           fenv_t oldenv;
>>>>>>>     -      feholdexcept (&oldenv);
>>>>>>>     #ifdef FE_TONEAREST
>>>>>>> -      fesetround (FE_TONEAREST);
>>>>>>> +      libc_feholdexcept_setroundl (&oldenv, FE_TONEAREST);
>>>>>
>>>>> Should be libc_feholdexcept_setroundf128.
>>>>
>>>> But it does not see to help here, so I don't know what is failing as well.
>>>
>>> Ok, so what is happening __sfp_handle_exceptions always use 387 exception
>>> mode for FP_EX_OVERFLOW and FP_EX_UNDERFLOW:
>>>
>>> config/i386/sfp-exceptions.c
>>>
>>>    79   if (_fex & FP_EX_OVERFLOW)
>>>    80     {
>>>    81       struct fenv temp;
>>>    82       asm volatile ("fnstenv\t%0" : "=m" (temp));
>>>    83       temp.__status_word |= FP_EX_OVERFLOW;
>>>    84       asm volatile ("fldenv\t%0" : : "m" (temp));
>>>    85       asm volatile ("fwait");
>>>    86     }
>>>    87   if (_fex & FP_EX_UNDERFLOW)
>>>    88     {
>>>    89       struct fenv temp;
>>>    90       asm volatile ("fnstenv\t%0" : "=m" (temp));
>>>    91       temp.__status_word |= FP_EX_UNDERFLOW;
>>>    92       asm volatile ("fldenv\t%0" : : "m" (temp));
>>>    93       asm volatile ("fwait");
>>>    94     }
>>> Yes this looks like the mentioned disassembly.
>>> Different that FP_EX_INEXACT, for instance, where __SSE_MATH__ sets
>>> whether SSE is used or not.
>>>
>>> So I think it is not safe to use the SSE variants for libc_*_testf128,
>>> as for i387 we should use the default_* instead.
>>>
>> I've just switched to default_* in sysdeps/x86/fpu/fenv_private.h:
>> -#ifdef __x86_64__
>> +#if 0
>>   /* The SSE rounding mode is used by soft-fp (libgcc and glibc) on
>>      x86_64, so that must be set for float128 computations.  */
>>   # define SET_RESTORE_ROUNDF128(RM) \
>>
>> But now there are 7 testfails. For some of them, the max.ulp goes up (over 10), but there are even worse cases. Here are shortened excerpts of the out files:
>> - math/test-float128-clog.out:
>> Failure: Test: Real part of: clog_towardzero (0x2.82b795e420b281a934c6dd315cb2p-4 + 0xf.cd42a15bf9a361243a89663e81e8p-4 i)
>>   ulp       :  162259276829213363391578010288127.0000
>>   max.ulp   :  3.0000
>> Failure: Test: Real part of: clog_upward (0x2.82b795e420b281a934c6dd315cb2p-4 + 0xf.cd42a15bf9a361243a89663e81e8p-4 i)
>>   ulp       :  162259276829213363391578010288128.0000
>>   max.ulp   :  4.0000
>>
>> - math/test-float128-clog10.out:
>> Failure: Test: Real part of: clog10_downward (0x3.bea2bd62e35p-4 + 0xf.8e3d619a8d11bfd30b038eep-4 i)
>>   ulp       :  4.0000
>>   max.ulp   :  3.0000
>> Failure: Test: Real part of: clog10_towardzero (0x2.82b795e420b281a934c6dd315cb2p-4 + 0xf.cd42a15bf9a361243a89663e81e8p-4 i)
>>   ulp       :  140936617129079063283494433422698.0000
>>   max.ulp   :  4.0000
>> Failure: Test: Real part of: clog10_upward (0x2.82b795e420b281a934c6dd315cb2p-4 + 0xf.cd42a15bf9a361243a89663e81e8p-4 i)
>>   ulp       :  140936617129079063283494433422698.0000
>>   max.ulp   :  4.0000
>>
>> - math/test-float128-jn.out
>> - math/test-float128-lgamma.out
>> - math/test-float128-tgamma.out:
>> something like:
>>   ulp       :  12.0000
>>   max.ulp   :  4.0000
>>
>> Failure: tgamma_upward (-0x6.ec00000000000008p+8): errno set to 0, expected 34 (ERANGE)
>>
>> - math/test-float128-y1.out:
>> Failure: Test: y1_downward (0x2p+0)
>>   ulp       :  13.0000
>>   max.ulp   :  4.0000
>> Failure: Test: y1_towardzero (0x2p+0)
>>   ulp       :  6.0000
>>   max.ulp   :  2.0000
>> Failure: Test: y1_upward (0x2p+0)
>>   ulp       :  10.0000
>>   max.ulp   :  5.0000
>>
>> - math/test-float128-yn.out
>>
> 
> So it seems the issue is the mix on how libm fenv function, the internal
> libc_fe*, and libgcc handles the exception register.  The exported fenv
> operates on both i387 and SSE (since it should work on long double as well),
> and the internal libc_fe* will set either SSE for float, double, and float128
> and i387 for long double (as expected).
> 
> The libgcc, however, will set either SEE or i387 depending of the exception.
> This broke the assumption of libc_fe* for float128 where either SSE or i387
> will be used.
> 
> One option might be to force libgcc to not use its __sfp_handle_exceptions
> on x86_64 and provide one that uses only SEE operations since libgcc does
> not use 'long double' on float128 operations.  The patch below does it
> and applied on top your patches shows no regressions.

Great news. Thanks Adhemerval.
I've also successfully built and run the testsuite with your patch on
top of mine and with only your patch without mine.

As e.g. __multf3 or __addtf3 is used in various f128 functions, can you 
please first commit your patch? Then I will add a reference to this 
commit id in the commit-message.

One other question: Why are the soft-fp functions (for add / multiply)
called at all?  Are the corresponding hardware instructions not available
on all x86_64 machines?  Or are we missing a compiler flag?
> 
> And I think we should fix libgcc in a similar manner, since checking on
> config/i386/64/sfp-machine.h it only support SSE rounding mode.
> 
> --
> 
> diff --git a/sysdeps/x86/fpu/sfp-exceptions.c b/sysdeps/x86/fpu/sfp-exceptions.c
> new file mode 100644
> index 0000000000..676f396bc3
> --- /dev/null
> +++ b/sysdeps/x86/fpu/sfp-exceptions.c
> @@ -0,0 +1,49 @@
> +#include <fenv.h>
> +#include <float.h>
> +#include <math-barriers.h>
> +
> +#define FP_EX_INVALID           0x01
> +#define FP_EX_DENORM            0x02
> +#define FP_EX_DIVZERO           0x04
> +#define FP_EX_OVERFLOW          0x08
> +#define FP_EX_UNDERFLOW         0x10
> +#define FP_EX_INEXACT           0x20
> +#define FP_EX_ALL \
> +        (FP_EX_INVALID | FP_EX_DENORM | FP_EX_DIVZERO | FP_EX_OVERFLOW \
> +         | FP_EX_UNDERFLOW | FP_EX_INEXACT)
> +
> +void
> +__sfp_handle_exceptions (int _fex)
> +{
> +  if (_fex & FP_EX_INVALID)
> +    {
> +      float f = 0.0f;
> +      math_force_eval (f / f);
> +    }
> +  if (_fex & FP_EX_DENORM)
> +    {
> +      float f = FLT_MIN, g = 2.0f;
> +      math_force_eval (f / g);
> +    }
> +  if (_fex & FP_EX_DIVZERO)
> +    {
> +      float f = 1.0f, g = 0.0f;
> +      math_force_eval (f / g);
> +    }
> +  if (_fex & FP_EX_OVERFLOW)
> +    {
> +      float force_underflow = FLT_MAX * FLT_MAX;
> +      math_force_eval (force_underflow);
> +    }
> +  if (_fex & FP_EX_UNDERFLOW)
> +    {
> +      float force_overflow = FLT_MIN * FLT_MIN;
> +      math_force_eval (force_overflow);
> +    }
> +  if (_fex & FP_EX_INEXACT)
> +    {
> +      float f = 1.0f, g = 3.0f;
> +      math_force_eval (f / g);
> +    }
> +}
> +strong_alias (__sfp_handle_exceptions, __wrap___sfp_handle_exceptions)
> diff --git a/sysdeps/x86_64/fpu/Makefile b/sysdeps/x86_64/fpu/Makefile
> index a4ff2723a8..5becb96fa3 100644
> --- a/sysdeps/x86_64/fpu/Makefile
> +++ b/sysdeps/x86_64/fpu/Makefile
> @@ -25,6 +25,9 @@ endif
> 
>   # Variables for libmvec tests.
>   ifeq ($(subdir),math)
> +libm-routines += sfp-exceptions
> +LDFLAGS-m.so += -Wl,--wrap=__sfp_handle_exceptions
> +
>   ifeq ($(build-mathvec),yes)
>   libmvec-tests += double-vlen2 double-vlen4 double-vlen4-avx2 \
>   		 float-vlen4 float-vlen8 float-vlen8-avx2
> 


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 3/4] Use libc_fe* macros in ldbl-128/e_expl.c.
  2020-03-27 14:23               ` Stefan Liebler
@ 2020-03-30 18:12                 ` Adhemerval Zanella
  2020-03-31  7:39                   ` Stefan Liebler
  0 siblings, 1 reply; 14+ messages in thread
From: Adhemerval Zanella @ 2020-03-30 18:12 UTC (permalink / raw)
  To: libc-alpha



On 27/03/2020 11:23, Stefan Liebler via Libc-alpha wrote:
> On 3/26/20 3:53 PM, Adhemerval Zanella via Libc-alpha wrote:
>>
>>
>> On 26/03/2020 06:08, Stefan Liebler via Libc-alpha wrote:
>>> On 3/25/20 4:42 PM, Adhemerval Zanella via Libc-alpha wrote:
>>>>
>>>>
>>>> On 25/03/2020 12:07, Adhemerval Zanella wrote:
>>>>>
>>>>>
>>>>> On 25/03/2020 12:00, Adhemerval Zanella wrote:
>>>>>>
>>>>>>
>>>>>> On 25/03/2020 07:13, Stefan Liebler via Libc-alpha wrote:
>>>>>>> Unfortunately, this patch is responsible for testfails on x86_64:
>>>>>>>
>>>>>>> math/test-float128-exp.out:
>>>>>>> Failure: exp (-0x1p-10000): Exception "Underflow" set
>>>>>>> Failure: exp (-0x2p-16384): Exception "Underflow" set
>>>>>>> ...
>>>>>>>
>>>>>>> math/test-float128-cexp.out:
>>>>>>> Failure: Real part of: cexp (0x2p-16384 - 0x4p-1076 i): Exception "Underflow" set
>>>>>>> Failure: Real part of: cexp (0x2p-16384 - 0x8p-152 i): Exception "Underflow" set
>>>>>>
>>>>>> The sysdeps/x86/fpu/fenv_private.h states:
>>>>>>
>>>>>> 296 #ifdef __x86_64__
>>>>>> 297 /* The SSE rounding mode is used by soft-fp (libgcc and glibc) on
>>>>>> 298    x86_64, so that must be set for float128 computations.  */
>>>>>> 299 # define SET_RESTORE_ROUNDF128(RM) \
>>>>>> 300   SET_RESTORE_ROUND_GENERIC (RM, libc_feholdsetround_sse, libc_feresetround_sse)
>>>>>>
>>>>>> So
>>>>>>
>>>>>>>> diff --git a/sysdeps/ieee754/ldbl-128/e_expl.c b/sysdeps/ieee754/ldbl-128/e_expl.c
>>>>>>>> index 37c1538c08..104ace1690 100644
>>>>>>>> --- a/sysdeps/ieee754/ldbl-128/e_expl.c
>>>>>>>> +++ b/sysdeps/ieee754/ldbl-128/e_expl.c
>>>>>>>> @@ -66,6 +66,7 @@
>>>>>>>>     #include <inttypes.h>
>>>>>>>>     #include <math-barriers.h>
>>>>>>>>     #include <math_private.h>
>>>>>>>> +#include <fenv_private.h>
>>>>>>>>     #include <math-underflow.h>
>>>>>>>>     #include <stdlib.h>
>>>>>>>>     #include "t_expl.h"
>>>>>>>> @@ -146,9 +147,10 @@ __ieee754_expl (_Float128 x)
>>>>>>>>           union ieee854_long_double ex2_u, scale_u;
>>>>>>>>           fenv_t oldenv;
>>>>>>>>     -      feholdexcept (&oldenv);
>>>>>>>>     #ifdef FE_TONEAREST
>>>>>>>> -      fesetround (FE_TONEAREST);
>>>>>>>> +      libc_feholdexcept_setroundl (&oldenv, FE_TONEAREST);
>>>>>>
>>>>>> Should be libc_feholdexcept_setroundf128.
>>>>>
>>>>> But it does not see to help here, so I don't know what is failing as well.
>>>>
>>>> Ok, so what is happening __sfp_handle_exceptions always use 387 exception
>>>> mode for FP_EX_OVERFLOW and FP_EX_UNDERFLOW:
>>>>
>>>> config/i386/sfp-exceptions.c
>>>>
>>>>    79   if (_fex & FP_EX_OVERFLOW)
>>>>    80     {
>>>>    81       struct fenv temp;
>>>>    82       asm volatile ("fnstenv\t%0" : "=m" (temp));
>>>>    83       temp.__status_word |= FP_EX_OVERFLOW;
>>>>    84       asm volatile ("fldenv\t%0" : : "m" (temp));
>>>>    85       asm volatile ("fwait");
>>>>    86     }
>>>>    87   if (_fex & FP_EX_UNDERFLOW)
>>>>    88     {
>>>>    89       struct fenv temp;
>>>>    90       asm volatile ("fnstenv\t%0" : "=m" (temp));
>>>>    91       temp.__status_word |= FP_EX_UNDERFLOW;
>>>>    92       asm volatile ("fldenv\t%0" : : "m" (temp));
>>>>    93       asm volatile ("fwait");
>>>>    94     }
>>>> Yes this looks like the mentioned disassembly.
>>>> Different that FP_EX_INEXACT, for instance, where __SSE_MATH__ sets
>>>> whether SSE is used or not.
>>>>
>>>> So I think it is not safe to use the SSE variants for libc_*_testf128,
>>>> as for i387 we should use the default_* instead.
>>>>
>>> I've just switched to default_* in sysdeps/x86/fpu/fenv_private.h:
>>> -#ifdef __x86_64__
>>> +#if 0
>>>   /* The SSE rounding mode is used by soft-fp (libgcc and glibc) on
>>>      x86_64, so that must be set for float128 computations.  */
>>>   # define SET_RESTORE_ROUNDF128(RM) \
>>>
>>> But now there are 7 testfails. For some of them, the max.ulp goes up (over 10), but there are even worse cases. Here are shortened excerpts of the out files:
>>> - math/test-float128-clog.out:
>>> Failure: Test: Real part of: clog_towardzero (0x2.82b795e420b281a934c6dd315cb2p-4 + 0xf.cd42a15bf9a361243a89663e81e8p-4 i)
>>>   ulp       :  162259276829213363391578010288127.0000
>>>   max.ulp   :  3.0000
>>> Failure: Test: Real part of: clog_upward (0x2.82b795e420b281a934c6dd315cb2p-4 + 0xf.cd42a15bf9a361243a89663e81e8p-4 i)
>>>   ulp       :  162259276829213363391578010288128.0000
>>>   max.ulp   :  4.0000
>>>
>>> - math/test-float128-clog10.out:
>>> Failure: Test: Real part of: clog10_downward (0x3.bea2bd62e35p-4 + 0xf.8e3d619a8d11bfd30b038eep-4 i)
>>>   ulp       :  4.0000
>>>   max.ulp   :  3.0000
>>> Failure: Test: Real part of: clog10_towardzero (0x2.82b795e420b281a934c6dd315cb2p-4 + 0xf.cd42a15bf9a361243a89663e81e8p-4 i)
>>>   ulp       :  140936617129079063283494433422698.0000
>>>   max.ulp   :  4.0000
>>> Failure: Test: Real part of: clog10_upward (0x2.82b795e420b281a934c6dd315cb2p-4 + 0xf.cd42a15bf9a361243a89663e81e8p-4 i)
>>>   ulp       :  140936617129079063283494433422698.0000
>>>   max.ulp   :  4.0000
>>>
>>> - math/test-float128-jn.out
>>> - math/test-float128-lgamma.out
>>> - math/test-float128-tgamma.out:
>>> something like:
>>>   ulp       :  12.0000
>>>   max.ulp   :  4.0000
>>>
>>> Failure: tgamma_upward (-0x6.ec00000000000008p+8): errno set to 0, expected 34 (ERANGE)
>>>
>>> - math/test-float128-y1.out:
>>> Failure: Test: y1_downward (0x2p+0)
>>>   ulp       :  13.0000
>>>   max.ulp   :  4.0000
>>> Failure: Test: y1_towardzero (0x2p+0)
>>>   ulp       :  6.0000
>>>   max.ulp   :  2.0000
>>> Failure: Test: y1_upward (0x2p+0)
>>>   ulp       :  10.0000
>>>   max.ulp   :  5.0000
>>>
>>> - math/test-float128-yn.out
>>>
>>
>> So it seems the issue is the mix on how libm fenv function, the internal
>> libc_fe*, and libgcc handles the exception register.  The exported fenv
>> operates on both i387 and SSE (since it should work on long double as well),
>> and the internal libc_fe* will set either SSE for float, double, and float128
>> and i387 for long double (as expected).
>>
>> The libgcc, however, will set either SEE or i387 depending of the exception.
>> This broke the assumption of libc_fe* for float128 where either SSE or i387
>> will be used.
>>
>> One option might be to force libgcc to not use its __sfp_handle_exceptions
>> on x86_64 and provide one that uses only SEE operations since libgcc does
>> not use 'long double' on float128 operations.  The patch below does it
>> and applied on top your patches shows no regressions.
> 
> Great news. Thanks Adhemerval.
> I've also successfully build and run the testsuite with your patch on top of mine and with only your patch without mine.
> 
> As e.g. __multf3 or __addtf3 is used in various f128 functions, can you please first commit your patch? Then I will add a reference to this commit id in the commit-message.

I will send an RFC for this patch; we need to check with the x86 maintainers
whether this is the desirable direction and whether I got everything right.

> 
> One other question: Why are the soft-fp functions (for add / multiply) called at all. Are the corresponding hardware instructions not available on all x86_64 machines? Or do we miss a compiler flag?

float128 on gcc/x86_64 is implemented by the soft-fp library in libgcc [1],
and its ABI passes arguments through SSE registers [2].

[1] https://stackoverflow.com/questions/26639477/what-exactly-is-a-float128-if-im-using-gcc-4-9-on-x86-64
[2] https://github.com/hjl-tools/x86-psABI/wiki/X86-psABI

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 3/4] Use libc_fe* macros in ldbl-128/e_expl.c.
  2020-03-30 18:12                 ` Adhemerval Zanella
@ 2020-03-31  7:39                   ` Stefan Liebler
  0 siblings, 0 replies; 14+ messages in thread
From: Stefan Liebler @ 2020-03-31  7:39 UTC (permalink / raw)
  To: libc-alpha

On 3/30/20 8:12 PM, Adhemerval Zanella via Libc-alpha wrote:
> 
> 
> On 27/03/2020 11:23, Stefan Liebler via Libc-alpha wrote:
>> On 3/26/20 3:53 PM, Adhemerval Zanella via Libc-alpha wrote:
>>>
>>>
>>> On 26/03/2020 06:08, Stefan Liebler via Libc-alpha wrote:
>>>> On 3/25/20 4:42 PM, Adhemerval Zanella via Libc-alpha wrote:
>>>>>
>>>>>
>>>>> On 25/03/2020 12:07, Adhemerval Zanella wrote:
>>>>>>
>>>>>>
>>>>>> On 25/03/2020 12:00, Adhemerval Zanella wrote:
>>>>>>>
>>>>>>>
>>>>>>> On 25/03/2020 07:13, Stefan Liebler via Libc-alpha wrote:
>>>>>>>> Unfortunately, this patch is responsible for testfails on x86_64:
>>>>>>>>
>>>>>>>> math/test-float128-exp.out:
>>>>>>>> Failure: exp (-0x1p-10000): Exception "Underflow" set
>>>>>>>> Failure: exp (-0x2p-16384): Exception "Underflow" set
>>>>>>>> ...
>>>>>>>>
>>>>>>>> math/test-float128-cexp.out:
>>>>>>>> Failure: Real part of: cexp (0x2p-16384 - 0x4p-1076 i): Exception "Underflow" set
>>>>>>>> Failure: Real part of: cexp (0x2p-16384 - 0x8p-152 i): Exception "Underflow" set
>>>>>>>
>>>>>>> The sysdeps/x86/fpu/fenv_private.h states:
>>>>>>>
>>>>>>> 296 #ifdef __x86_64__
>>>>>>> 297 /* The SSE rounding mode is used by soft-fp (libgcc and glibc) on
>>>>>>> 298    x86_64, so that must be set for float128 computations.  */
>>>>>>> 299 # define SET_RESTORE_ROUNDF128(RM) \
>>>>>>> 300   SET_RESTORE_ROUND_GENERIC (RM, libc_feholdsetround_sse, libc_feresetround_sse)
>>>>>>>
>>>>>>> So
>>>>>>>
>>>>>>>>> diff --git a/sysdeps/ieee754/ldbl-128/e_expl.c b/sysdeps/ieee754/ldbl-128/e_expl.c
>>>>>>>>> index 37c1538c08..104ace1690 100644
>>>>>>>>> --- a/sysdeps/ieee754/ldbl-128/e_expl.c
>>>>>>>>> +++ b/sysdeps/ieee754/ldbl-128/e_expl.c
>>>>>>>>> @@ -66,6 +66,7 @@
>>>>>>>>>      #include <inttypes.h>
>>>>>>>>>      #include <math-barriers.h>
>>>>>>>>>      #include <math_private.h>
>>>>>>>>> +#include <fenv_private.h>
>>>>>>>>>      #include <math-underflow.h>
>>>>>>>>>      #include <stdlib.h>
>>>>>>>>>      #include "t_expl.h"
>>>>>>>>> @@ -146,9 +147,10 @@ __ieee754_expl (_Float128 x)
>>>>>>>>>            union ieee854_long_double ex2_u, scale_u;
>>>>>>>>>            fenv_t oldenv;
>>>>>>>>>      -      feholdexcept (&oldenv);
>>>>>>>>>      #ifdef FE_TONEAREST
>>>>>>>>> -      fesetround (FE_TONEAREST);
>>>>>>>>> +      libc_feholdexcept_setroundl (&oldenv, FE_TONEAREST);
>>>>>>>
>>>>>>> Should be libc_feholdexcept_setroundf128.
>>>>>>
>>>>>> But it does not see to help here, so I don't know what is failing as well.
>>>>>
>>>>> Ok, so what is happening __sfp_handle_exceptions always use 387 exception
>>>>> mode for FP_EX_OVERFLOW and FP_EX_UNDERFLOW:
>>>>>
>>>>> config/i386/sfp-exceptions.c
>>>>>
>>>>>     79   if (_fex & FP_EX_OVERFLOW)
>>>>>     80     {
>>>>>     81       struct fenv temp;
>>>>>     82       asm volatile ("fnstenv\t%0" : "=m" (temp));
>>>>>     83       temp.__status_word |= FP_EX_OVERFLOW;
>>>>>     84       asm volatile ("fldenv\t%0" : : "m" (temp));
>>>>>     85       asm volatile ("fwait");
>>>>>     86     }
>>>>>     87   if (_fex & FP_EX_UNDERFLOW)
>>>>>     88     {
>>>>>     89       struct fenv temp;
>>>>>     90       asm volatile ("fnstenv\t%0" : "=m" (temp));
>>>>>     91       temp.__status_word |= FP_EX_UNDERFLOW;
>>>>>     92       asm volatile ("fldenv\t%0" : : "m" (temp));
>>>>>     93       asm volatile ("fwait");
>>>>>     94     }
>>>>> Yes this looks like the mentioned disassembly.
>>>>> Different that FP_EX_INEXACT, for instance, where __SSE_MATH__ sets
>>>>> whether SSE is used or not.
>>>>>
>>>>> So I think it is not safe to use the SSE variants for libc_*_testf128,
>>>>> as for i387 we should use the default_* instead.
>>>>>
>>>> I've just switched to default_* in sysdeps/x86/fpu/fenv_private.h:
>>>> -#ifdef __x86_64__
>>>> +#if 0
>>>>    /* The SSE rounding mode is used by soft-fp (libgcc and glibc) on
>>>>       x86_64, so that must be set for float128 computations.  */
>>>>    # define SET_RESTORE_ROUNDF128(RM) \
>>>>
>>>> But now there are 7 testfails. For some of them, the max.ulp goes up (over 10), but there are even worse cases. Here are shortened excerpts of the out files:
>>>> - math/test-float128-clog.out:
>>>> Failure: Test: Real part of: clog_towardzero (0x2.82b795e420b281a934c6dd315cb2p-4 + 0xf.cd42a15bf9a361243a89663e81e8p-4 i)
>>>>    ulp       :  162259276829213363391578010288127.0000
>>>>    max.ulp   :  3.0000
>>>> Failure: Test: Real part of: clog_upward (0x2.82b795e420b281a934c6dd315cb2p-4 + 0xf.cd42a15bf9a361243a89663e81e8p-4 i)
>>>>    ulp       :  162259276829213363391578010288128.0000
>>>>    max.ulp   :  4.0000
>>>>
>>>> - math/test-float128-clog10.out:
>>>> Failure: Test: Real part of: clog10_downward (0x3.bea2bd62e35p-4 + 0xf.8e3d619a8d11bfd30b038eep-4 i)
>>>>    ulp       :  4.0000
>>>>    max.ulp   :  3.0000
>>>> Failure: Test: Real part of: clog10_towardzero (0x2.82b795e420b281a934c6dd315cb2p-4 + 0xf.cd42a15bf9a361243a89663e81e8p-4 i)
>>>>    ulp       :  140936617129079063283494433422698.0000
>>>>    max.ulp   :  4.0000
>>>> Failure: Test: Real part of: clog10_upward (0x2.82b795e420b281a934c6dd315cb2p-4 + 0xf.cd42a15bf9a361243a89663e81e8p-4 i)
>>>>    ulp       :  140936617129079063283494433422698.0000
>>>>    max.ulp   :  4.0000
>>>>
>>>> - math/test-float128-jn.out
>>>> - math/test-float128-lgamma.out
>>>> - math/test-float128-tgamma.out:
>>>> something like:
>>>>    ulp       :  12.0000
>>>>    max.ulp   :  4.0000
>>>>
>>>> Failure: tgamma_upward (-0x6.ec00000000000008p+8): errno set to 0, expected 34 (ERANGE)
>>>>
>>>> - math/test-float128-y1.out:
>>>> Failure: Test: y1_downward (0x2p+0)
>>>>    ulp       :  13.0000
>>>>    max.ulp   :  4.0000
>>>> Failure: Test: y1_towardzero (0x2p+0)
>>>>    ulp       :  6.0000
>>>>    max.ulp   :  2.0000
>>>> Failure: Test: y1_upward (0x2p+0)
>>>>    ulp       :  10.0000
>>>>    max.ulp   :  5.0000
>>>>
>>>> - math/test-float128-yn.out
>>>>
>>>
>>> So it seems the issue is the mix on how libm fenv function, the internal
>>> libc_fe*, and libgcc handles the exception register.  The exported fenv
>>> operates on both i387 and SSE (since it should work on long double as well),
>>> and the internal libc_fe* will set either SSE for float, double, and float128
>>> and i387 for long double (as expected).
>>>
>>> The libgcc, however, will set either SEE or i387 depending of the exception.
>>> This broke the assumption of libc_fe* for float128 where either SSE or i387
>>> will be used.
>>>
>>> One option might be to force libgcc to not use its __sfp_handle_exceptions
>>> on x86_64 and provide one that uses only SEE operations since libgcc does
>>> not use 'long double' on float128 operations.  The patch below does it
>>> and applied on top your patches shows no regressions.
>>
>> Great news. Thanks Adhemerval.
>> I've also successfully build and run the testsuite with your patch on top of mine and with only your patch without mine.
>>
>> As e.g. __multf3 or __addtf3 is used in various f128 functions, can you please first commit your patch? Then I will add a reference to this commit id in the commit-message.
> 
> I will send a RFC for this patch, we need to check with x86 maintainers
> if this the desirable direction and if I got everything right.
> 
>>
>> One other question: Why are the soft-fp functions (for add / multiply) called at all. Are the corresponding hardware instructions not available on all x86_64 machines? Or do we miss a compiler flag?
> 
> The float128 on gcc/x86_64 is implemented by soft-fp library in libgcc [1]
> and its ABI passes arguments through SSE register [2].
> 
> [1] https://stackoverflow.com/questions/26639477/what-exactly-is-a-float128-if-im-using-gcc-4-9-on-x86-64
> [2] https://github.com/hjl-tools/x86-psABI/wiki/X86-psABI
> 

Thanks for the info and for working on this.


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 3/4] Use libc_fe* macros in ldbl-128/e_expl.c.
  2020-03-25 15:07       ` Adhemerval Zanella
  2020-03-25 15:42         ` Adhemerval Zanella
@ 2020-03-26  9:08         ` Stefan Liebler
  1 sibling, 0 replies; 14+ messages in thread
From: Stefan Liebler @ 2020-03-26  9:08 UTC (permalink / raw)
  To: libc-alpha

On 3/25/20 4:07 PM, Adhemerval Zanella via Libc-alpha wrote:
> 
> 
> On 25/03/2020 12:00, Adhemerval Zanella wrote:
>>
>>
>> On 25/03/2020 07:13, Stefan Liebler via Libc-alpha wrote:
>>> Unfortunately, this patch is responsible for testfails on x86_64:
>>>
>>> math/test-float128-exp.out:
>>> Failure: exp (-0x1p-10000): Exception "Underflow" set
>>> Failure: exp (-0x2p-16384): Exception "Underflow" set
>>> ...
>>>
>>> math/test-float128-cexp.out:
>>> Failure: Real part of: cexp (0x2p-16384 - 0x4p-1076 i): Exception "Underflow" set
>>> Failure: Real part of: cexp (0x2p-16384 - 0x8p-152 i): Exception "Underflow" set
>>
>> The sysdeps/x86/fpu/fenv_private.h states:
>>
>> 296 #ifdef __x86_64__
>> 297 /* The SSE rounding mode is used by soft-fp (libgcc and glibc) on
>> 298    x86_64, so that must be set for float128 computations.  */
>> 299 # define SET_RESTORE_ROUNDF128(RM) \
>> 300   SET_RESTORE_ROUND_GENERIC (RM, libc_feholdsetround_sse, libc_feresetround_sse)
>>
>> So
>>
>>>> diff --git a/sysdeps/ieee754/ldbl-128/e_expl.c b/sysdeps/ieee754/ldbl-128/e_expl.c
>>>> index 37c1538c08..104ace1690 100644
>>>> --- a/sysdeps/ieee754/ldbl-128/e_expl.c
>>>> +++ b/sysdeps/ieee754/ldbl-128/e_expl.c
>>>> @@ -66,6 +66,7 @@
>>>>    #include <inttypes.h>
>>>>    #include <math-barriers.h>
>>>>    #include <math_private.h>
>>>> +#include <fenv_private.h>
>>>>    #include <math-underflow.h>
>>>>    #include <stdlib.h>
>>>>    #include "t_expl.h"
>>>> @@ -146,9 +147,10 @@ __ieee754_expl (_Float128 x)
>>>>          union ieee854_long_double ex2_u, scale_u;
>>>>          fenv_t oldenv;
>>>>    -      feholdexcept (&oldenv);
>>>>    #ifdef FE_TONEAREST
>>>> -      fesetround (FE_TONEAREST);
>>>> +      libc_feholdexcept_setroundl (&oldenv, FE_TONEAREST);
>>
>> Should be libc_feholdexcept_setroundf128.
> 
> But it does not seem to help here, so I don't know what is failing as well.
> 
It does not help as this is already the case. The float128 exp is built 
with sysdeps/ieee754/float128/e_expf128.c which includes 
sysdeps/ieee754/float128/float128_private.h before including 
sysdeps/ieee754/ldbl-128/e_expl.c.

float128_private.h contains things like that:
...
#include <fenv_private.h>
...
#ifdef libc_feholdexcept_setroundf128
# undef libc_feholdexcept_setroundl
# define libc_feholdexcept_setroundl(ENV, RM)	\
   libc_feholdexcept_setroundf128 (ENV, RM)
#endif
...
#ifdef libc_fesetenvf128
# undef libc_fesetenvl
# define libc_fesetenvl(ENV) libc_fesetenvf128 (ENV)
#endif

>>
>>>> +#else
>>>> +      libc_feholdexceptl (&oldenv);
>>
>> And here libc_fesetenvf128.
>>
>>>>    #endif
>>>>            /* Calculate n.  */
>>>> @@ -198,7 +200,7 @@ __ieee754_expl (_Float128 x)
>>>>          math_force_eval (x22);
>>>>            /* Return result.  */
>>>> -      fesetenv (&oldenv);
>>>> +      libc_fesetenvl (&oldenv);
>>>>            result = x22 * ex2_u.d + ex2_u.d;
>>
>> It might require extending the libc_*f128 macros to other architectures
>> (not sure).
>>


^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH 4/4] Use libc_fe* macros in k_standardl.c.
  2020-03-25 10:06 [PATCH 1/4] Use libc_fe* macros in ldbl-128/s_nearbyintl.c Stefan Liebler
  2020-03-25 10:06 ` [PATCH 2/4] Use libc_fe* macros in ldbl-128/s_fmal.c Stefan Liebler
  2020-03-25 10:06 ` [PATCH 3/4] Use libc_fe* macros in ldbl-128/e_expl.c Stefan Liebler
@ 2020-03-25 10:06 ` Stefan Liebler
  2 siblings, 0 replies; 14+ messages in thread
From: Stefan Liebler @ 2020-03-25 10:06 UTC (permalink / raw)
  To: libc-alpha; +Cc: Stefan Liebler

The calls to feholdexcept and fesetenv are replaced
by the libc_fe* macros.
---
 sysdeps/ieee754/k_standardl.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/sysdeps/ieee754/k_standardl.c b/sysdeps/ieee754/k_standardl.c
index a93d53cde9..286741879d 100644
--- a/sysdeps/ieee754/k_standardl.c
+++ b/sysdeps/ieee754/k_standardl.c
@@ -34,6 +34,7 @@
 #include <math-barriers.h>
 #include <math-svid-compat.h>
 #include <fenv.h>
+#include <fenv_private.h>
 #include <float.h>
 #include <errno.h>
 
@@ -53,12 +54,12 @@ __kernel_standard_l (long double x, long double y, int type)
   struct exception exc;
   fenv_t env;
 
-  feholdexcept (&env);
+  libc_feholdexceptl (&env);
   dx = x;
   dy = y;
   math_force_eval (dx);
   math_force_eval (dy);
-  fesetenv (&env);
+  libc_fesetenvl (&env);
 
   switch (type)
     {
-- 
2.23.0


^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2020-03-31  7:39 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-03-25 10:06 [PATCH 1/4] Use libc_fe* macros in ldbl-128/s_nearbyintl.c Stefan Liebler
2020-03-25 10:06 ` [PATCH 2/4] Use libc_fe* macros in ldbl-128/s_fmal.c Stefan Liebler
2020-03-25 10:06 ` [PATCH 3/4] Use libc_fe* macros in ldbl-128/e_expl.c Stefan Liebler
2020-03-25 10:13   ` Stefan Liebler
2020-03-25 15:00     ` Adhemerval Zanella
2020-03-25 15:07       ` Adhemerval Zanella
2020-03-25 15:42         ` Adhemerval Zanella
2020-03-26  9:08           ` Stefan Liebler
2020-03-26 14:53             ` Adhemerval Zanella
2020-03-27 14:23               ` Stefan Liebler
2020-03-30 18:12                 ` Adhemerval Zanella
2020-03-31  7:39                   ` Stefan Liebler
2020-03-26  9:08         ` Stefan Liebler
2020-03-25 10:06 ` [PATCH 4/4] Use libc_fe* macros in k_standardl.c Stefan Liebler

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).