* [PATCH 1/4] Use libc_fe* macros in ldbl-128/s_nearbyintl.c. @ 2020-03-25 10:06 Stefan Liebler 2020-03-25 10:06 ` [PATCH 2/4] Use libc_fe* macros in ldbl-128/s_fmal.c Stefan Liebler ` (2 more replies) 0 siblings, 3 replies; 14+ messages in thread From: Stefan Liebler @ 2020-03-25 10:06 UTC (permalink / raw) To: libc-alpha; +Cc: Stefan Liebler The calls to feholdexcept and fesetenv are replaced by the libc_fe* macros as it is also done in nearbyintf and nearbyint. --- sysdeps/ieee754/float128/float128_private.h | 10 ++++++++++ sysdeps/ieee754/ldbl-128/s_nearbyintl.c | 9 +++++---- sysdeps/x86/fpu/fenv_private.h | 4 ++++ 3 files changed, 19 insertions(+), 4 deletions(-) diff --git a/sysdeps/ieee754/float128/float128_private.h b/sysdeps/ieee754/float128/float128_private.h index f97463d9dc..af1ed8f3c0 100644 --- a/sysdeps/ieee754/float128/float128_private.h +++ b/sysdeps/ieee754/float128/float128_private.h @@ -66,6 +66,16 @@ # define libc_feupdateenv_testl(ENV, EX) libc_feupdateenv_testf128 (ENV, EX) #endif +#ifdef libc_feholdexceptf128 +# undef libc_feholdexceptl +# define libc_feholdexceptl(ENV) libc_feholdexceptf128 (ENV) +#endif + +#ifdef libc_fesetenvf128 +# undef libc_fesetenvl +# define libc_fesetenvl(ENV) libc_fesetenvf128 (ENV) +#endif + /* misc macros from the header below. 
*/ #include <fix-fp-int-convert-overflow.h> #undef FIX_LDBL_LONG_CONVERT_OVERFLOW diff --git a/sysdeps/ieee754/ldbl-128/s_nearbyintl.c b/sysdeps/ieee754/ldbl-128/s_nearbyintl.c index 8d26786f78..c0bc6fbd6d 100644 --- a/sysdeps/ieee754/ldbl-128/s_nearbyintl.c +++ b/sysdeps/ieee754/ldbl-128/s_nearbyintl.c @@ -27,6 +27,7 @@ #include <math.h> #include <math-barriers.h> #include <math_private.h> +#include <fenv_private.h> #include <libm-alias-ldouble.h> #include <math-use-builtins.h> @@ -53,11 +54,11 @@ __nearbyintl (_Float128 x) { if (j0 < 0) { - feholdexcept (&env); + libc_feholdexceptl (&env); w = TWO112[sx] + math_opt_barrier (x); t = w - TWO112[sx]; math_force_eval (t); - fesetenv (&env); + libc_fesetenvl (&env); GET_LDOUBLE_MSW64 (i0, t); SET_LDOUBLE_MSW64 (t, (i0 & 0x7fffffffffffffffLL) | (sx << 63)); return t; @@ -70,11 +71,11 @@ __nearbyintl (_Float128 x) else return x; /* x is integral */ } - feholdexcept (&env); + libc_feholdexceptl (&env); w = TWO112[sx] + math_opt_barrier (x); t = w - TWO112[sx]; math_force_eval (t); - fesetenv (&env); + libc_fesetenvl (&env); return t; #endif /* ! USE_NEARBYINTL_BUILTIN */ } diff --git a/sysdeps/x86/fpu/fenv_private.h b/sysdeps/x86/fpu/fenv_private.h index 4b081e015b..23a430362a 100644 --- a/sysdeps/x86/fpu/fenv_private.h +++ b/sysdeps/x86/fpu/fenv_private.h @@ -300,12 +300,16 @@ libc_feresetround_387 (fenv_t *e) SET_RESTORE_ROUND_GENERIC (RM, libc_feholdsetround_sse, libc_feresetround_sse) # define libc_feholdexcept_setroundf128 libc_feholdexcept_setround_sse # define libc_feupdateenv_testf128 libc_feupdateenv_test_sse +# define libc_feholdexceptf128 libc_feholdexcept_sse +# define libc_fesetenvf128 libc_fesetenv_sse #else /* The 387 rounding mode is used by soft-fp for 32-bit, but whether 387 or SSE exceptions are used depends on whether libgcc was built for SSE math, which is not known when glibc is being built. 
*/ # define libc_feholdexcept_setroundf128 default_libc_feholdexcept_setround # define libc_feupdateenv_testf128 default_libc_feupdateenv_test +# define libc_feholdexceptf128 default_libc_feholdexcept +# define libc_fesetenvf128 default_libc_fesetenv #endif /* We have support for rounding mode context. */ -- 2.23.0 ^ permalink raw reply [flat|nested] 14+ messages in thread
* [PATCH 2/4] Use libc_fe* macros in ldbl-128/s_fmal.c. 2020-03-25 10:06 [PATCH 1/4] Use libc_fe* macros in ldbl-128/s_nearbyintl.c Stefan Liebler @ 2020-03-25 10:06 ` Stefan Liebler 2020-03-25 10:06 ` [PATCH 3/4] Use libc_fe* macros in ldbl-128/e_expl.c Stefan Liebler 2020-03-25 10:06 ` [PATCH 4/4] Use libc_fe* macros in k_standardl.c Stefan Liebler 2 siblings, 0 replies; 14+ messages in thread From: Stefan Liebler @ 2020-03-25 10:06 UTC (permalink / raw) To: libc-alpha; +Cc: Stefan Liebler The calls to feholdexcept, fesetround, feupdateenv, fetestexcept are replaced by the libc_fe* macros as it is also done in dbl-64/s_fma.c. --- sysdeps/ieee754/float128/float128_private.h | 10 +++++++ sysdeps/ieee754/ldbl-128/s_fmal.c | 33 +++++++++++---------- sysdeps/x86/fpu/fenv_private.h | 4 +++ 3 files changed, 32 insertions(+), 15 deletions(-) diff --git a/sysdeps/ieee754/float128/float128_private.h b/sysdeps/ieee754/float128/float128_private.h index af1ed8f3c0..671323035d 100644 --- a/sysdeps/ieee754/float128/float128_private.h +++ b/sysdeps/ieee754/float128/float128_private.h @@ -76,6 +76,16 @@ # define libc_fesetenvl(ENV) libc_fesetenvf128 (ENV) #endif +#ifdef libc_feupdateenvf128 +# undef libc_feupdateenvl +# define libc_feupdateenvl(ENV) libc_feupdateenvf128 (ENV) +#endif + +#ifdef libc_fesetroundf128 +# undef libc_fesetroundl +# define libc_fesetroundl(RM) libc_fesetroundf128 (RM) +#endif + /* misc macros from the header below. 
*/ #include <fix-fp-int-convert-overflow.h> #undef FIX_LDBL_LONG_CONVERT_OVERFLOW diff --git a/sysdeps/ieee754/ldbl-128/s_fmal.c b/sysdeps/ieee754/ldbl-128/s_fmal.c index 7475015bce..f5791b6a8a 100644 --- a/sysdeps/ieee754/ldbl-128/s_fmal.c +++ b/sysdeps/ieee754/ldbl-128/s_fmal.c @@ -23,6 +23,7 @@ #include <ieee754.h> #include <math-barriers.h> #include <math_private.h> +#include <fenv_private.h> #include <libm-alias-ldouble.h> #include <tininess.h> @@ -187,8 +188,7 @@ __fmal (_Float128 x, _Float128 y, _Float128 z) } fenv_t env; - feholdexcept (&env); - fesetround (FE_TONEAREST); + libc_feholdexcept_setroundl (&env, FE_TONEAREST); /* Multiplication m1 + m2 = x * y using Dekker's algorithm. */ #define C ((1LL << (LDBL_MANT_DIG + 1) / 2) + 1) @@ -216,41 +216,44 @@ __fmal (_Float128 x, _Float128 y, _Float128 z) /* If the result is an exact zero, ensure it has the correct sign. */ if (a1 == 0 && m2 == 0) { - feupdateenv (&env); + libc_feupdateenvl (&env); /* Ensure that round-to-nearest value of z + m1 is not reused. */ z = math_opt_barrier (z); return z + m1; } - fesetround (FE_TOWARDZERO); + libc_fesetroundl (FE_TOWARDZERO); /* Perform m2 + a2 addition with round to odd. */ u.d = a2 + m2; + if (__glibc_unlikely (adjust < 0)) + { + if ((u.ieee.mantissa3 & 1) == 0) + u.ieee.mantissa3 |= libc_fetestexceptl (FE_INEXACT) != 0; + v.d = a1 + u.d; + /* Ensure the addition is not scheduled after fetestexcept call. */ + math_force_eval (v.d); + } + + /* Reset rounding mode and test for inexact simultaneously. */ + int j = libc_feupdateenv_testl (&env, FE_INEXACT) != 0; + if (__glibc_likely (adjust == 0)) { if ((u.ieee.mantissa3 & 1) == 0 && u.ieee.exponent != 0x7fff) - u.ieee.mantissa3 |= fetestexcept (FE_INEXACT) != 0; - feupdateenv (&env); + u.ieee.mantissa3 |= j; /* Result is a1 + u.d. 
*/ return a1 + u.d; } else if (__glibc_likely (adjust > 0)) { if ((u.ieee.mantissa3 & 1) == 0 && u.ieee.exponent != 0x7fff) - u.ieee.mantissa3 |= fetestexcept (FE_INEXACT) != 0; - feupdateenv (&env); + u.ieee.mantissa3 |= j; /* Result is a1 + u.d, scaled up. */ return (a1 + u.d) * L(0x1p113); } else { - if ((u.ieee.mantissa3 & 1) == 0) - u.ieee.mantissa3 |= fetestexcept (FE_INEXACT) != 0; - v.d = a1 + u.d; - /* Ensure the addition is not scheduled after fetestexcept call. */ - math_force_eval (v.d); - int j = fetestexcept (FE_INEXACT) != 0; - feupdateenv (&env); /* Ensure the following computations are performed in default rounding mode instead of just reusing the round to zero computation. */ asm volatile ("" : "=m" (u) : "m" (u)); diff --git a/sysdeps/x86/fpu/fenv_private.h b/sysdeps/x86/fpu/fenv_private.h index 23a430362a..8453aaa270 100644 --- a/sysdeps/x86/fpu/fenv_private.h +++ b/sysdeps/x86/fpu/fenv_private.h @@ -302,6 +302,8 @@ libc_feresetround_387 (fenv_t *e) # define libc_feupdateenv_testf128 libc_feupdateenv_test_sse # define libc_feholdexceptf128 libc_feholdexcept_sse # define libc_fesetenvf128 libc_fesetenv_sse +# define libc_feupdateenvf128 libc_feupdateenv_sse +# define libc_fesetroundf128 libc_fesetround_sse #else /* The 387 rounding mode is used by soft-fp for 32-bit, but whether 387 or SSE exceptions are used depends on whether libgcc was built @@ -310,6 +312,8 @@ libc_feresetround_387 (fenv_t *e) # define libc_feupdateenv_testf128 default_libc_feupdateenv_test # define libc_feholdexceptf128 default_libc_feholdexcept # define libc_fesetenvf128 default_libc_fesetenv +# define libc_feupdateenvf128 default_libc_feupdateenv +# define libc_fesetroundf128 default_libc_fesetround #endif /* We have support for rounding mode context. */ -- 2.23.0 ^ permalink raw reply [flat|nested] 14+ messages in thread
* [PATCH 3/4] Use libc_fe* macros in ldbl-128/e_expl.c. 2020-03-25 10:06 [PATCH 1/4] Use libc_fe* macros in ldbl-128/s_nearbyintl.c Stefan Liebler 2020-03-25 10:06 ` [PATCH 2/4] Use libc_fe* macros in ldbl-128/s_fmal.c Stefan Liebler @ 2020-03-25 10:06 ` Stefan Liebler 2020-03-25 10:13 ` Stefan Liebler 2020-03-25 10:06 ` [PATCH 4/4] Use libc_fe* macros in k_standardl.c Stefan Liebler 2 siblings, 1 reply; 14+ messages in thread From: Stefan Liebler @ 2020-03-25 10:06 UTC (permalink / raw) To: libc-alpha; +Cc: Stefan Liebler The calls to feholdexcept, fesetround and fesetenv are replaced by the libc_fe* macros. --- sysdeps/ieee754/ldbl-128/e_expl.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/sysdeps/ieee754/ldbl-128/e_expl.c b/sysdeps/ieee754/ldbl-128/e_expl.c index 37c1538c08..104ace1690 100644 --- a/sysdeps/ieee754/ldbl-128/e_expl.c +++ b/sysdeps/ieee754/ldbl-128/e_expl.c @@ -66,6 +66,7 @@ #include <inttypes.h> #include <math-barriers.h> #include <math_private.h> +#include <fenv_private.h> #include <math-underflow.h> #include <stdlib.h> #include "t_expl.h" @@ -146,9 +147,10 @@ __ieee754_expl (_Float128 x) union ieee854_long_double ex2_u, scale_u; fenv_t oldenv; - feholdexcept (&oldenv); #ifdef FE_TONEAREST - fesetround (FE_TONEAREST); + libc_feholdexcept_setroundl (&oldenv, FE_TONEAREST); +#else + libc_feholdexceptl (&oldenv); #endif /* Calculate n. */ @@ -198,7 +200,7 @@ __ieee754_expl (_Float128 x) math_force_eval (x22); /* Return result. */ - fesetenv (&oldenv); + libc_fesetenvl (&oldenv); result = x22 * ex2_u.d + ex2_u.d; -- 2.23.0 ^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH 3/4] Use libc_fe* macros in ldbl-128/e_expl.c. 2020-03-25 10:06 ` [PATCH 3/4] Use libc_fe* macros in ldbl-128/e_expl.c Stefan Liebler @ 2020-03-25 10:13 ` Stefan Liebler 2020-03-25 15:00 ` Adhemerval Zanella 0 siblings, 1 reply; 14+ messages in thread From: Stefan Liebler @ 2020-03-25 10:13 UTC (permalink / raw) To: GNU C Library; +Cc: Stefan Liebler Unfortunately, this patch is responsible for test failures on x86_64: math/test-float128-exp.out: Failure: exp (-0x1p-10000): Exception "Underflow" set Failure: exp (-0x2p-16384): Exception "Underflow" set ... math/test-float128-cexp.out: Failure: Real part of: cexp (0x2p-16384 - 0x4p-1076 i): Exception "Underflow" set Failure: Real part of: cexp (0x2p-16384 - 0x8p-152 i): Exception "Underflow" set ... I've stepped through "expf128 (0x1p-10000)" in sysdeps/ieee754/float128/../ldbl-128/e_expl.c: 151: libc_feholdexcept_setroundl (&oldenv, FE_TONEAREST); // before this patch: feholdexcept (&oldenv); fesetround (FE_TONEAREST); 199: x22 = x + x*x*(P1+x*(P2+x*(P3+x*(P4+x*(P5+x*P6))))); 203: libc_fesetenvl (&oldenv); // before this patch: fesetenv (&oldenv); During the evaluation of x22 the underflow exception occurs in: <__ieee754_expf128+920> callq 0x7ffff7f2a7c0 <__multf3> which calls __sfp_handle_exceptions(): ae8be: 40 f6 c7 10 test $0x10,%dil ae8c2: 74 0f je ae8d3 <__sfp_handle_exceptions+0x73> ae8c4: d9 74 24 d8 fnstenv -0x28(%rsp) ae8c8: 66 83 4c 24 dc 10 orw $0x10,-0x24(%rsp) ae8ce: d9 64 24 d8 fldenv -0x28(%rsp) ae8d2: 9b fwait According to sysdeps/x86/fpu/fenv_private.h: #ifdef __x86_64__ # define libc_feholdexcept_setroundf128 libc_feholdexcept_setround_sse # define libc_fesetenvf128 libc_fesetenv_sse #else # define libc_feholdexcept_setroundf128 default_libc_feholdexcept_setround # define libc_fesetenvf128 default_libc_fesetenv #endif // On my machine: # define STMXCSR "stmxcsr" # define LDMXCSR "ldmxcsr" static __always_inline void libc_feholdexcept_setround_sse (fenv_t *e, int r) { unsigned int
mxcsr; asm (STMXCSR " %0" : "=m" (*&mxcsr)); e->__mxcsr = mxcsr; mxcsr = ((mxcsr | 0x1f80) & ~0x603f) | (r << 3); asm volatile (LDMXCSR " %0" : : "m" (*&mxcsr)); } Whereas the feholdexcept() function in ./sysdeps/x86_64/fpu/feholdexcpt.c uses: int __feholdexcept (fenv_t *envp) { unsigned int mxcsr; /* Store the environment. Recall that fnstenv has a side effect of masking all exceptions. Then clear all exceptions. */ __asm__ ("fnstenv %0\n\t" "stmxcsr %1\n\t" "fnclex" : "=m" (*envp), "=m" (envp->__mxcsr)); /* Set the SSE MXCSR register. */ mxcsr = (envp->__mxcsr | 0x1f80) & ~0x3f; __asm__ ("ldmxcsr %0" : : "m" (*&mxcsr)); return 0; } I assume that the underflow exception stays set because the fnstenv / fldenv pair is missing if libc_feholdexcept_setroundf128 / libc_fesetenvf128 is used instead of feholdexcept, fesetround and fesetenv. As I'm not familiar with float128 on x86_64, can anybody please help? Bye, Stefan On 3/25/20 11:06 AM, Stefan Liebler wrote: > The calls to feholdexcept, fesetround and fesetenv are replaced > by the libc_fe* macros. > --- > sysdeps/ieee754/ldbl-128/e_expl.c | 8 +++++--- > 1 file changed, 5 insertions(+), 3 deletions(-) > > diff --git a/sysdeps/ieee754/ldbl-128/e_expl.c b/sysdeps/ieee754/ldbl-128/e_expl.c > index 37c1538c08..104ace1690 100644 > --- a/sysdeps/ieee754/ldbl-128/e_expl.c > +++ b/sysdeps/ieee754/ldbl-128/e_expl.c > @@ -66,6 +66,7 @@ > #include <inttypes.h> > #include <math-barriers.h> > #include <math_private.h> > +#include <fenv_private.h> > #include <math-underflow.h> > #include <stdlib.h> > #include "t_expl.h" > @@ -146,9 +147,10 @@ __ieee754_expl (_Float128 x) > union ieee854_long_double ex2_u, scale_u; > fenv_t oldenv; > > - feholdexcept (&oldenv); > #ifdef FE_TONEAREST > - fesetround (FE_TONEAREST); > + libc_feholdexcept_setroundl (&oldenv, FE_TONEAREST); > +#else > + libc_feholdexceptl (&oldenv); > #endif > > /* Calculate n. 
*/ > @@ -198,7 +200,7 @@ __ieee754_expl (_Float128 x) > math_force_eval (x22); > > /* Return result. */ > - fesetenv (&oldenv); > + libc_fesetenvl (&oldenv); > > result = x22 * ex2_u.d + ex2_u.d; > > ^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH 3/4] Use libc_fe* macros in ldbl-128/e_expl.c. 2020-03-25 10:13 ` Stefan Liebler @ 2020-03-25 15:00 ` Adhemerval Zanella 2020-03-25 15:07 ` Adhemerval Zanella 0 siblings, 1 reply; 14+ messages in thread From: Adhemerval Zanella @ 2020-03-25 15:00 UTC (permalink / raw) To: libc-alpha On 25/03/2020 07:13, Stefan Liebler via Libc-alpha wrote: > Unfortunately, this patch is responsible for testfails on x86_64: > > math/test-float128-exp.out: > Failure: exp (-0x1p-10000): Exception "Underflow" set > Failure: exp (-0x2p-16384): Exception "Underflow" set > ... > > math/test-float128-cexp.out: > Failure: Real part of: cexp (0x2p-16384 - 0x4p-1076 i): Exception "Underflow" set > Failure: Real part of: cexp (0x2p-16384 - 0x8p-152 i): Exception "Underflow" set The sysdeps/x86/fpu/fenv_private.h states: 296 #ifdef __x86_64__ 297 /* The SSE rounding mode is used by soft-fp (libgcc and glibc) on 298 x86_64, so that must be set for float128 computations. */ 299 # define SET_RESTORE_ROUNDF128(RM) \ 300 SET_RESTORE_ROUND_GENERIC (RM, libc_feholdsetround_sse, libc_feresetround_sse) So >> diff --git a/sysdeps/ieee754/ldbl-128/e_expl.c b/sysdeps/ieee754/ldbl-128/e_expl.c >> index 37c1538c08..104ace1690 100644 >> --- a/sysdeps/ieee754/ldbl-128/e_expl.c >> +++ b/sysdeps/ieee754/ldbl-128/e_expl.c >> @@ -66,6 +66,7 @@ >> #include <inttypes.h> >> #include <math-barriers.h> >> #include <math_private.h> >> +#include <fenv_private.h> >> #include <math-underflow.h> >> #include <stdlib.h> >> #include "t_expl.h" >> @@ -146,9 +147,10 @@ __ieee754_expl (_Float128 x) >> union ieee854_long_double ex2_u, scale_u; >> fenv_t oldenv; >> - feholdexcept (&oldenv); >> #ifdef FE_TONEAREST >> - fesetround (FE_TONEAREST); >> + libc_feholdexcept_setroundl (&oldenv, FE_TONEAREST); Should be libc_feholdexcept_setroundf128. >> +#else >> + libc_feholdexceptl (&oldenv); And here libc_fesetenvf128. >> #endif >> /* Calculate n. 
*/ >> @@ -198,7 +200,7 @@ __ieee754_expl (_Float128 x) >> math_force_eval (x22); >> /* Return result. */ >> - fesetenv (&oldenv); >> + libc_fesetenvl (&oldenv); >> result = x22 * ex2_u.d + ex2_u.d; It might require extend the libc_*f128 macros to other architectures (not sure). ^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH 3/4] Use libc_fe* macros in ldbl-128/e_expl.c. 2020-03-25 15:00 ` Adhemerval Zanella @ 2020-03-25 15:07 ` Adhemerval Zanella 2020-03-25 15:42 ` Adhemerval Zanella 2020-03-26 9:08 ` Stefan Liebler 0 siblings, 2 replies; 14+ messages in thread From: Adhemerval Zanella @ 2020-03-25 15:07 UTC (permalink / raw) To: libc-alpha On 25/03/2020 12:00, Adhemerval Zanella wrote: > > > On 25/03/2020 07:13, Stefan Liebler via Libc-alpha wrote: >> Unfortunately, this patch is responsible for testfails on x86_64: >> >> math/test-float128-exp.out: >> Failure: exp (-0x1p-10000): Exception "Underflow" set >> Failure: exp (-0x2p-16384): Exception "Underflow" set >> ... >> >> math/test-float128-cexp.out: >> Failure: Real part of: cexp (0x2p-16384 - 0x4p-1076 i): Exception "Underflow" set >> Failure: Real part of: cexp (0x2p-16384 - 0x8p-152 i): Exception "Underflow" set > > The sysdeps/x86/fpu/fenv_private.h states: > > 296 #ifdef __x86_64__ > 297 /* The SSE rounding mode is used by soft-fp (libgcc and glibc) on > 298 x86_64, so that must be set for float128 computations. */ > 299 # define SET_RESTORE_ROUNDF128(RM) \ > 300 SET_RESTORE_ROUND_GENERIC (RM, libc_feholdsetround_sse, libc_feresetround_sse) > > So > >>> diff --git a/sysdeps/ieee754/ldbl-128/e_expl.c b/sysdeps/ieee754/ldbl-128/e_expl.c >>> index 37c1538c08..104ace1690 100644 >>> --- a/sysdeps/ieee754/ldbl-128/e_expl.c >>> +++ b/sysdeps/ieee754/ldbl-128/e_expl.c >>> @@ -66,6 +66,7 @@ >>> #include <inttypes.h> >>> #include <math-barriers.h> >>> #include <math_private.h> >>> +#include <fenv_private.h> >>> #include <math-underflow.h> >>> #include <stdlib.h> >>> #include "t_expl.h" >>> @@ -146,9 +147,10 @@ __ieee754_expl (_Float128 x) >>> union ieee854_long_double ex2_u, scale_u; >>> fenv_t oldenv; >>> - feholdexcept (&oldenv); >>> #ifdef FE_TONEAREST >>> - fesetround (FE_TONEAREST); >>> + libc_feholdexcept_setroundl (&oldenv, FE_TONEAREST); > > Should be libc_feholdexcept_setroundf128. 
But it does not seem to help here, so I don't know what is failing as well. > >>> +#else >>> + libc_feholdexceptl (&oldenv); > > And here libc_fesetenvf128. > >>> #endif >>> /* Calculate n. */ >>> @@ -198,7 +200,7 @@ __ieee754_expl (_Float128 x) >>> math_force_eval (x22); >>> /* Return result. */ >>> - fesetenv (&oldenv); >>> + libc_fesetenvl (&oldenv); >>> result = x22 * ex2_u.d + ex2_u.d; > > It might require extend the libc_*f128 macros to other architectures > (not sure). > ^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH 3/4] Use libc_fe* macros in ldbl-128/e_expl.c. 2020-03-25 15:07 ` Adhemerval Zanella @ 2020-03-25 15:42 ` Adhemerval Zanella 2020-03-26 9:08 ` Stefan Liebler 2020-03-26 9:08 ` Stefan Liebler 1 sibling, 1 reply; 14+ messages in thread From: Adhemerval Zanella @ 2020-03-25 15:42 UTC (permalink / raw) To: libc-alpha On 25/03/2020 12:07, Adhemerval Zanella wrote: > > > On 25/03/2020 12:00, Adhemerval Zanella wrote: >> >> >> On 25/03/2020 07:13, Stefan Liebler via Libc-alpha wrote: >>> Unfortunately, this patch is responsible for testfails on x86_64: >>> >>> math/test-float128-exp.out: >>> Failure: exp (-0x1p-10000): Exception "Underflow" set >>> Failure: exp (-0x2p-16384): Exception "Underflow" set >>> ... >>> >>> math/test-float128-cexp.out: >>> Failure: Real part of: cexp (0x2p-16384 - 0x4p-1076 i): Exception "Underflow" set >>> Failure: Real part of: cexp (0x2p-16384 - 0x8p-152 i): Exception "Underflow" set >> >> The sysdeps/x86/fpu/fenv_private.h states: >> >> 296 #ifdef __x86_64__ >> 297 /* The SSE rounding mode is used by soft-fp (libgcc and glibc) on >> 298 x86_64, so that must be set for float128 computations. 
*/ >> 299 # define SET_RESTORE_ROUNDF128(RM) \ >> 300 SET_RESTORE_ROUND_GENERIC (RM, libc_feholdsetround_sse, libc_feresetround_sse) >> >> So >> >>>> diff --git a/sysdeps/ieee754/ldbl-128/e_expl.c b/sysdeps/ieee754/ldbl-128/e_expl.c >>>> index 37c1538c08..104ace1690 100644 >>>> --- a/sysdeps/ieee754/ldbl-128/e_expl.c >>>> +++ b/sysdeps/ieee754/ldbl-128/e_expl.c >>>> @@ -66,6 +66,7 @@ >>>> #include <inttypes.h> >>>> #include <math-barriers.h> >>>> #include <math_private.h> >>>> +#include <fenv_private.h> >>>> #include <math-underflow.h> >>>> #include <stdlib.h> >>>> #include "t_expl.h" >>>> @@ -146,9 +147,10 @@ __ieee754_expl (_Float128 x) >>>> union ieee854_long_double ex2_u, scale_u; >>>> fenv_t oldenv; >>>> - feholdexcept (&oldenv); >>>> #ifdef FE_TONEAREST >>>> - fesetround (FE_TONEAREST); >>>> + libc_feholdexcept_setroundl (&oldenv, FE_TONEAREST); >> >> Should be libc_feholdexcept_setroundf128. > > But it does not see to help here, so I don't know what is failing as well. Ok, so what is happening is that __sfp_handle_exceptions always uses the 387 exception mode for FP_EX_OVERFLOW and FP_EX_UNDERFLOW: config/i386/sfp-exceptions.c 79 if (_fex & FP_EX_OVERFLOW) 80 { 81 struct fenv temp; 82 asm volatile ("fnstenv\t%0" : "=m" (temp)); 83 temp.__status_word |= FP_EX_OVERFLOW; 84 asm volatile ("fldenv\t%0" : : "m" (temp)); 85 asm volatile ("fwait"); 86 } 87 if (_fex & FP_EX_UNDERFLOW) 88 { 89 struct fenv temp; 90 asm volatile ("fnstenv\t%0" : "=m" (temp)); 91 temp.__status_word |= FP_EX_UNDERFLOW; 92 asm volatile ("fldenv\t%0" : : "m" (temp)); 93 asm volatile ("fwait"); 94 } This is different from FP_EX_INEXACT, for instance, where __SSE_MATH__ determines whether SSE is used or not. So I think it is not safe to use the SSE variants for libc_*_testf128, as for i387 we should use the default_* instead. ^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH 3/4] Use libc_fe* macros in ldbl-128/e_expl.c. 2020-03-25 15:42 ` Adhemerval Zanella @ 2020-03-26 9:08 ` Stefan Liebler 2020-03-26 14:53 ` Adhemerval Zanella 0 siblings, 1 reply; 14+ messages in thread From: Stefan Liebler @ 2020-03-26 9:08 UTC (permalink / raw) To: libc-alpha On 3/25/20 4:42 PM, Adhemerval Zanella via Libc-alpha wrote: > > > On 25/03/2020 12:07, Adhemerval Zanella wrote: >> >> >> On 25/03/2020 12:00, Adhemerval Zanella wrote: >>> >>> >>> On 25/03/2020 07:13, Stefan Liebler via Libc-alpha wrote: >>>> Unfortunately, this patch is responsible for testfails on x86_64: >>>> >>>> math/test-float128-exp.out: >>>> Failure: exp (-0x1p-10000): Exception "Underflow" set >>>> Failure: exp (-0x2p-16384): Exception "Underflow" set >>>> ... >>>> >>>> math/test-float128-cexp.out: >>>> Failure: Real part of: cexp (0x2p-16384 - 0x4p-1076 i): Exception "Underflow" set >>>> Failure: Real part of: cexp (0x2p-16384 - 0x8p-152 i): Exception "Underflow" set >>> >>> The sysdeps/x86/fpu/fenv_private.h states: >>> >>> 296 #ifdef __x86_64__ >>> 297 /* The SSE rounding mode is used by soft-fp (libgcc and glibc) on >>> 298 x86_64, so that must be set for float128 computations. 
*/ >>> 299 # define SET_RESTORE_ROUNDF128(RM) \ >>> 300 SET_RESTORE_ROUND_GENERIC (RM, libc_feholdsetround_sse, libc_feresetround_sse) >>> >>> So >>> >>>>> diff --git a/sysdeps/ieee754/ldbl-128/e_expl.c b/sysdeps/ieee754/ldbl-128/e_expl.c >>>>> index 37c1538c08..104ace1690 100644 >>>>> --- a/sysdeps/ieee754/ldbl-128/e_expl.c >>>>> +++ b/sysdeps/ieee754/ldbl-128/e_expl.c >>>>> @@ -66,6 +66,7 @@ >>>>> #include <inttypes.h> >>>>> #include <math-barriers.h> >>>>> #include <math_private.h> >>>>> +#include <fenv_private.h> >>>>> #include <math-underflow.h> >>>>> #include <stdlib.h> >>>>> #include "t_expl.h" >>>>> @@ -146,9 +147,10 @@ __ieee754_expl (_Float128 x) >>>>> union ieee854_long_double ex2_u, scale_u; >>>>> fenv_t oldenv; >>>>> - feholdexcept (&oldenv); >>>>> #ifdef FE_TONEAREST >>>>> - fesetround (FE_TONEAREST); >>>>> + libc_feholdexcept_setroundl (&oldenv, FE_TONEAREST); >>> >>> Should be libc_feholdexcept_setroundf128. >> >> But it does not see to help here, so I don't know what is failing as well. > > Ok, so what is happening __sfp_handle_exceptions always use 387 exception > mode for FP_EX_OVERFLOW and FP_EX_UNDERFLOW: > > config/i386/sfp-exceptions.c > > 79 if (_fex & FP_EX_OVERFLOW) > 80 { > 81 struct fenv temp; > 82 asm volatile ("fnstenv\t%0" : "=m" (temp)); > 83 temp.__status_word |= FP_EX_OVERFLOW; > 84 asm volatile ("fldenv\t%0" : : "m" (temp)); > 85 asm volatile ("fwait"); > 86 } > 87 if (_fex & FP_EX_UNDERFLOW) > 88 { > 89 struct fenv temp; > 90 asm volatile ("fnstenv\t%0" : "=m" (temp)); > 91 temp.__status_word |= FP_EX_UNDERFLOW; > 92 asm volatile ("fldenv\t%0" : : "m" (temp)); > 93 asm volatile ("fwait"); > 94 } > Yes this looks like the mentioned disassembly. > Different that FP_EX_INEXACT, for instance, where __SSE_MATH__ sets > whether SSE is used or not. > > So I think it is not safe to use the SSE variants for libc_*_testf128, > as for i387 we should use the default_* instead. 
> I've just switched to default_* in sysdeps/x86/fpu/fenv_private.h: -#ifdef __x86_64__ +#if 0 /* The SSE rounding mode is used by soft-fp (libgcc and glibc) on x86_64, so that must be set for float128 computations. */ # define SET_RESTORE_ROUNDF128(RM) \ But now there are 7 testfails. For some of them, the max.ulp goes up (over 10), but there are even worse cases. Here are shortened excerpts of the out files: - math/test-float128-clog.out: Failure: Test: Real part of: clog_towardzero (0x2.82b795e420b281a934c6dd315cb2p-4 + 0xf.cd42a15bf9a361243a89663e81e8p-4 i) ulp : 162259276829213363391578010288127.0000 max.ulp : 3.0000 Failure: Test: Real part of: clog_upward (0x2.82b795e420b281a934c6dd315cb2p-4 + 0xf.cd42a15bf9a361243a89663e81e8p-4 i) ulp : 162259276829213363391578010288128.0000 max.ulp : 4.0000 - math/test-float128-clog10.out: Failure: Test: Real part of: clog10_downward (0x3.bea2bd62e35p-4 + 0xf.8e3d619a8d11bfd30b038eep-4 i) ulp : 4.0000 max.ulp : 3.0000 Failure: Test: Real part of: clog10_towardzero (0x2.82b795e420b281a934c6dd315cb2p-4 + 0xf.cd42a15bf9a361243a89663e81e8p-4 i) ulp : 140936617129079063283494433422698.0000 max.ulp : 4.0000 Failure: Test: Real part of: clog10_upward (0x2.82b795e420b281a934c6dd315cb2p-4 + 0xf.cd42a15bf9a361243a89663e81e8p-4 i) ulp : 140936617129079063283494433422698.0000 max.ulp : 4.0000 - math/test-float128-jn.out - math/test-float128-lgamma.out - math/test-float128-tgamma.out: something like: ulp : 12.0000 max.ulp : 4.0000 Failure: tgamma_upward (-0x6.ec00000000000008p+8): errno set to 0, expected 34 (ERANGE) - math/test-float128-y1.out: Failure: Test: y1_downward (0x2p+0) ulp : 13.0000 max.ulp : 4.0000 Failure: Test: y1_towardzero (0x2p+0) ulp : 6.0000 max.ulp : 2.0000 Failure: Test: y1_upward (0x2p+0) ulp : 10.0000 max.ulp : 5.0000 - math/test-float128-yn.out ^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH 3/4] Use libc_fe* macros in ldbl-128/e_expl.c. 2020-03-26 9:08 ` Stefan Liebler @ 2020-03-26 14:53 ` Adhemerval Zanella 2020-03-27 14:23 ` Stefan Liebler 0 siblings, 1 reply; 14+ messages in thread From: Adhemerval Zanella @ 2020-03-26 14:53 UTC (permalink / raw) To: libc-alpha, Stefan Liebler via Libc-alpha On 26/03/2020 06:08, Stefan Liebler via Libc-alpha wrote: > On 3/25/20 4:42 PM, Adhemerval Zanella via Libc-alpha wrote: >> >> >> On 25/03/2020 12:07, Adhemerval Zanella wrote: >>> >>> >>> On 25/03/2020 12:00, Adhemerval Zanella wrote: >>>> >>>> >>>> On 25/03/2020 07:13, Stefan Liebler via Libc-alpha wrote: >>>>> Unfortunately, this patch is responsible for testfails on x86_64: >>>>> >>>>> math/test-float128-exp.out: >>>>> Failure: exp (-0x1p-10000): Exception "Underflow" set >>>>> Failure: exp (-0x2p-16384): Exception "Underflow" set >>>>> ... >>>>> >>>>> math/test-float128-cexp.out: >>>>> Failure: Real part of: cexp (0x2p-16384 - 0x4p-1076 i): Exception "Underflow" set >>>>> Failure: Real part of: cexp (0x2p-16384 - 0x8p-152 i): Exception "Underflow" set >>>> >>>> The sysdeps/x86/fpu/fenv_private.h states: >>>> >>>> 296 #ifdef __x86_64__ >>>> 297 /* The SSE rounding mode is used by soft-fp (libgcc and glibc) on >>>> 298 x86_64, so that must be set for float128 computations. 
*/ >>>> 299 # define SET_RESTORE_ROUNDF128(RM) \ >>>> 300 SET_RESTORE_ROUND_GENERIC (RM, libc_feholdsetround_sse, libc_feresetround_sse) >>>> >>>> So >>>> >>>>>> diff --git a/sysdeps/ieee754/ldbl-128/e_expl.c b/sysdeps/ieee754/ldbl-128/e_expl.c >>>>>> index 37c1538c08..104ace1690 100644 >>>>>> --- a/sysdeps/ieee754/ldbl-128/e_expl.c >>>>>> +++ b/sysdeps/ieee754/ldbl-128/e_expl.c >>>>>> @@ -66,6 +66,7 @@ >>>>>> #include <inttypes.h> >>>>>> #include <math-barriers.h> >>>>>> #include <math_private.h> >>>>>> +#include <fenv_private.h> >>>>>> #include <math-underflow.h> >>>>>> #include <stdlib.h> >>>>>> #include "t_expl.h" >>>>>> @@ -146,9 +147,10 @@ __ieee754_expl (_Float128 x) >>>>>> union ieee854_long_double ex2_u, scale_u; >>>>>> fenv_t oldenv; >>>>>> - feholdexcept (&oldenv); >>>>>> #ifdef FE_TONEAREST >>>>>> - fesetround (FE_TONEAREST); >>>>>> + libc_feholdexcept_setroundl (&oldenv, FE_TONEAREST); >>>> >>>> Should be libc_feholdexcept_setroundf128. >>> >>> But it does not see to help here, so I don't know what is failing as well. >> >> Ok, so what is happening __sfp_handle_exceptions always use 387 exception >> mode for FP_EX_OVERFLOW and FP_EX_UNDERFLOW: >> >> config/i386/sfp-exceptions.c >> >> 79 if (_fex & FP_EX_OVERFLOW) >> 80 { >> 81 struct fenv temp; >> 82 asm volatile ("fnstenv\t%0" : "=m" (temp)); >> 83 temp.__status_word |= FP_EX_OVERFLOW; >> 84 asm volatile ("fldenv\t%0" : : "m" (temp)); >> 85 asm volatile ("fwait"); >> 86 } >> 87 if (_fex & FP_EX_UNDERFLOW) >> 88 { >> 89 struct fenv temp; >> 90 asm volatile ("fnstenv\t%0" : "=m" (temp)); >> 91 temp.__status_word |= FP_EX_UNDERFLOW; >> 92 asm volatile ("fldenv\t%0" : : "m" (temp)); >> 93 asm volatile ("fwait"); >> 94 } >> Yes this looks like the mentioned disassembly. >> Different that FP_EX_INEXACT, for instance, where __SSE_MATH__ sets >> whether SSE is used or not. >> >> So I think it is not safe to use the SSE variants for libc_*_testf128, >> as for i387 we should use the default_* instead. 
>> > I've just switched to default_* in sysdeps/x86/fpu/fenv_private.h: > -#ifdef __x86_64__ > +#if 0 > /* The SSE rounding mode is used by soft-fp (libgcc and glibc) on > x86_64, so that must be set for float128 computations. */ > # define SET_RESTORE_ROUNDF128(RM) \ > > But now there are 7 testfails. For some of them, the max.ulp goes up (over 10), but there are even worse cases. Here are shortened excerpts of the out files: > - math/test-float128-clog.out: > Failure: Test: Real part of: clog_towardzero (0x2.82b795e420b281a934c6dd315cb2p-4 + 0xf.cd42a15bf9a361243a89663e81e8p-4 i) > ulp : 162259276829213363391578010288127.0000 > max.ulp : 3.0000 > Failure: Test: Real part of: clog_upward (0x2.82b795e420b281a934c6dd315cb2p-4 + 0xf.cd42a15bf9a361243a89663e81e8p-4 i) > ulp : 162259276829213363391578010288128.0000 > max.ulp : 4.0000 > > - math/test-float128-clog10.out: > Failure: Test: Real part of: clog10_downward (0x3.bea2bd62e35p-4 + 0xf.8e3d619a8d11bfd30b038eep-4 i) > ulp : 4.0000 > max.ulp : 3.0000 > Failure: Test: Real part of: clog10_towardzero (0x2.82b795e420b281a934c6dd315cb2p-4 + 0xf.cd42a15bf9a361243a89663e81e8p-4 i) > ulp : 140936617129079063283494433422698.0000 > max.ulp : 4.0000 > Failure: Test: Real part of: clog10_upward (0x2.82b795e420b281a934c6dd315cb2p-4 + 0xf.cd42a15bf9a361243a89663e81e8p-4 i) > ulp : 140936617129079063283494433422698.0000 > max.ulp : 4.0000 > > - math/test-float128-jn.out > - math/test-float128-lgamma.out > - math/test-float128-tgamma.out: > something like: > ulp : 12.0000 > max.ulp : 4.0000 > > Failure: tgamma_upward (-0x6.ec00000000000008p+8): errno set to 0, expected 34 (ERANGE) > > - math/test-float128-y1.out: > Failure: Test: y1_downward (0x2p+0) > ulp : 13.0000 > max.ulp : 4.0000 > Failure: Test: y1_towardzero (0x2p+0) > ulp : 6.0000 > max.ulp : 2.0000 > Failure: Test: y1_upward (0x2p+0) > ulp : 10.0000 > max.ulp : 5.0000 > > - math/test-float128-yn.out > So it seems the issue is the mix on how libm fenv function, the 
internal libc_fe*, and libgcc handle the exception register. The exported fenv operates on both i387 and SSE (since it should work on long double as well), and the internal libc_fe* will set either SSE for float, double, and float128 and i387 for long double (as expected). The libgcc, however, will set either SSE or i387 depending on the exception. This broke the assumption of libc_fe* for float128 where either SSE or i387 will be used. One option might be to force libgcc to not use its __sfp_handle_exceptions on x86_64 and provide one that uses only SSE operations since libgcc does not use 'long double' on float128 operations. The patch below does that and, applied on top of your patches, shows no regressions. And I think we should fix libgcc in a similar manner, since checking on config/i386/64/sfp-machine.h it only supports SSE rounding mode. -- diff --git a/sysdeps/x86/fpu/sfp-exceptions.c b/sysdeps/x86/fpu/sfp-exceptions.c new file mode 100644 index 0000000000..676f396bc3 --- /dev/null +++ b/sysdeps/x86/fpu/sfp-exceptions.c @@ -0,0 +1,49 @@ +#include <fenv.h> +#include <float.h> +#include <math-barriers.h> + +#define FP_EX_INVALID 0x01 +#define FP_EX_DENORM 0x02 +#define FP_EX_DIVZERO 0x04 +#define FP_EX_OVERFLOW 0x08 +#define FP_EX_UNDERFLOW 0x10 +#define FP_EX_INEXACT 0x20 +#define FP_EX_ALL \ + (FP_EX_INVALID | FP_EX_DENORM | FP_EX_DIVZERO | FP_EX_OVERFLOW \ + | FP_EX_UNDERFLOW | FP_EX_INEXACT) + +void +__sfp_handle_exceptions (int _fex) +{ + if (_fex & FP_EX_INVALID) + { + float f = 0.0f; + math_force_eval (f / f); + } + if (_fex & FP_EX_DENORM) + { + float f = FLT_MIN, g = 2.0f; + math_force_eval (f / g); + } + if (_fex & FP_EX_DIVZERO) + { + float f = 1.0f, g = 0.0f; + math_force_eval (f / g); + } + if (_fex & FP_EX_OVERFLOW) + { + float force_underflow = FLT_MAX * FLT_MAX; + math_force_eval (force_underflow); + } + if (_fex & FP_EX_UNDERFLOW) + { + float force_overflow = FLT_MIN * FLT_MIN; + math_force_eval (force_overflow); + } + if (_fex & FP_EX_INEXACT) + {
+ float f = 1.0f, g = 3.0f; + math_force_eval (f / g); + } +} +strong_alias (__sfp_handle_exceptions, __wrap___sfp_handle_exceptions) diff --git a/sysdeps/x86_64/fpu/Makefile b/sysdeps/x86_64/fpu/Makefile index a4ff2723a8..5becb96fa3 100644 --- a/sysdeps/x86_64/fpu/Makefile +++ b/sysdeps/x86_64/fpu/Makefile @@ -25,6 +25,9 @@ endif # Variables for libmvec tests. ifeq ($(subdir),math) +libm-routines += sfp-exceptions +LDFLAGS-m.so += -Wl,--wrap=__sfp_handle_exceptions + ifeq ($(build-mathvec),yes) libmvec-tests += double-vlen2 double-vlen4 double-vlen4-avx2 \ float-vlen4 float-vlen8 float-vlen8-avx2 ^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH 3/4] Use libc_fe* macros in ldbl-128/e_expl.c. 2020-03-26 14:53 ` Adhemerval Zanella @ 2020-03-27 14:23 ` Stefan Liebler 2020-03-30 18:12 ` Adhemerval Zanella 0 siblings, 1 reply; 14+ messages in thread From: Stefan Liebler @ 2020-03-27 14:23 UTC (permalink / raw) To: libc-alpha On 3/26/20 3:53 PM, Adhemerval Zanella via Libc-alpha wrote: > > > On 26/03/2020 06:08, Stefan Liebler via Libc-alpha wrote: >> On 3/25/20 4:42 PM, Adhemerval Zanella via Libc-alpha wrote: >>> >>> >>> On 25/03/2020 12:07, Adhemerval Zanella wrote: >>>> >>>> >>>> On 25/03/2020 12:00, Adhemerval Zanella wrote: >>>>> >>>>> >>>>> On 25/03/2020 07:13, Stefan Liebler via Libc-alpha wrote: >>>>>> Unfortunately, this patch is responsible for testfails on x86_64: >>>>>> >>>>>> math/test-float128-exp.out: >>>>>> Failure: exp (-0x1p-10000): Exception "Underflow" set >>>>>> Failure: exp (-0x2p-16384): Exception "Underflow" set >>>>>> ... >>>>>> >>>>>> math/test-float128-cexp.out: >>>>>> Failure: Real part of: cexp (0x2p-16384 - 0x4p-1076 i): Exception "Underflow" set >>>>>> Failure: Real part of: cexp (0x2p-16384 - 0x8p-152 i): Exception "Underflow" set >>>>> >>>>> The sysdeps/x86/fpu/fenv_private.h states: >>>>> >>>>> 296 #ifdef __x86_64__ >>>>> 297 /* The SSE rounding mode is used by soft-fp (libgcc and glibc) on >>>>> 298 x86_64, so that must be set for float128 computations. 
*/ >>>>> 299 # define SET_RESTORE_ROUNDF128(RM) \ >>>>> 300 SET_RESTORE_ROUND_GENERIC (RM, libc_feholdsetround_sse, libc_feresetround_sse) >>>>> >>>>> So >>>>> >>>>>>> diff --git a/sysdeps/ieee754/ldbl-128/e_expl.c b/sysdeps/ieee754/ldbl-128/e_expl.c >>>>>>> index 37c1538c08..104ace1690 100644 >>>>>>> --- a/sysdeps/ieee754/ldbl-128/e_expl.c >>>>>>> +++ b/sysdeps/ieee754/ldbl-128/e_expl.c >>>>>>> @@ -66,6 +66,7 @@ >>>>>>> #include <inttypes.h> >>>>>>> #include <math-barriers.h> >>>>>>> #include <math_private.h> >>>>>>> +#include <fenv_private.h> >>>>>>> #include <math-underflow.h> >>>>>>> #include <stdlib.h> >>>>>>> #include "t_expl.h" >>>>>>> @@ -146,9 +147,10 @@ __ieee754_expl (_Float128 x) >>>>>>> union ieee854_long_double ex2_u, scale_u; >>>>>>> fenv_t oldenv; >>>>>>> - feholdexcept (&oldenv); >>>>>>> #ifdef FE_TONEAREST >>>>>>> - fesetround (FE_TONEAREST); >>>>>>> + libc_feholdexcept_setroundl (&oldenv, FE_TONEAREST); >>>>> >>>>> Should be libc_feholdexcept_setroundf128. >>>> >>>> But it does not see to help here, so I don't know what is failing as well. >>> >>> Ok, so what is happening __sfp_handle_exceptions always use 387 exception >>> mode for FP_EX_OVERFLOW and FP_EX_UNDERFLOW: >>> >>> config/i386/sfp-exceptions.c >>> >>> 79 if (_fex & FP_EX_OVERFLOW) >>> 80 { >>> 81 struct fenv temp; >>> 82 asm volatile ("fnstenv\t%0" : "=m" (temp)); >>> 83 temp.__status_word |= FP_EX_OVERFLOW; >>> 84 asm volatile ("fldenv\t%0" : : "m" (temp)); >>> 85 asm volatile ("fwait"); >>> 86 } >>> 87 if (_fex & FP_EX_UNDERFLOW) >>> 88 { >>> 89 struct fenv temp; >>> 90 asm volatile ("fnstenv\t%0" : "=m" (temp)); >>> 91 temp.__status_word |= FP_EX_UNDERFLOW; >>> 92 asm volatile ("fldenv\t%0" : : "m" (temp)); >>> 93 asm volatile ("fwait"); >>> 94 } >>> Yes this looks like the mentioned disassembly. >>> Different that FP_EX_INEXACT, for instance, where __SSE_MATH__ sets >>> whether SSE is used or not. 
>>> >>> So I think it is not safe to use the SSE variants for libc_*_testf128, >>> as for i387 we should use the default_* instead. >>> >> I've just switched to default_* in sysdeps/x86/fpu/fenv_private.h: >> -#ifdef __x86_64__ >> +#if 0 >> /* The SSE rounding mode is used by soft-fp (libgcc and glibc) on >> x86_64, so that must be set for float128 computations. */ >> # define SET_RESTORE_ROUNDF128(RM) \ >> >> But now there are 7 testfails. For some of them, the max.ulp goes up (over 10), but there are even worse cases. Here are shortened excerpts of the out files: >> - math/test-float128-clog.out: >> Failure: Test: Real part of: clog_towardzero (0x2.82b795e420b281a934c6dd315cb2p-4 + 0xf.cd42a15bf9a361243a89663e81e8p-4 i) >> ulp : 162259276829213363391578010288127.0000 >> max.ulp : 3.0000 >> Failure: Test: Real part of: clog_upward (0x2.82b795e420b281a934c6dd315cb2p-4 + 0xf.cd42a15bf9a361243a89663e81e8p-4 i) >> ulp : 162259276829213363391578010288128.0000 >> max.ulp : 4.0000 >> >> - math/test-float128-clog10.out: >> Failure: Test: Real part of: clog10_downward (0x3.bea2bd62e35p-4 + 0xf.8e3d619a8d11bfd30b038eep-4 i) >> ulp : 4.0000 >> max.ulp : 3.0000 >> Failure: Test: Real part of: clog10_towardzero (0x2.82b795e420b281a934c6dd315cb2p-4 + 0xf.cd42a15bf9a361243a89663e81e8p-4 i) >> ulp : 140936617129079063283494433422698.0000 >> max.ulp : 4.0000 >> Failure: Test: Real part of: clog10_upward (0x2.82b795e420b281a934c6dd315cb2p-4 + 0xf.cd42a15bf9a361243a89663e81e8p-4 i) >> ulp : 140936617129079063283494433422698.0000 >> max.ulp : 4.0000 >> >> - math/test-float128-jn.out >> - math/test-float128-lgamma.out >> - math/test-float128-tgamma.out: >> something like: >> ulp : 12.0000 >> max.ulp : 4.0000 >> >> Failure: tgamma_upward (-0x6.ec00000000000008p+8): errno set to 0, expected 34 (ERANGE) >> >> - math/test-float128-y1.out: >> Failure: Test: y1_downward (0x2p+0) >> ulp : 13.0000 >> max.ulp : 4.0000 >> Failure: Test: y1_towardzero (0x2p+0) >> ulp : 6.0000 >> max.ulp : 2.0000 
>> Failure: Test: y1_upward (0x2p+0) >> ulp : 10.0000 >> max.ulp : 5.0000 >> >> - math/test-float128-yn.out >> > > So it seems the issue is the mix on how libm fenv function, the internal > libc_fe*, and libgcc handles the exception register. The exported fenv > operates on both i387 and SSE (since it should work on long double as well), > and the internal libc_fe* will set either SSE for float, double, and float128 > and i387 for long double (as expected). > > The libgcc, however, will set either SEE or i387 depending of the exception. > This broke the assumption of libc_fe* for float128 where either SSE or i387 > will be used. > > One option might be to force libgcc to not use its __sfp_handle_exceptions > on x86_64 and provide one that uses only SEE operations since libgcc does > not use 'long double' on float128 operations. The patch below does it > and applied on top your patches shows no regressions. Great news. Thanks Adhemerval. I've also successfully build and run the testsuite with your patch on top of mine and with only your patch without mine. As e.g. __multf3 or __addtf3 is used in various f128 functions, can you please first commit your patch? Then I will add a reference to this commit id in the commit-message. One other question: Why are the soft-fp functions (for add / multiply) called at all. Are the corresponding hardware instructions not available on all x86_64 machines? Or do we miss a compiler flag? > > And I think we should fix libgcc in a similar manner, since checking on > config/i386/64/sfp-machine.h it only support SSE rounding mode. 
> > -- > > diff --git a/sysdeps/x86/fpu/sfp-exceptions.c b/sysdeps/x86/fpu/sfp-exceptions.c > new file mode 100644 > index 0000000000..676f396bc3 > --- /dev/null > +++ b/sysdeps/x86/fpu/sfp-exceptions.c > @@ -0,0 +1,49 @@ > +#include <fenv.h> > +#include <float.h> > +#include <math-barriers.h> > + > +#define FP_EX_INVALID 0x01 > +#define FP_EX_DENORM 0x02 > +#define FP_EX_DIVZERO 0x04 > +#define FP_EX_OVERFLOW 0x08 > +#define FP_EX_UNDERFLOW 0x10 > +#define FP_EX_INEXACT 0x20 > +#define FP_EX_ALL \ > + (FP_EX_INVALID | FP_EX_DENORM | FP_EX_DIVZERO | FP_EX_OVERFLOW \ > + | FP_EX_UNDERFLOW | FP_EX_INEXACT) > + > +void > +__sfp_handle_exceptions (int _fex) > +{ > + if (_fex & FP_EX_INVALID) > + { > + float f = 0.0f; > + math_force_eval (f / f); > + } > + if (_fex & FP_EX_DENORM) > + { > + float f = FLT_MIN, g = 2.0f; > + math_force_eval (f / g); > + } > + if (_fex & FP_EX_DIVZERO) > + { > + float f = 1.0f, g = 0.0f; > + math_force_eval (f / g); > + } > + if (_fex & FP_EX_OVERFLOW) > + { > + float force_underflow = FLT_MAX * FLT_MAX; > + math_force_eval (force_underflow); > + } > + if (_fex & FP_EX_UNDERFLOW) > + { > + float force_overflow = FLT_MIN * FLT_MIN; > + math_force_eval (force_overflow); > + } > + if (_fex & FP_EX_INEXACT) > + { > + float f = 1.0f, g = 3.0f; > + math_force_eval (f / g); > + } > +} > +strong_alias (__sfp_handle_exceptions, __wrap___sfp_handle_exceptions) > diff --git a/sysdeps/x86_64/fpu/Makefile b/sysdeps/x86_64/fpu/Makefile > index a4ff2723a8..5becb96fa3 100644 > --- a/sysdeps/x86_64/fpu/Makefile > +++ b/sysdeps/x86_64/fpu/Makefile > @@ -25,6 +25,9 @@ endif > > # Variables for libmvec tests. > ifeq ($(subdir),math) > +libm-routines += sfp-exceptions > +LDFLAGS-m.so += -Wl,--wrap=__sfp_handle_exceptions > + > ifeq ($(build-mathvec),yes) > libmvec-tests += double-vlen2 double-vlen4 double-vlen4-avx2 \ > float-vlen4 float-vlen8 float-vlen8-avx2 > ^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH 3/4] Use libc_fe* macros in ldbl-128/e_expl.c. 2020-03-27 14:23 ` Stefan Liebler @ 2020-03-30 18:12 ` Adhemerval Zanella 2020-03-31 7:39 ` Stefan Liebler 0 siblings, 1 reply; 14+ messages in thread From: Adhemerval Zanella @ 2020-03-30 18:12 UTC (permalink / raw) To: libc-alpha On 27/03/2020 11:23, Stefan Liebler via Libc-alpha wrote: > On 3/26/20 3:53 PM, Adhemerval Zanella via Libc-alpha wrote: >> >> >> On 26/03/2020 06:08, Stefan Liebler via Libc-alpha wrote: >>> On 3/25/20 4:42 PM, Adhemerval Zanella via Libc-alpha wrote: >>>> >>>> >>>> On 25/03/2020 12:07, Adhemerval Zanella wrote: >>>>> >>>>> >>>>> On 25/03/2020 12:00, Adhemerval Zanella wrote: >>>>>> >>>>>> >>>>>> On 25/03/2020 07:13, Stefan Liebler via Libc-alpha wrote: >>>>>>> Unfortunately, this patch is responsible for testfails on x86_64: >>>>>>> >>>>>>> math/test-float128-exp.out: >>>>>>> Failure: exp (-0x1p-10000): Exception "Underflow" set >>>>>>> Failure: exp (-0x2p-16384): Exception "Underflow" set >>>>>>> ... >>>>>>> >>>>>>> math/test-float128-cexp.out: >>>>>>> Failure: Real part of: cexp (0x2p-16384 - 0x4p-1076 i): Exception "Underflow" set >>>>>>> Failure: Real part of: cexp (0x2p-16384 - 0x8p-152 i): Exception "Underflow" set >>>>>> >>>>>> The sysdeps/x86/fpu/fenv_private.h states: >>>>>> >>>>>> 296 #ifdef __x86_64__ >>>>>> 297 /* The SSE rounding mode is used by soft-fp (libgcc and glibc) on >>>>>> 298 x86_64, so that must be set for float128 computations. 
*/ >>>>>> 299 # define SET_RESTORE_ROUNDF128(RM) \ >>>>>> 300 SET_RESTORE_ROUND_GENERIC (RM, libc_feholdsetround_sse, libc_feresetround_sse) >>>>>> >>>>>> So >>>>>> >>>>>>>> diff --git a/sysdeps/ieee754/ldbl-128/e_expl.c b/sysdeps/ieee754/ldbl-128/e_expl.c >>>>>>>> index 37c1538c08..104ace1690 100644 >>>>>>>> --- a/sysdeps/ieee754/ldbl-128/e_expl.c >>>>>>>> +++ b/sysdeps/ieee754/ldbl-128/e_expl.c >>>>>>>> @@ -66,6 +66,7 @@ >>>>>>>> #include <inttypes.h> >>>>>>>> #include <math-barriers.h> >>>>>>>> #include <math_private.h> >>>>>>>> +#include <fenv_private.h> >>>>>>>> #include <math-underflow.h> >>>>>>>> #include <stdlib.h> >>>>>>>> #include "t_expl.h" >>>>>>>> @@ -146,9 +147,10 @@ __ieee754_expl (_Float128 x) >>>>>>>> union ieee854_long_double ex2_u, scale_u; >>>>>>>> fenv_t oldenv; >>>>>>>> - feholdexcept (&oldenv); >>>>>>>> #ifdef FE_TONEAREST >>>>>>>> - fesetround (FE_TONEAREST); >>>>>>>> + libc_feholdexcept_setroundl (&oldenv, FE_TONEAREST); >>>>>> >>>>>> Should be libc_feholdexcept_setroundf128. >>>>> >>>>> But it does not see to help here, so I don't know what is failing as well. >>>> >>>> Ok, so what is happening __sfp_handle_exceptions always use 387 exception >>>> mode for FP_EX_OVERFLOW and FP_EX_UNDERFLOW: >>>> >>>> config/i386/sfp-exceptions.c >>>> >>>> 79 if (_fex & FP_EX_OVERFLOW) >>>> 80 { >>>> 81 struct fenv temp; >>>> 82 asm volatile ("fnstenv\t%0" : "=m" (temp)); >>>> 83 temp.__status_word |= FP_EX_OVERFLOW; >>>> 84 asm volatile ("fldenv\t%0" : : "m" (temp)); >>>> 85 asm volatile ("fwait"); >>>> 86 } >>>> 87 if (_fex & FP_EX_UNDERFLOW) >>>> 88 { >>>> 89 struct fenv temp; >>>> 90 asm volatile ("fnstenv\t%0" : "=m" (temp)); >>>> 91 temp.__status_word |= FP_EX_UNDERFLOW; >>>> 92 asm volatile ("fldenv\t%0" : : "m" (temp)); >>>> 93 asm volatile ("fwait"); >>>> 94 } >>>> Yes this looks like the mentioned disassembly. >>>> Different that FP_EX_INEXACT, for instance, where __SSE_MATH__ sets >>>> whether SSE is used or not. 
>>>> >>>> So I think it is not safe to use the SSE variants for libc_*_testf128, >>>> as for i387 we should use the default_* instead. >>>> >>> I've just switched to default_* in sysdeps/x86/fpu/fenv_private.h: >>> -#ifdef __x86_64__ >>> +#if 0 >>> /* The SSE rounding mode is used by soft-fp (libgcc and glibc) on >>> x86_64, so that must be set for float128 computations. */ >>> # define SET_RESTORE_ROUNDF128(RM) \ >>> >>> But now there are 7 testfails. For some of them, the max.ulp goes up (over 10), but there are even worse cases. Here are shortened excerpts of the out files: >>> - math/test-float128-clog.out: >>> Failure: Test: Real part of: clog_towardzero (0x2.82b795e420b281a934c6dd315cb2p-4 + 0xf.cd42a15bf9a361243a89663e81e8p-4 i) >>> ulp : 162259276829213363391578010288127.0000 >>> max.ulp : 3.0000 >>> Failure: Test: Real part of: clog_upward (0x2.82b795e420b281a934c6dd315cb2p-4 + 0xf.cd42a15bf9a361243a89663e81e8p-4 i) >>> ulp : 162259276829213363391578010288128.0000 >>> max.ulp : 4.0000 >>> >>> - math/test-float128-clog10.out: >>> Failure: Test: Real part of: clog10_downward (0x3.bea2bd62e35p-4 + 0xf.8e3d619a8d11bfd30b038eep-4 i) >>> ulp : 4.0000 >>> max.ulp : 3.0000 >>> Failure: Test: Real part of: clog10_towardzero (0x2.82b795e420b281a934c6dd315cb2p-4 + 0xf.cd42a15bf9a361243a89663e81e8p-4 i) >>> ulp : 140936617129079063283494433422698.0000 >>> max.ulp : 4.0000 >>> Failure: Test: Real part of: clog10_upward (0x2.82b795e420b281a934c6dd315cb2p-4 + 0xf.cd42a15bf9a361243a89663e81e8p-4 i) >>> ulp : 140936617129079063283494433422698.0000 >>> max.ulp : 4.0000 >>> >>> - math/test-float128-jn.out >>> - math/test-float128-lgamma.out >>> - math/test-float128-tgamma.out: >>> something like: >>> ulp : 12.0000 >>> max.ulp : 4.0000 >>> >>> Failure: tgamma_upward (-0x6.ec00000000000008p+8): errno set to 0, expected 34 (ERANGE) >>> >>> - math/test-float128-y1.out: >>> Failure: Test: y1_downward (0x2p+0) >>> ulp : 13.0000 >>> max.ulp : 4.0000 >>> Failure: Test: y1_towardzero 
(0x2p+0) >>> ulp : 6.0000 >>> max.ulp : 2.0000 >>> Failure: Test: y1_upward (0x2p+0) >>> ulp : 10.0000 >>> max.ulp : 5.0000 >>> >>> - math/test-float128-yn.out >>> >> >> So it seems the issue is the mix on how libm fenv function, the internal >> libc_fe*, and libgcc handles the exception register. The exported fenv >> operates on both i387 and SSE (since it should work on long double as well), >> and the internal libc_fe* will set either SSE for float, double, and float128 >> and i387 for long double (as expected). >> >> The libgcc, however, will set either SEE or i387 depending of the exception. >> This broke the assumption of libc_fe* for float128 where either SSE or i387 >> will be used. >> >> One option might be to force libgcc to not use its __sfp_handle_exceptions >> on x86_64 and provide one that uses only SEE operations since libgcc does >> not use 'long double' on float128 operations. The patch below does it >> and applied on top your patches shows no regressions. > > Great news. Thanks Adhemerval. > I've also successfully build and run the testsuite with your patch on top of mine and with only your patch without mine. > > As e.g. __multf3 or __addtf3 is used in various f128 functions, can you please first commit your patch? Then I will add a reference to this commit id in the commit-message. I will send an RFC for this patch; we need to check with the x86 maintainers whether this is the desirable direction and whether I got everything right. > > One other question: Why are the soft-fp functions (for add / multiply) called at all. Are the corresponding hardware instructions not available on all x86_64 machines? Or do we miss a compiler flag? The float128 type on gcc/x86_64 is implemented by the soft-fp library in libgcc [1], and its ABI passes arguments through SSE registers [2].
[1] https://stackoverflow.com/questions/26639477/what-exactly-is-a-float128-if-im-using-gcc-4-9-on-x86-64 [2] https://github.com/hjl-tools/x86-psABI/wiki/X86-psABI ^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH 3/4] Use libc_fe* macros in ldbl-128/e_expl.c. 2020-03-30 18:12 ` Adhemerval Zanella @ 2020-03-31 7:39 ` Stefan Liebler 0 siblings, 0 replies; 14+ messages in thread From: Stefan Liebler @ 2020-03-31 7:39 UTC (permalink / raw) To: libc-alpha On 3/30/20 8:12 PM, Adhemerval Zanella via Libc-alpha wrote: > > > On 27/03/2020 11:23, Stefan Liebler via Libc-alpha wrote: >> On 3/26/20 3:53 PM, Adhemerval Zanella via Libc-alpha wrote: >>> >>> >>> On 26/03/2020 06:08, Stefan Liebler via Libc-alpha wrote: >>>> On 3/25/20 4:42 PM, Adhemerval Zanella via Libc-alpha wrote: >>>>> >>>>> >>>>> On 25/03/2020 12:07, Adhemerval Zanella wrote: >>>>>> >>>>>> >>>>>> On 25/03/2020 12:00, Adhemerval Zanella wrote: >>>>>>> >>>>>>> >>>>>>> On 25/03/2020 07:13, Stefan Liebler via Libc-alpha wrote: >>>>>>>> Unfortunately, this patch is responsible for testfails on x86_64: >>>>>>>> >>>>>>>> math/test-float128-exp.out: >>>>>>>> Failure: exp (-0x1p-10000): Exception "Underflow" set >>>>>>>> Failure: exp (-0x2p-16384): Exception "Underflow" set >>>>>>>> ... >>>>>>>> >>>>>>>> math/test-float128-cexp.out: >>>>>>>> Failure: Real part of: cexp (0x2p-16384 - 0x4p-1076 i): Exception "Underflow" set >>>>>>>> Failure: Real part of: cexp (0x2p-16384 - 0x8p-152 i): Exception "Underflow" set >>>>>>> >>>>>>> The sysdeps/x86/fpu/fenv_private.h states: >>>>>>> >>>>>>> 296 #ifdef __x86_64__ >>>>>>> 297 /* The SSE rounding mode is used by soft-fp (libgcc and glibc) on >>>>>>> 298 x86_64, so that must be set for float128 computations. 
*/ >>>>>>> 299 # define SET_RESTORE_ROUNDF128(RM) \ >>>>>>> 300 SET_RESTORE_ROUND_GENERIC (RM, libc_feholdsetround_sse, libc_feresetround_sse) >>>>>>> >>>>>>> So >>>>>>> >>>>>>>>> diff --git a/sysdeps/ieee754/ldbl-128/e_expl.c b/sysdeps/ieee754/ldbl-128/e_expl.c >>>>>>>>> index 37c1538c08..104ace1690 100644 >>>>>>>>> --- a/sysdeps/ieee754/ldbl-128/e_expl.c >>>>>>>>> +++ b/sysdeps/ieee754/ldbl-128/e_expl.c >>>>>>>>> @@ -66,6 +66,7 @@ >>>>>>>>> #include <inttypes.h> >>>>>>>>> #include <math-barriers.h> >>>>>>>>> #include <math_private.h> >>>>>>>>> +#include <fenv_private.h> >>>>>>>>> #include <math-underflow.h> >>>>>>>>> #include <stdlib.h> >>>>>>>>> #include "t_expl.h" >>>>>>>>> @@ -146,9 +147,10 @@ __ieee754_expl (_Float128 x) >>>>>>>>> union ieee854_long_double ex2_u, scale_u; >>>>>>>>> fenv_t oldenv; >>>>>>>>> - feholdexcept (&oldenv); >>>>>>>>> #ifdef FE_TONEAREST >>>>>>>>> - fesetround (FE_TONEAREST); >>>>>>>>> + libc_feholdexcept_setroundl (&oldenv, FE_TONEAREST); >>>>>>> >>>>>>> Should be libc_feholdexcept_setroundf128. >>>>>> >>>>>> But it does not see to help here, so I don't know what is failing as well. >>>>> >>>>> Ok, so what is happening __sfp_handle_exceptions always use 387 exception >>>>> mode for FP_EX_OVERFLOW and FP_EX_UNDERFLOW: >>>>> >>>>> config/i386/sfp-exceptions.c >>>>> >>>>> 79 if (_fex & FP_EX_OVERFLOW) >>>>> 80 { >>>>> 81 struct fenv temp; >>>>> 82 asm volatile ("fnstenv\t%0" : "=m" (temp)); >>>>> 83 temp.__status_word |= FP_EX_OVERFLOW; >>>>> 84 asm volatile ("fldenv\t%0" : : "m" (temp)); >>>>> 85 asm volatile ("fwait"); >>>>> 86 } >>>>> 87 if (_fex & FP_EX_UNDERFLOW) >>>>> 88 { >>>>> 89 struct fenv temp; >>>>> 90 asm volatile ("fnstenv\t%0" : "=m" (temp)); >>>>> 91 temp.__status_word |= FP_EX_UNDERFLOW; >>>>> 92 asm volatile ("fldenv\t%0" : : "m" (temp)); >>>>> 93 asm volatile ("fwait"); >>>>> 94 } >>>>> Yes this looks like the mentioned disassembly. 
>>>>> Different that FP_EX_INEXACT, for instance, where __SSE_MATH__ sets >>>>> whether SSE is used or not. >>>>> >>>>> So I think it is not safe to use the SSE variants for libc_*_testf128, >>>>> as for i387 we should use the default_* instead. >>>>> >>>> I've just switched to default_* in sysdeps/x86/fpu/fenv_private.h: >>>> -#ifdef __x86_64__ >>>> +#if 0 >>>> /* The SSE rounding mode is used by soft-fp (libgcc and glibc) on >>>> x86_64, so that must be set for float128 computations. */ >>>> # define SET_RESTORE_ROUNDF128(RM) \ >>>> >>>> But now there are 7 testfails. For some of them, the max.ulp goes up (over 10), but there are even worse cases. Here are shortened excerpts of the out files: >>>> - math/test-float128-clog.out: >>>> Failure: Test: Real part of: clog_towardzero (0x2.82b795e420b281a934c6dd315cb2p-4 + 0xf.cd42a15bf9a361243a89663e81e8p-4 i) >>>> ulp : 162259276829213363391578010288127.0000 >>>> max.ulp : 3.0000 >>>> Failure: Test: Real part of: clog_upward (0x2.82b795e420b281a934c6dd315cb2p-4 + 0xf.cd42a15bf9a361243a89663e81e8p-4 i) >>>> ulp : 162259276829213363391578010288128.0000 >>>> max.ulp : 4.0000 >>>> >>>> - math/test-float128-clog10.out: >>>> Failure: Test: Real part of: clog10_downward (0x3.bea2bd62e35p-4 + 0xf.8e3d619a8d11bfd30b038eep-4 i) >>>> ulp : 4.0000 >>>> max.ulp : 3.0000 >>>> Failure: Test: Real part of: clog10_towardzero (0x2.82b795e420b281a934c6dd315cb2p-4 + 0xf.cd42a15bf9a361243a89663e81e8p-4 i) >>>> ulp : 140936617129079063283494433422698.0000 >>>> max.ulp : 4.0000 >>>> Failure: Test: Real part of: clog10_upward (0x2.82b795e420b281a934c6dd315cb2p-4 + 0xf.cd42a15bf9a361243a89663e81e8p-4 i) >>>> ulp : 140936617129079063283494433422698.0000 >>>> max.ulp : 4.0000 >>>> >>>> - math/test-float128-jn.out >>>> - math/test-float128-lgamma.out >>>> - math/test-float128-tgamma.out: >>>> something like: >>>> ulp : 12.0000 >>>> max.ulp : 4.0000 >>>> >>>> Failure: tgamma_upward (-0x6.ec00000000000008p+8): errno set to 0, expected 34 (ERANGE) 
>>>> >>>> - math/test-float128-y1.out: >>>> Failure: Test: y1_downward (0x2p+0) >>>> ulp : 13.0000 >>>> max.ulp : 4.0000 >>>> Failure: Test: y1_towardzero (0x2p+0) >>>> ulp : 6.0000 >>>> max.ulp : 2.0000 >>>> Failure: Test: y1_upward (0x2p+0) >>>> ulp : 10.0000 >>>> max.ulp : 5.0000 >>>> >>>> - math/test-float128-yn.out >>>> >>> >>> So it seems the issue is the mix on how libm fenv function, the internal >>> libc_fe*, and libgcc handles the exception register. The exported fenv >>> operates on both i387 and SSE (since it should work on long double as well), >>> and the internal libc_fe* will set either SSE for float, double, and float128 >>> and i387 for long double (as expected). >>> >>> The libgcc, however, will set either SEE or i387 depending of the exception. >>> This broke the assumption of libc_fe* for float128 where either SSE or i387 >>> will be used. >>> >>> One option might be to force libgcc to not use its __sfp_handle_exceptions >>> on x86_64 and provide one that uses only SEE operations since libgcc does >>> not use 'long double' on float128 operations. The patch below does it >>> and applied on top your patches shows no regressions. >> >> Great news. Thanks Adhemerval. >> I've also successfully build and run the testsuite with your patch on top of mine and with only your patch without mine. >> >> As e.g. __multf3 or __addtf3 is used in various f128 functions, can you please first commit your patch? Then I will add a reference to this commit id in the commit-message. > > I will send a RFC for this patch, we need to check with x86 maintainers > if this the desirable direction and if I got everything right. > >> >> One other question: Why are the soft-fp functions (for add / multiply) called at all. Are the corresponding hardware instructions not available on all x86_64 machines? Or do we miss a compiler flag? > > The float128 on gcc/x86_64 is implemented by soft-fp library in libgcc [1] > and its ABI passes arguments through SSE register [2]. 
> > [1] https://stackoverflow.com/questions/26639477/what-exactly-is-a-float128-if-im-using-gcc-4-9-on-x86-64 > [2] https://github.com/hjl-tools/x86-psABI/wiki/X86-psABI > Thanks for the info and for working on this. ^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH 3/4] Use libc_fe* macros in ldbl-128/e_expl.c. 2020-03-25 15:07 ` Adhemerval Zanella 2020-03-25 15:42 ` Adhemerval Zanella @ 2020-03-26 9:08 ` Stefan Liebler 1 sibling, 0 replies; 14+ messages in thread From: Stefan Liebler @ 2020-03-26 9:08 UTC (permalink / raw) To: libc-alpha On 3/25/20 4:07 PM, Adhemerval Zanella via Libc-alpha wrote: > > > On 25/03/2020 12:00, Adhemerval Zanella wrote: >> >> >> On 25/03/2020 07:13, Stefan Liebler via Libc-alpha wrote: >>> Unfortunately, this patch is responsible for testfails on x86_64: >>> >>> math/test-float128-exp.out: >>> Failure: exp (-0x1p-10000): Exception "Underflow" set >>> Failure: exp (-0x2p-16384): Exception "Underflow" set >>> ... >>> >>> math/test-float128-cexp.out: >>> Failure: Real part of: cexp (0x2p-16384 - 0x4p-1076 i): Exception "Underflow" set >>> Failure: Real part of: cexp (0x2p-16384 - 0x8p-152 i): Exception "Underflow" set >> >> The sysdeps/x86/fpu/fenv_private.h states: >> >> 296 #ifdef __x86_64__ >> 297 /* The SSE rounding mode is used by soft-fp (libgcc and glibc) on >> 298 x86_64, so that must be set for float128 computations. 
*/ >> 299 # define SET_RESTORE_ROUNDF128(RM) \ >> 300 SET_RESTORE_ROUND_GENERIC (RM, libc_feholdsetround_sse, libc_feresetround_sse) >> >> So >> >>>> diff --git a/sysdeps/ieee754/ldbl-128/e_expl.c b/sysdeps/ieee754/ldbl-128/e_expl.c >>>> index 37c1538c08..104ace1690 100644 >>>> --- a/sysdeps/ieee754/ldbl-128/e_expl.c >>>> +++ b/sysdeps/ieee754/ldbl-128/e_expl.c >>>> @@ -66,6 +66,7 @@ >>>> #include <inttypes.h> >>>> #include <math-barriers.h> >>>> #include <math_private.h> >>>> +#include <fenv_private.h> >>>> #include <math-underflow.h> >>>> #include <stdlib.h> >>>> #include "t_expl.h" >>>> @@ -146,9 +147,10 @@ __ieee754_expl (_Float128 x) >>>> union ieee854_long_double ex2_u, scale_u; >>>> fenv_t oldenv; >>>> - feholdexcept (&oldenv); >>>> #ifdef FE_TONEAREST >>>> - fesetround (FE_TONEAREST); >>>> + libc_feholdexcept_setroundl (&oldenv, FE_TONEAREST); >> >> Should be libc_feholdexcept_setroundf128. > > But it does not see to help here, so I don't know what is failing as well. > It does not help as this is already the case. The float128 exp is built with sysdeps/ieee754/float128/e_expf128.c, which includes sysdeps/ieee754/float128/float128_private.h before including sysdeps/ieee754/ldbl-128/e_expl.c. float128_private.h contains things like this: ... #include <fenv_private.h> ... #ifdef libc_feholdexcept_setroundf128 # undef libc_feholdexcept_setroundl # define libc_feholdexcept_setroundl(ENV, RM) \ libc_feholdexcept_setroundf128 (ENV, RM) #endif ... #ifdef libc_fesetenvf128 # undef libc_fesetenvl # define libc_fesetenvl(ENV) libc_fesetenvf128 (ENV) #endif >> >>>> +#else >>>> + libc_feholdexceptl (&oldenv); >> >> And here libc_fesetenvf128. >> >>>> #endif >>>> /* Calculate n. */ >>>> @@ -198,7 +200,7 @@ __ieee754_expl (_Float128 x) >>>> math_force_eval (x22); >>>> /* Return result.
*/ >>>> - fesetenv (&oldenv); >>>> + libc_fesetenvl (&oldenv); >>>> result = x22 * ex2_u.d + ex2_u.d; >> >> It might require extend the libc_*f128 macros to other architectures >> (not sure). >> ^ permalink raw reply [flat|nested] 14+ messages in thread
* [PATCH 4/4] Use libc_fe* macros in k_standardl.c. 2020-03-25 10:06 [PATCH 1/4] Use libc_fe* macros in ldbl-128/s_nearbyintl.c Stefan Liebler 2020-03-25 10:06 ` [PATCH 2/4] Use libc_fe* macros in ldbl-128/s_fmal.c Stefan Liebler 2020-03-25 10:06 ` [PATCH 3/4] Use libc_fe* macros in ldbl-128/e_expl.c Stefan Liebler @ 2020-03-25 10:06 ` Stefan Liebler 2 siblings, 0 replies; 14+ messages in thread From: Stefan Liebler @ 2020-03-25 10:06 UTC (permalink / raw) To: libc-alpha; +Cc: Stefan Liebler The calls to feholdexcept and fesetenv are replaced by the libc_fe* macros. --- sysdeps/ieee754/k_standardl.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sysdeps/ieee754/k_standardl.c b/sysdeps/ieee754/k_standardl.c index a93d53cde9..286741879d 100644 --- a/sysdeps/ieee754/k_standardl.c +++ b/sysdeps/ieee754/k_standardl.c @@ -34,6 +34,7 @@ #include <math-barriers.h> #include <math-svid-compat.h> #include <fenv.h> +#include <fenv_private.h> #include <float.h> #include <errno.h> @@ -53,12 +54,12 @@ __kernel_standard_l (long double x, long double y, int type) struct exception exc; fenv_t env; - feholdexcept (&env); + libc_feholdexceptl (&env); dx = x; dy = y; math_force_eval (dx); math_force_eval (dy); - fesetenv (&env); + libc_fesetenvl (&env); switch (type) { -- 2.23.0 ^ permalink raw reply [flat|nested] 14+ messages in thread
end of thread, other threads:[~2020-03-31 7:39 UTC | newest] Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- 2020-03-25 10:06 [PATCH 1/4] Use libc_fe* macros in ldbl-128/s_nearbyintl.c Stefan Liebler 2020-03-25 10:06 ` [PATCH 2/4] Use libc_fe* macros in ldbl-128/s_fmal.c Stefan Liebler 2020-03-25 10:06 ` [PATCH 3/4] Use libc_fe* macros in ldbl-128/e_expl.c Stefan Liebler 2020-03-25 10:13 ` Stefan Liebler 2020-03-25 15:00 ` Adhemerval Zanella 2020-03-25 15:07 ` Adhemerval Zanella 2020-03-25 15:42 ` Adhemerval Zanella 2020-03-26 9:08 ` Stefan Liebler 2020-03-26 14:53 ` Adhemerval Zanella 2020-03-27 14:23 ` Stefan Liebler 2020-03-30 18:12 ` Adhemerval Zanella 2020-03-31 7:39 ` Stefan Liebler 2020-03-26 9:08 ` Stefan Liebler 2020-03-25 10:06 ` [PATCH 4/4] Use libc_fe* macros in k_standardl.c Stefan Liebler
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).