[PATCH 3/4] Add ILP32 support to aarch64

public inbox for libc-alpha@sourceware.org
 help / color / mirror / Atom feed

* [PATCH 3/4] Add ILP32 support to aarch64
@ 2017-08-03 15:36 Steve Ellcey
  2017-08-03 17:47 ` Joseph Myers
  0 siblings, 1 reply; 13+ messages in thread
From: Steve Ellcey @ 2017-08-03 15:36 UTC (permalink / raw)
  To: libc-alpha

[-- Attachment #1: Type: text/plain, Size: 1202 bytes --]

Here are some fixes to the floating point to integer conversion
routines for aarch64.

Steve Ellcey
sellcey@cavium.com

2017-08-03  Steve Ellcey  <sellcey@cavium.com>

	* sysdeps/aarch64/fpu/math_private.h (libc_feclearexcept_aarch64):
	New function.
	(libc_feclearexcept, libc_feclearexceptf, libc_feclearexceptl):
	New defines.
	* sysdeps/aarch64/fpu/s_llrint.c (OREG_SIZE): New macro.
	* sysdeps/aarch64/fpu/s_llround.c (OREG_SIZE): Likewise.
	* sysdeps/aarch64/fpu/s_llrintf.c (OREGS, IREGS): Remove.
	(IREG_SIZE, OREG_SIZE): New macros.
	* sysdeps/aarch64/fpu/s_llroundf.c: (OREGS, IREGS): Remove.
	(IREG_SIZE, OREG_SIZE): New macros.
	* sysdeps/aarch64/fpu/s_lrintf.c (IREGS): Remove.
	(IREG_SIZE): New macro.
	* sysdeps/aarch64/fpu/s_lroundf.c (IREGS): Remove.
	(IREG_SIZE): New macro.
	* sysdeps/aarch64/fpu/s_lrint.c (math_private.h): New include.
	(IREG_SIZE, OREG_SIZE): Initialize if not already set.
	(OREGS, IREGS): Set based on IREG_SIZE and OREG_SIZE.
	(__CONCATX): Clear INEXACT if INVALID is set.
	* sysdeps/aarch64/fpu/s_lround.c (IREG_SIZE, OREG_SIZE):
	Initialize if not already set.
	(OREGS, IREGS): Set based on IREG_SIZE and OREG_SIZE.

[-- Attachment #2: aarch64-ilp32-math.patch --]
[-- Type: text/x-patch, Size: 5073 bytes --]

diff --git a/sysdeps/aarch64/fpu/math_private.h b/sysdeps/aarch64/fpu/math_private.h
index 807111e..d5a141d5 100644
--- a/sysdeps/aarch64/fpu/math_private.h
+++ b/sysdeps/aarch64/fpu/math_private.h
@@ -134,6 +134,20 @@ libc_fetestexcept_aarch64 (int ex)
 #define libc_fetestexceptl libc_fetestexcept_aarch64
 
 static __always_inline void
+libc_feclearexcept_aarch64 (int ex)
+{
+  fpu_fpsr_t fpsr;
+
+  _FPU_GETFPSR (fpsr);
+  fpsr &= ~((fpu_fpsr_t) ex);
+  _FPU_SETFPSR (fpsr);
+}
+
+#define libc_feclearexcept  libc_feclearexcept_aarch64
+#define libc_feclearexceptf libc_feclearexcept_aarch64
+#define libc_feclearexceptl libc_feclearexcept_aarch64
+
+static __always_inline void
 libc_fesetenv_aarch64 (const fenv_t *envp)
 {
   fpu_control_t fpcr;
diff --git a/sysdeps/aarch64/fpu/s_llrint.c b/sysdeps/aarch64/fpu/s_llrint.c
index c0d0d0e..57821c0 100644
--- a/sysdeps/aarch64/fpu/s_llrint.c
+++ b/sysdeps/aarch64/fpu/s_llrint.c
@@ -18,4 +18,5 @@
 
 #define FUNC llrint
 #define OTYPE long long int
+#define OREG_SIZE 64
 #include <s_lrint.c>
diff --git a/sysdeps/aarch64/fpu/s_llrintf.c b/sysdeps/aarch64/fpu/s_llrintf.c
index 67724c6..98ed4f8 100644
--- a/sysdeps/aarch64/fpu/s_llrintf.c
+++ b/sysdeps/aarch64/fpu/s_llrintf.c
@@ -18,6 +18,7 @@
 
 #define FUNC llrintf
 #define ITYPE float
-#define IREGS "s"
+#define IREG_SIZE 32
 #define OTYPE long long int
+#define OREG_SIZE 64
 #include <s_lrint.c>
diff --git a/sysdeps/aarch64/fpu/s_llround.c b/sysdeps/aarch64/fpu/s_llround.c
index ed4b192..ef7aedf 100644
--- a/sysdeps/aarch64/fpu/s_llround.c
+++ b/sysdeps/aarch64/fpu/s_llround.c
@@ -18,4 +18,5 @@
 
 #define FUNC llround
 #define OTYPE long long int
+#define OREG_SIZE 64
 #include <s_lround.c>
diff --git a/sysdeps/aarch64/fpu/s_llroundf.c b/sysdeps/aarch64/fpu/s_llroundf.c
index 360ce8b..294f0f4 100644
--- a/sysdeps/aarch64/fpu/s_llroundf.c
+++ b/sysdeps/aarch64/fpu/s_llroundf.c
@@ -18,6 +18,7 @@
 
 #define FUNC llroundf
 #define ITYPE float
-#define IREGS "s"
+#define IREG_SIZE 32
 #define OTYPE long long int
+#define OREG_SIZE 64
 #include <s_lround.c>
diff --git a/sysdeps/aarch64/fpu/s_lrint.c b/sysdeps/aarch64/fpu/s_lrint.c
index 8c61a03..9f90385 100644
--- a/sysdeps/aarch64/fpu/s_lrint.c
+++ b/sysdeps/aarch64/fpu/s_lrint.c
@@ -16,7 +16,9 @@
    License along with the GNU C Library; if not, see
    <http://www.gnu.org/licenses/>.  */
 
+#include <math_private.h>
 #include <math.h>
+#include <fenv.h>
 
 #ifndef FUNC
 # define FUNC lrint
@@ -24,18 +26,37 @@
 
 #ifndef ITYPE
 # define ITYPE double
-# define IREGS "d"
+# define IREG_SIZE 64
 #else
-# ifndef IREGS
-#  error IREGS not defined
+# ifndef IREG_SIZE
+#  error IREG_SIZE not defined
 # endif
 #endif
 
 #ifndef OTYPE
 # define OTYPE long int
+# ifdef __ILP32__
+#  define OREG_SIZE 32
+# else
+#  define OREG_SIZE 64
+# endif
+#else
+# ifndef OREG_SIZE
+#  error OREG_SIZE not defined
+# endif
 #endif
 
-#define OREGS "x"
+#if IREG_SIZE == 32
+# define IREGS "s"
+#else
+# define IREGS "d"
+#endif
+
+#if OREG_SIZE == 32
+# define OREGS "w"
+#else
+# define OREGS "x"
+#endif
 
 #define __CONCATX(a,b) __CONCAT(a,b)
 
@@ -47,6 +68,13 @@ __CONCATX(__,FUNC) (ITYPE x)
   asm ( "frintx" "\t%" IREGS "1, %" IREGS "2\n\t"
         "fcvtzs" "\t%" OREGS "0, %" IREGS "1"
         : "=r" (result), "=w" (temp) : "w" (x) );
+#if OREG_SIZE == 32
+  /* The rounding step may set FE_INEXEXACT and converting to a 32 bit
+     value may set FE_INVALID.  We do not want FE_INEXACT set when
+     FE_INVALID has been set.  */
+  if (libc_fetestexcept_aarch64 (FE_INVALID))
+    libc_feclearexcept_aarch64 (FE_INEXACT);
+#endif
   return result;
 }
 
diff --git a/sysdeps/aarch64/fpu/s_lrintf.c b/sysdeps/aarch64/fpu/s_lrintf.c
index a995e4b..2e73271 100644
--- a/sysdeps/aarch64/fpu/s_lrintf.c
+++ b/sysdeps/aarch64/fpu/s_lrintf.c
@@ -18,5 +18,5 @@
 
 #define FUNC lrintf
 #define ITYPE float
-#define IREGS "s"
+#define IREG_SIZE 32
 #include <s_lrint.c>
diff --git a/sysdeps/aarch64/fpu/s_lround.c b/sysdeps/aarch64/fpu/s_lround.c
index 9be9e7f..1f77d82 100644
--- a/sysdeps/aarch64/fpu/s_lround.c
+++ b/sysdeps/aarch64/fpu/s_lround.c
@@ -24,18 +24,37 @@
 
 #ifndef ITYPE
 # define ITYPE double
-# define IREGS "d"
+# define IREG_SIZE 64
 #else
-# ifndef IREGS
-#  error IREGS not defined
+# ifndef IREG_SIZE
+#  error IREG_SIZE not defined
 # endif
 #endif
 
 #ifndef OTYPE
 # define OTYPE long int
+# ifdef __ILP32__
+#  define OREG_SIZE 32
+# else
+#  define OREG_SIZE 64
+# endif
+#else
+# ifndef OREG_SIZE
+#  error OREG_SIZE not defined
+# endif
+#endif
+
+#if IREG_SIZE == 32
+# define IREGS "s"
+#else
+# define IREGS "d"
 #endif
 
-#define OREGS "x"
+#if OREG_SIZE == 32
+# define OREGS "w"
+#else
+# define OREGS "x"
+#endif
 
 #define __CONCATX(a,b) __CONCAT(a,b)
 
diff --git a/sysdeps/aarch64/fpu/s_lroundf.c b/sysdeps/aarch64/fpu/s_lroundf.c
index 4a066d4..b30ddb6 100644
--- a/sysdeps/aarch64/fpu/s_lroundf.c
+++ b/sysdeps/aarch64/fpu/s_lroundf.c
@@ -18,5 +18,5 @@
 
 #define FUNC lroundf
 #define ITYPE float
-#define IREGS "s"
+#define IREG_SIZE 32
 #include <s_lround.c>

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 3/4] Add ILP32 support to aarch64
  2017-08-03 15:36 [PATCH 3/4] Add ILP32 support to aarch64 Steve Ellcey
@ 2017-08-03 17:47 ` Joseph Myers
  2017-08-03 18:22   ` Steve Ellcey
  0 siblings, 1 reply; 13+ messages in thread
From: Joseph Myers @ 2017-08-03 17:47 UTC (permalink / raw)
  To: Steve Ellcey; +Cc: libc-alpha

On Thu, 3 Aug 2017, Steve Ellcey wrote:

> +#if OREG_SIZE == 32
> +  /* The rounding step may set FE_INEXEXACT and converting to a 32 bit
> +     value may set FE_INVALID.  We do not want FE_INEXACT set when
> +     FE_INVALID has been set.  */
> +  if (libc_fetestexcept_aarch64 (FE_INVALID))
> +    libc_feclearexcept_aarch64 (FE_INEXACT);
> +#endif

This sort of thing is never correct, because it would clear an "inexact" 
exception that was already set on entry to the function, and functions 
other than <fenv.h> specified to do so should never clear already-raised 
exceptions.

(Also, typo "FE_INEXEXACT".)

-- 
Joseph S. Myers
joseph@codesourcery.com

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 3/4] Add ILP32 support to aarch64
  2017-08-03 17:47 ` Joseph Myers
@ 2017-08-03 18:22   ` Steve Ellcey
  2017-08-03 19:48     ` Joseph Myers
  0 siblings, 1 reply; 13+ messages in thread
From: Steve Ellcey @ 2017-08-03 18:22 UTC (permalink / raw)
  To: Joseph Myers; +Cc: libc-alpha

On Thu, 2017-08-03 at 17:47 +0000, Joseph Myers wrote:
> On Thu, 3 Aug 2017, Steve Ellcey wrote:
> 
> > 
> > +#if OREG_SIZE == 32
> > +  /* The rounding step may set FE_INEXEXACT and converting to a 32 bit
> > +     value may set FE_INVALID.  We do not want FE_INEXACT set when
> > +     FE_INVALID has been set.  */
> > +  if (libc_fetestexcept_aarch64 (FE_INVALID))
> > +    libc_feclearexcept_aarch64 (FE_INEXACT);
> > +#endif
> This sort of thing is never correct, because it would clear an "inexact"
> exception that was already set on entry to the function, and functions
> other than <fenv.h> specified to do so should never clear already-raised
> exceptions.
> 
> (Also, typo "FE_INEXEXACT".)

I hadn't considered that.  So maybe I could save the environment
(feholdexcept), do the calculation and see which exceptions, if any,
got raised.  Then restore the original environment (fesetenv) and raise
one or the other exceptions if needed.  Does that sound like a workable
solution?

Steve Ellcey
sellcey@cavium.com

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 3/4] Add ILP32 support to aarch64
  2017-08-03 18:22   ` Steve Ellcey
@ 2017-08-03 19:48     ` Joseph Myers
  0 siblings, 0 replies; 13+ messages in thread
From: Joseph Myers @ 2017-08-03 19:48 UTC (permalink / raw)
  To: Steve Ellcey; +Cc: libc-alpha

[-- Attachment #1: Type: text/plain, Size: 786 bytes --]

On Thu, 3 Aug 2017, Steve Ellcey wrote:

> I hadn't considered that.  So maybe I could save the environment
> (feholdexcept), do the calculation and see which exceptions, if any,
> got raised.  Then restore the original environment (fesetenv) and raise
> one or the other exceptions if needed.  Does that sound like a workable
> solution?

Yes, you could do that (see x86/x86_64 nearbyintl for example; that needs 
to preserve "invalid" for signaling NaNs, avoid "inexact" from frndint, 
but not clear any "inexact" that was already raised).  Whether doing so is 
optimal, versus using another implementation of the function for ILP32, 
may depend on the AArch64 performance characteristics of saving and 
restoring the environment.

-- 
Joseph S. Myers
joseph@codesourcery.com

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 3/4] Add ILP32 support to aarch64
@ 2017-08-03 22:33 Wilco Dijkstra
  2017-08-04  0:12 ` Joseph Myers
  0 siblings, 1 reply; 13+ messages in thread
From: Wilco Dijkstra @ 2017-08-03 22:33 UTC (permalink / raw)
  To: Ellcey, Steve, Joseph Myers; +Cc: nd, libc-alpha

Steve Ellcey wrote:
> I hadn't considered that.  So maybe I could save the environment
> (feholdexcept), do the calculation and see which exceptions, if any,
> got raised.  Then restore the original environment (fesetenv) and raise
> one or the other exceptions if needed.  Does that sound like a workable
> solution?

The fenv calls are slow on fast OoO cores so are a bad idea in fast paths.
The generic implementation may well be faster... I'm not sure where the
requirement of not raising inexact comes from (I don't see it in the definition
of lrint, and we generally don't care since inexact is set by almost every FP
calculation), but if it is absolutely required you'd special case values larger
than LONG_MAX.

Wilco

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 3/4] Add ILP32 support to aarch64
  2017-08-03 22:33 Wilco Dijkstra
@ 2017-08-04  0:12 ` Joseph Myers
  2017-08-04 23:15   ` Steve Ellcey
  0 siblings, 1 reply; 13+ messages in thread
From: Joseph Myers @ 2017-08-04  0:12 UTC (permalink / raw)
  To: Wilco Dijkstra; +Cc: Ellcey, Steve, nd, libc-alpha

On Thu, 3 Aug 2017, Wilco Dijkstra wrote:

> The generic implementation may well be faster... I'm not sure where the
> requirement of not raising inexact comes from (I don't see it in the definition
> of lrint, and we generally don't care since inexact is set by almost every FP
> calculation), but if it is absolutely required you'd special case values larger
> than LONG_MAX.

The requirement comes from lrint being bound to IEEE 754 conversion 
operations, so only raising inexact under the conditions specified and no 
spurious inexact.

-- 
Joseph S. Myers
joseph@codesourcery.com

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 3/4] Add ILP32 support to aarch64
  2017-08-04  0:12 ` Joseph Myers
@ 2017-08-04 23:15   ` Steve Ellcey
  2017-08-08 15:02     ` Szabolcs Nagy
  0 siblings, 1 reply; 13+ messages in thread
From: Steve Ellcey @ 2017-08-04 23:15 UTC (permalink / raw)
  To: Joseph Myers, Wilco Dijkstra; +Cc: Ellcey, Steve, nd, libc-alpha

[-- Attachment #1: Type: text/plain, Size: 1795 bytes --]

On Fri, 2017-08-04 at 00:12 +0000, Joseph Myers wrote:
> On Thu, 3 Aug 2017, Wilco Dijkstra wrote:
> 
> > The generic implementation may well be faster... I'm not sure where the
> > requirement of not raising inexact comes from (I don't see it in the definition
> > of lrint, and we generally don't care since inexact is set by almost every FP
> > calculation), but if it is absolutely required you'd special case values larger
> > than LONG_MAX.
> The requirement comes from lrint being bound to IEEE 754 conversion
> operations, so only raising inexact under the conditions specified and no
> spurious inexact.


Here is a new version of this patch.  It (mostly) avoids fenv calls
when not needed and preserves any exceptions that may be set on entry
to the function.

Steve Ellcey
sellcey@cavium.com


2017-08-04  Steve Ellcey  <sellcey@cavium.com>

	* sysdeps/aarch64/fpu/s_llrint.c (OREG_SIZE): New macro.
	* sysdeps/aarch64/fpu/s_llround.c (OREG_SIZE): Likewise.
	* sysdeps/aarch64/fpu/s_llrintf.c (OREGS, IREGS): Remove.
	(IREG_SIZE, OREG_SIZE): New macros.
	* sysdeps/aarch64/fpu/s_llroundf.c: (OREGS, IREGS): Remove.
	(IREG_SIZE, OREG_SIZE): New macros.
	* sysdeps/aarch64/fpu/s_lrintf.c (IREGS): Remove.
	(IREG_SIZE): New macro.
	* sysdeps/aarch64/fpu/s_lroundf.c (IREGS): Remove.
	(IREG_SIZE): New macro.
	* sysdeps/aarch64/fpu/s_lrint.c (math_private.h, fenv.h, stdint.h):
	New includes.
	(IREG_SIZE, OREG_SIZE): Initialize if not already set.
	(OREGS, IREGS): Set based on IREG_SIZE and OREG_SIZE.
	(__CONCATX): Handle exceptions correctly on large values that may
	set FE_INVALID.
	* sysdeps/aarch64/fpu/s_lround.c (IREG_SIZE, OREG_SIZE):
	Initialize if not already set.
	(OREGS, IREGS): Set based on IREG_SIZE and OREG_SIZE.


[-- Attachment #2: aarch64-ilp32-math.patch --]
[-- Type: text/x-patch, Size: 4879 bytes --]

diff --git a/sysdeps/aarch64/fpu/s_llrint.c b/sysdeps/aarch64/fpu/s_llrint.c
index c0d0d0e..57821c0 100644
--- a/sysdeps/aarch64/fpu/s_llrint.c
+++ b/sysdeps/aarch64/fpu/s_llrint.c
@@ -18,4 +18,5 @@
 
 #define FUNC llrint
 #define OTYPE long long int
+#define OREG_SIZE 64
 #include <s_lrint.c>
diff --git a/sysdeps/aarch64/fpu/s_llrintf.c b/sysdeps/aarch64/fpu/s_llrintf.c
index 67724c6..98ed4f8 100644
--- a/sysdeps/aarch64/fpu/s_llrintf.c
+++ b/sysdeps/aarch64/fpu/s_llrintf.c
@@ -18,6 +18,7 @@
 
 #define FUNC llrintf
 #define ITYPE float
-#define IREGS "s"
+#define IREG_SIZE 32
 #define OTYPE long long int
+#define OREG_SIZE 64
 #include <s_lrint.c>
diff --git a/sysdeps/aarch64/fpu/s_llround.c b/sysdeps/aarch64/fpu/s_llround.c
index ed4b192..ef7aedf 100644
--- a/sysdeps/aarch64/fpu/s_llround.c
+++ b/sysdeps/aarch64/fpu/s_llround.c
@@ -18,4 +18,5 @@
 
 #define FUNC llround
 #define OTYPE long long int
+#define OREG_SIZE 64
 #include <s_lround.c>
diff --git a/sysdeps/aarch64/fpu/s_llroundf.c b/sysdeps/aarch64/fpu/s_llroundf.c
index 360ce8b..294f0f4 100644
--- a/sysdeps/aarch64/fpu/s_llroundf.c
+++ b/sysdeps/aarch64/fpu/s_llroundf.c
@@ -18,6 +18,7 @@
 
 #define FUNC llroundf
 #define ITYPE float
-#define IREGS "s"
+#define IREG_SIZE 32
 #define OTYPE long long int
+#define OREG_SIZE 64
 #include <s_lround.c>
diff --git a/sysdeps/aarch64/fpu/s_lrint.c b/sysdeps/aarch64/fpu/s_lrint.c
index 8c61a03..19f9b5b 100644
--- a/sysdeps/aarch64/fpu/s_lrint.c
+++ b/sysdeps/aarch64/fpu/s_lrint.c
@@ -16,7 +16,10 @@
    License along with the GNU C Library; if not, see
    <http://www.gnu.org/licenses/>.  */
 
+#include <math_private.h>
 #include <math.h>
+#include <fenv.h>
+#include <stdint.h>
 
 #ifndef FUNC
 # define FUNC lrint
@@ -24,18 +27,37 @@
 
 #ifndef ITYPE
 # define ITYPE double
-# define IREGS "d"
+# define IREG_SIZE 64
 #else
-# ifndef IREGS
-#  error IREGS not defined
+# ifndef IREG_SIZE
+#  error IREG_SIZE not defined
 # endif
 #endif
 
 #ifndef OTYPE
 # define OTYPE long int
+# ifdef __ILP32__
+#  define OREG_SIZE 32
+# else
+#  define OREG_SIZE 64
+# endif
+#else
+# ifndef OREG_SIZE
+#  error OREG_SIZE not defined
+# endif
+#endif
+
+#if IREG_SIZE == 32
+# define IREGS "s"
+#else
+# define IREGS "d"
 #endif
 
-#define OREGS "x"
+#if OREG_SIZE == 32
+# define OREGS "w"
+#else
+# define OREGS "x"
+#endif
 
 #define __CONCATX(a,b) __CONCAT(a,b)
 
@@ -44,6 +66,33 @@ __CONCATX(__,FUNC) (ITYPE x)
 {
   OTYPE result;
   ITYPE temp;
+
+#if IREG_SIZE == 64 && OREG_SIZE == 32
+  if (__builtin_fabs (x) > INT32_MAX - 2)
+    {
+      /* Converting large values to a 32 bit in may cause the frintx/fcvtza
+	 sequence to set both FE_INVALID and FE_INEXACT.  To avoid this
+         we save and restore the FE and only set one or the other.  */
+
+      fenv_t env;
+      bool invalid_p, inexact_p;
+
+      libc_feholdexcept (&env);
+      asm ( "frintx" "\t%" IREGS "1, %" IREGS "2\n\t"
+	    "fcvtzs" "\t%" OREGS "0, %" IREGS "1"
+	    : "=r" (result), "=w" (temp) : "w" (x) );
+      invalid_p = libc_fetestexcept (FE_INVALID);
+      inexact_p = libc_fetestexcept (FE_INEXACT);
+      libc_fesetenv (&env);
+
+      if (invalid_p)
+	feraiseexcept (FE_INVALID);
+      else if (inexact_p)
+	feraiseexcept (FE_INEXACT);
+
+      return result;
+  }
+#endif
   asm ( "frintx" "\t%" IREGS "1, %" IREGS "2\n\t"
         "fcvtzs" "\t%" OREGS "0, %" IREGS "1"
         : "=r" (result), "=w" (temp) : "w" (x) );
diff --git a/sysdeps/aarch64/fpu/s_lrintf.c b/sysdeps/aarch64/fpu/s_lrintf.c
index a995e4b..2e73271 100644
--- a/sysdeps/aarch64/fpu/s_lrintf.c
+++ b/sysdeps/aarch64/fpu/s_lrintf.c
@@ -18,5 +18,5 @@
 
 #define FUNC lrintf
 #define ITYPE float
-#define IREGS "s"
+#define IREG_SIZE 32
 #include <s_lrint.c>
diff --git a/sysdeps/aarch64/fpu/s_lround.c b/sysdeps/aarch64/fpu/s_lround.c
index 9be9e7f..1f77d82 100644
--- a/sysdeps/aarch64/fpu/s_lround.c
+++ b/sysdeps/aarch64/fpu/s_lround.c
@@ -24,18 +24,37 @@
 
 #ifndef ITYPE
 # define ITYPE double
-# define IREGS "d"
+# define IREG_SIZE 64
 #else
-# ifndef IREGS
-#  error IREGS not defined
+# ifndef IREG_SIZE
+#  error IREG_SIZE not defined
 # endif
 #endif
 
 #ifndef OTYPE
 # define OTYPE long int
+# ifdef __ILP32__
+#  define OREG_SIZE 32
+# else
+#  define OREG_SIZE 64
+# endif
+#else
+# ifndef OREG_SIZE
+#  error OREG_SIZE not defined
+# endif
+#endif
+
+#if IREG_SIZE == 32
+# define IREGS "s"
+#else
+# define IREGS "d"
 #endif
 
-#define OREGS "x"
+#if OREG_SIZE == 32
+# define OREGS "w"
+#else
+# define OREGS "x"
+#endif
 
 #define __CONCATX(a,b) __CONCAT(a,b)
 
diff --git a/sysdeps/aarch64/fpu/s_lroundf.c b/sysdeps/aarch64/fpu/s_lroundf.c
index 4a066d4..b30ddb6 100644
--- a/sysdeps/aarch64/fpu/s_lroundf.c
+++ b/sysdeps/aarch64/fpu/s_lroundf.c
@@ -18,5 +18,5 @@
 
 #define FUNC lroundf
 #define ITYPE float
-#define IREGS "s"
+#define IREG_SIZE 32
 #include <s_lround.c>

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 3/4] Add ILP32 support to aarch64
  2017-08-04 23:15   ` Steve Ellcey
@ 2017-08-08 15:02     ` Szabolcs Nagy
  2017-08-08 15:23       ` Szabolcs Nagy
                         ` (2 more replies)
  0 siblings, 3 replies; 13+ messages in thread
From: Szabolcs Nagy @ 2017-08-08 15:02 UTC (permalink / raw)
  To: sellcey, Joseph Myers, Wilco Dijkstra; +Cc: nd, Ellcey, Steve, libc-alpha

On 05/08/17 00:15, Steve Ellcey wrote:
> On Fri, 2017-08-04 at 00:12 +0000, Joseph Myers wrote:
>> > On Thu, 3 Aug 2017, Wilco Dijkstra wrote:
>> > 
>>> > > The generic implementation may well be faster... I'm not sure where the
>>> > > requirement of not raising inexact comes from (I don't see it in the definition
>>> > > of lrint, and we generally don't care since inexact is set by almost every FP
>>> > > calculation), but if it is absolutely required you'd special case values larger
>>> > > than LONG_MAX.
>> > The requirement comes from lrint being bound to IEEE 754 conversion 
>> > operations, so only raising inexact under the conditions specified and no 
>> > spurious inexact.
> 
> Here is a new version of this patch.  It (mostly) avoids fenv calls
> when not needed and preserves any exceptions that may be set on entry
> to the function.
> 
...
> +#if IREG_SIZE == 64 && OREG_SIZE == 32
> +  if (__builtin_fabs (x) > INT32_MAX - 2)

i don't understand the -2 here.

> +    {
> +      /* Converting large values to a 32 bit in may cause the frintx/fcvtza

s/in/int/

> +	 sequence to set both FE_INVALID and FE_INEXACT.  To avoid this
> +         we save and restore the FE and only set one or the other.  */
> +
> +      fenv_t env;
> +      bool invalid_p, inexact_p;
> +
> +      libc_feholdexcept (&env);
> +      asm ( "frintx" "\t%" IREGS "1, %" IREGS "2\n\t"
> +	    "fcvtzs" "\t%" OREGS "0, %" IREGS "1"
> +	    : "=r" (result), "=w" (temp) : "w" (x) );
> +      invalid_p = libc_fetestexcept (FE_INVALID);
> +      inexact_p = libc_fetestexcept (FE_INEXACT);

multiple flags can be tested/raised in a single call.

> +      libc_fesetenv (&env);
> +
> +      if (invalid_p)
> +	feraiseexcept (FE_INVALID);
> +      else if (inexact_p)
> +	feraiseexcept (FE_INEXACT);
> +

i think correct trapping is not guaranteed by glibc,
only correct status flags when the function returns,
so spurious inexact is not a problem if it is already
raised, and then i expect better code gen for the
inexact clearing approach:

if (fabs (x) > INT32_MAX && fetestexcept (FE_INEXACT) == 0)
  {
    asm (...);
    if (fetestexcept (FE_INVALID|FE_INEXACT) == (FE_INVALID|FE_INEXACT))
      feclearexcept (FE_INEXACT);
  }
else
  asm (...);


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 3/4] Add ILP32 support to aarch64
  2017-08-08 15:02     ` Szabolcs Nagy
@ 2017-08-08 15:23       ` Szabolcs Nagy
  2017-08-08 17:22       ` Joseph Myers
  2017-08-08 18:10       ` Steve Ellcey
  2 siblings, 0 replies; 13+ messages in thread
From: Szabolcs Nagy @ 2017-08-08 15:23 UTC (permalink / raw)
  To: sellcey, Joseph Myers, Wilco Dijkstra; +Cc: nd, Ellcey, Steve, libc-alpha

On 08/08/17 16:01, Szabolcs Nagy wrote:
> On 05/08/17 00:15, Steve Ellcey wrote:
>> On Fri, 2017-08-04 at 00:12 +0000, Joseph Myers wrote:
>>>> On Thu, 3 Aug 2017, Wilco Dijkstra wrote:
>>>>
>>>>>> The generic implementation may well be faster... I'm not sure where the
>>>>>> requirement of not raising inexact comes from (I don't see it in the definition
>>>>>> of lrint, and we generally don't care since inexact is set by almost every FP
>>>>>> calculation), but if it is absolutely required you'd special case values larger
>>>>>> than LONG_MAX.
>>>> The requirement comes from lrint being bound to IEEE 754 conversion 
>>>> operations, so only raising inexact under the conditions specified and no 
>>>> spurious inexact.
>>
>> Here is a new version of this patch.  It (mostly) avoids fenv calls
>> when not needed and preserves any exceptions that may be set on entry
>> to the function.
>>
> ...
>> +#if IREG_SIZE == 64 && OREG_SIZE == 32
>> +  if (__builtin_fabs (x) > INT32_MAX - 2)
> 
> i don't understand the -2 here.
> 
>> +    {
>> +      /* Converting large values to a 32 bit in may cause the frintx/fcvtza
> 
> s/in/int/
> 
>> +	 sequence to set both FE_INVALID and FE_INEXACT.  To avoid this
>> +         we save and restore the FE and only set one or the other.  */
>> +
>> +      fenv_t env;
>> +      bool invalid_p, inexact_p;
>> +
>> +      libc_feholdexcept (&env);
>> +      asm ( "frintx" "\t%" IREGS "1, %" IREGS "2\n\t"
>> +	    "fcvtzs" "\t%" OREGS "0, %" IREGS "1"
>> +	    : "=r" (result), "=w" (temp) : "w" (x) );
>> +      invalid_p = libc_fetestexcept (FE_INVALID);
>> +      inexact_p = libc_fetestexcept (FE_INEXACT);
> 
> multiple flags can be tested/raised in a single call.
> 
>> +      libc_fesetenv (&env);
>> +
>> +      if (invalid_p)
>> +	feraiseexcept (FE_INVALID);
>> +      else if (inexact_p)
>> +	feraiseexcept (FE_INEXACT);
>> +
> 
> i think correct trapping is not guaranteed by glibc,
> only correct status flags when the function returns,
> so spurious inexact is not a problem if it is already
> raised, and then i expect better code gen for the
> inexact clearing approach:
> 
> if (fabs (x) > INT32_MAX && fetestexcept (FE_INEXACT) == 0)
>   {
>     asm (...);
>     if (fetestexcept (FE_INVALID|FE_INEXACT) == (FE_INVALID|FE_INEXACT))
>       feclearexcept (FE_INEXACT);

Wilco pointed out to me that this approach would be
more complicated because invalid may be already raised
so you need to check that too, clear it if it's set
and restore it at the end..

>   }
> else
>   asm (...);
> 
> 

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 3/4] Add ILP32 support to aarch64
  2017-08-08 15:02     ` Szabolcs Nagy
  2017-08-08 15:23       ` Szabolcs Nagy
@ 2017-08-08 17:22       ` Joseph Myers
  2017-08-08 18:10       ` Steve Ellcey
  2 siblings, 0 replies; 13+ messages in thread
From: Joseph Myers @ 2017-08-08 17:22 UTC (permalink / raw)
  To: Szabolcs Nagy; +Cc: sellcey, Wilco Dijkstra, nd, Ellcey, Steve, libc-alpha

On Tue, 8 Aug 2017, Szabolcs Nagy wrote:

> i think correct trapping is not guaranteed by glibc,
> only correct status flags when the function returns,
> so spurious inexact is not a problem if it is already
> raised, and then i expect better code gen for the
> inexact clearing approach:

Since we have APIs for enabling / disabling exception traps, I think it's 
expected that any spurious exceptions raised internally will be raised 
inside an feholdexcept context so the user doesn't see traps.  (We do not 
claim anything about the number of times a given exception is raised 
within a function, beyond whether it's zero or nonzero, or about the order 
in which different exceptions are raised by a function raising multiple 
exceptions, or about which subexceptions are raised on architectures such 
as powerpc that support subexceptions.)

-- 
Joseph S. Myers
joseph@codesourcery.com

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 3/4] Add ILP32 support to aarch64
  2017-08-08 15:02     ` Szabolcs Nagy
  2017-08-08 15:23       ` Szabolcs Nagy
  2017-08-08 17:22       ` Joseph Myers
@ 2017-08-08 18:10       ` Steve Ellcey
  2017-08-28 14:53         ` Richard Henderson
  2 siblings, 1 reply; 13+ messages in thread
From: Steve Ellcey @ 2017-08-08 18:10 UTC (permalink / raw)
  To: Szabolcs Nagy, Joseph Myers, Wilco Dijkstra; +Cc: nd, Ellcey, Steve, libc-alpha

[-- Attachment #1: Type: text/plain, Size: 2945 bytes --]

On Tue, 2017-08-08 at 16:01 +0100, Szabolcs Nagy wrote:
>
> > +#if IREG_SIZE == 64 && OREG_SIZE == 32
> > +  if (__builtin_fabs (x) > INT32_MAX - 2)
> i don't understand the -2 here.

I was confused and trying to handle the fact that fabs(INT32_MIN) !=
INT32_MAX.  I have removed the -2 and am just comparing to INT32_MAX
and that seems to work fine.  Since fabs(INT32_MIN) is greater than
INT32_MAX we may unnecessarily enter this if statement for values
between INT32_MIN and INT32_MIN+1 but that should not cause any
failures, just a slowdown.

> > +    {
> > +      /* Converting large values to a 32 bit in may cause the frintx/fcvtza
> s/in/int/

Fixed that.

> > +      invalid_p = libc_fetestexcept (FE_INVALID);
> > +      inexact_p = libc_fetestexcept (FE_INEXACT);
> multiple flags can be tested/raised in a single call.

Good point.  I changed this to one call and saved the flags in an
integer variable for checking later.

> > +      libc_fesetenv (&env);
> > +
> > +      if (invalid_p)
> > +	feraiseexcept (FE_INVALID);
> > +      else if (inexact_p)
> > +	feraiseexcept (FE_INEXACT);
> > +
> i think correct trapping is not guaranteed by glibc,
> only correct status flags when the function returns,
> so spurious inexact is not a problem if it is already
> raised, and then i expect better code gen for the
> inexact clearing approach:
> 
> if (fabs (x) > INT32_MAX && fetestexcept (FE_INEXACT) == 0)
>   {
>     asm (...);
>     if (fetestexcept (FE_INVALID|FE_INEXACT) == (FE_INVALID|FE_INEXACT))
>       feclearexcept (FE_INEXACT);
>   }
> else
>   asm (...);

As you mentioned in your followup email, we have to worry about
FE_INVALID being set on entry too.  I have attached an updated
version of my patch.

Steve Ellcey
sellcey@cavium.com


2017-08-08  Steve Ellcey  <sellcey@cavium.com>

	* sysdeps/aarch64/fpu/s_llrint.c (OREG_SIZE): New macro.
	* sysdeps/aarch64/fpu/s_llround.c (OREG_SIZE): Likewise.
	* sysdeps/aarch64/fpu/s_llrintf.c (OREGS, IREGS): Remove.
	(IREG_SIZE, OREG_SIZE): New macros.
	* sysdeps/aarch64/fpu/s_llroundf.c: (OREGS, IREGS): Remove.
	(IREG_SIZE, OREG_SIZE): New macros.
	* sysdeps/aarch64/fpu/s_lrintf.c (IREGS): Remove.
	(IREG_SIZE): New macro.
	* sysdeps/aarch64/fpu/s_lroundf.c (IREGS): Remove.
	(IREG_SIZE): New macro.
	* sysdeps/aarch64/fpu/s_lrint.c (math_private.h, fenv.h, stdint.h):
	New includes.
	(IREG_SIZE, OREG_SIZE): Initialize if not already set.
	(OREGS, IREGS): Set based on IREG_SIZE and OREG_SIZE.
	(__CONCATX): Handle exceptions correctly on large values that may
	set FE_INVALID.
	* sysdeps/aarch64/fpu/s_lround.c (IREG_SIZE, OREG_SIZE):
	Initialize if not already set.
	(OREGS, IREGS): Set based on IREG_SIZE and OREG_SIZE.

[-- Attachment #2: math.patch --]
[-- Type: text/x-patch, Size: 4844 bytes --]

diff --git a/sysdeps/aarch64/fpu/s_llrint.c b/sysdeps/aarch64/fpu/s_llrint.c
index c0d0d0e..57821c0 100644
--- a/sysdeps/aarch64/fpu/s_llrint.c
+++ b/sysdeps/aarch64/fpu/s_llrint.c
@@ -18,4 +18,5 @@
 
 #define FUNC llrint
 #define OTYPE long long int
+#define OREG_SIZE 64
 #include <s_lrint.c>
diff --git a/sysdeps/aarch64/fpu/s_llrintf.c b/sysdeps/aarch64/fpu/s_llrintf.c
index 67724c6..98ed4f8 100644
--- a/sysdeps/aarch64/fpu/s_llrintf.c
+++ b/sysdeps/aarch64/fpu/s_llrintf.c
@@ -18,6 +18,7 @@
 
 #define FUNC llrintf
 #define ITYPE float
-#define IREGS "s"
+#define IREG_SIZE 32
 #define OTYPE long long int
+#define OREG_SIZE 64
 #include <s_lrint.c>
diff --git a/sysdeps/aarch64/fpu/s_llround.c b/sysdeps/aarch64/fpu/s_llround.c
index ed4b192..ef7aedf 100644
--- a/sysdeps/aarch64/fpu/s_llround.c
+++ b/sysdeps/aarch64/fpu/s_llround.c
@@ -18,4 +18,5 @@
 
 #define FUNC llround
 #define OTYPE long long int
+#define OREG_SIZE 64
 #include <s_lround.c>
diff --git a/sysdeps/aarch64/fpu/s_llroundf.c b/sysdeps/aarch64/fpu/s_llroundf.c
index 360ce8b..294f0f4 100644
--- a/sysdeps/aarch64/fpu/s_llroundf.c
+++ b/sysdeps/aarch64/fpu/s_llroundf.c
@@ -18,6 +18,7 @@
 
 #define FUNC llroundf
 #define ITYPE float
-#define IREGS "s"
+#define IREG_SIZE 32
 #define OTYPE long long int
+#define OREG_SIZE 64
 #include <s_lround.c>
diff --git a/sysdeps/aarch64/fpu/s_lrint.c b/sysdeps/aarch64/fpu/s_lrint.c
index 8c61a03..ed0135c 100644
--- a/sysdeps/aarch64/fpu/s_lrint.c
+++ b/sysdeps/aarch64/fpu/s_lrint.c
@@ -16,7 +16,10 @@
    License along with the GNU C Library; if not, see
    <http://www.gnu.org/licenses/>.  */
 
+#include <math_private.h>
 #include <math.h>
+#include <fenv.h>
+#include <stdint.h>
 
 #ifndef FUNC
 # define FUNC lrint
@@ -24,18 +27,37 @@
 
 #ifndef ITYPE
 # define ITYPE double
-# define IREGS "d"
+# define IREG_SIZE 64
 #else
-# ifndef IREGS
-#  error IREGS not defined
+# ifndef IREG_SIZE
+#  error IREG_SIZE not defined
 # endif
 #endif
 
 #ifndef OTYPE
 # define OTYPE long int
+# ifdef __ILP32__
+#  define OREG_SIZE 32
+# else
+#  define OREG_SIZE 64
+# endif
+#else
+# ifndef OREG_SIZE
+#  error OREG_SIZE not defined
+# endif
+#endif
+
+#if IREG_SIZE == 32
+# define IREGS "s"
+#else
+# define IREGS "d"
 #endif
 
-#define OREGS "x"
+#if OREG_SIZE == 32
+# define OREGS "w"
+#else
+# define OREGS "x"
+#endif
 
 #define __CONCATX(a,b) __CONCAT(a,b)
 
@@ -44,6 +66,32 @@ __CONCATX(__,FUNC) (ITYPE x)
 {
   OTYPE result;
   ITYPE temp;
+
+#if IREG_SIZE == 64 && OREG_SIZE == 32
+  if (__builtin_fabs (x) > INT32_MAX)
+    {
+      /* Converting large values to a 32 bit int may cause the frintx/fcvtza
+	 sequence to set both FE_INVALID and FE_INEXACT.  To avoid this
+         we save and restore the FE and only set one or the other.  */
+
+      fenv_t env;
+      int feflags;
+
+      libc_feholdexcept (&env);
+      asm ( "frintx" "\t%" IREGS "1, %" IREGS "2\n\t"
+	    "fcvtzs" "\t%" OREGS "0, %" IREGS "1"
+	    : "=r" (result), "=w" (temp) : "w" (x) );
+      feflags = libc_fetestexcept (FE_INVALID | FE_INEXACT);
+      libc_fesetenv (&env);
+
+      if (feflags & FE_INVALID)
+	feraiseexcept (FE_INVALID);
+      else if (feflags & FE_INEXACT)
+	feraiseexcept (FE_INEXACT);
+
+      return result;
+  }
+#endif
   asm ( "frintx" "\t%" IREGS "1, %" IREGS "2\n\t"
         "fcvtzs" "\t%" OREGS "0, %" IREGS "1"
         : "=r" (result), "=w" (temp) : "w" (x) );
diff --git a/sysdeps/aarch64/fpu/s_lrintf.c b/sysdeps/aarch64/fpu/s_lrintf.c
index a995e4b..2e73271 100644
--- a/sysdeps/aarch64/fpu/s_lrintf.c
+++ b/sysdeps/aarch64/fpu/s_lrintf.c
@@ -18,5 +18,5 @@
 
 #define FUNC lrintf
 #define ITYPE float
-#define IREGS "s"
+#define IREG_SIZE 32
 #include <s_lrint.c>
diff --git a/sysdeps/aarch64/fpu/s_lround.c b/sysdeps/aarch64/fpu/s_lround.c
index 9be9e7f..1f77d82 100644
--- a/sysdeps/aarch64/fpu/s_lround.c
+++ b/sysdeps/aarch64/fpu/s_lround.c
@@ -24,18 +24,37 @@
 
 #ifndef ITYPE
 # define ITYPE double
-# define IREGS "d"
+# define IREG_SIZE 64
 #else
-# ifndef IREGS
-#  error IREGS not defined
+# ifndef IREG_SIZE
+#  error IREG_SIZE not defined
 # endif
 #endif
 
 #ifndef OTYPE
 # define OTYPE long int
+# ifdef __ILP32__
+#  define OREG_SIZE 32
+# else
+#  define OREG_SIZE 64
+# endif
+#else
+# ifndef OREG_SIZE
+#  error OREG_SIZE not defined
+# endif
+#endif
+
+#if IREG_SIZE == 32
+# define IREGS "s"
+#else
+# define IREGS "d"
 #endif
 
-#define OREGS "x"
+#if OREG_SIZE == 32
+# define OREGS "w"
+#else
+# define OREGS "x"
+#endif
 
 #define __CONCATX(a,b) __CONCAT(a,b)
 
diff --git a/sysdeps/aarch64/fpu/s_lroundf.c b/sysdeps/aarch64/fpu/s_lroundf.c
index 4a066d4..b30ddb6 100644
--- a/sysdeps/aarch64/fpu/s_lroundf.c
+++ b/sysdeps/aarch64/fpu/s_lroundf.c
@@ -18,5 +18,5 @@
 
 #define FUNC lroundf
 #define ITYPE float
-#define IREGS "s"
+#define IREG_SIZE 32
 #include <s_lround.c>

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 3/4] Add ILP32 support to aarch64
  2017-08-08 18:10       ` Steve Ellcey
@ 2017-08-28 14:53         ` Richard Henderson
  2017-08-29 10:33           ` Szabolcs Nagy
  0 siblings, 1 reply; 13+ messages in thread
From: Richard Henderson @ 2017-08-28 14:53 UTC (permalink / raw)
  To: sellcey, Szabolcs Nagy, Joseph Myers, Wilco Dijkstra
  Cc: nd, Ellcey, Steve, libc-alpha

[-- Attachment #1: Type: text/plain, Size: 1097 bytes --]

On 08/08/2017 11:10 AM, Steve Ellcey wrote:
> @@ -44,6 +66,32 @@ __CONCATX(__,FUNC) (ITYPE x)
>  {
>    OTYPE result;
>    ITYPE temp;
> +
> +#if IREG_SIZE == 64 && OREG_SIZE == 32
> +  if (__builtin_fabs (x) > INT32_MAX)
> +    {
> +      /* Converting large values to a 32 bit int may cause the frintx/fcvtza
> +	 sequence to set both FE_INVALID and FE_INEXACT.  To avoid this
> +         we save and restore the FE and only set one or the other.  */
> +
> +      fenv_t env;
> +      int feflags;
> +
> +      libc_feholdexcept (&env);
> +      asm ( "frintx" "\t%" IREGS "1, %" IREGS "2\n\t"
> +	    "fcvtzs" "\t%" OREGS "0, %" IREGS "1"
> +	    : "=r" (result), "=w" (temp) : "w" (x) );
> +      feflags = libc_fetestexcept (FE_INVALID | FE_INEXACT);
> +      libc_fesetenv (&env);
> +
> +      if (feflags & FE_INVALID)
> +	feraiseexcept (FE_INVALID);
> +      else if (feflags & FE_INEXACT)
> +	feraiseexcept (FE_INEXACT);
> +
> +      return result;
> +  }
> +#endif

Surely it is simply better to do the conversion in one step, getting the proper
flags set the first time.  Like so.


r~

[-- Attachment #2: zz --]
[-- Type: text/plain, Size: 1166 bytes --]

diff --git a/sysdeps/aarch64/fpu/s_lrint.c b/sysdeps/aarch64/fpu/s_lrint.c
index 8c61a039bf..a6ac070fa6 100644
--- a/sysdeps/aarch64/fpu/s_lrint.c
+++ b/sysdeps/aarch64/fpu/s_lrint.c
@@ -17,6 +17,7 @@
    <http://www.gnu.org/licenses/>.  */
 
 #include <math.h>
+#include <get-rounding-mode.h>
 
 #ifndef FUNC
 # define FUNC lrint
@@ -43,10 +44,25 @@ OTYPE
 __CONCATX(__,FUNC) (ITYPE x)
 {
   OTYPE result;
-  ITYPE temp;
-  asm ( "frintx" "\t%" IREGS "1, %" IREGS "2\n\t"
-        "fcvtzs" "\t%" OREGS "0, %" IREGS "1"
-        : "=r" (result), "=w" (temp) : "w" (x) );
+  switch (get_rounding_mode ())
+    {
+    case FE_TONEAREST:
+      asm volatile ("fcvtns" "\t%" OREGS "0, %" IREGS "1"
+		    : "=r" (result) : "w" (x));
+      break;
+    case FE_UPWARD:
+      asm volatile ("fcvtps" "\t%" OREGS "0, %" IREGS "1"
+		    : "=r" (result) : "w" (x));
+      break;
+    case FE_DOWNWARD:
+      asm volatile ("fcvtms" "\t%" OREGS "0, %" IREGS "1"
+		    : "=r" (result) : "w" (x));
+      break;
+    default:
+    case FE_TOWARDZERO:
+      asm volatile ("fcvtzs" "\t%" OREGS "0, %" IREGS "1"
+		    : "=r" (result) : "w" (x));
+    }
   return result;
 }
 

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 3/4] Add ILP32 support to aarch64
  2017-08-28 14:53         ` Richard Henderson
@ 2017-08-29 10:33           ` Szabolcs Nagy
  0 siblings, 0 replies; 13+ messages in thread
From: Szabolcs Nagy @ 2017-08-29 10:33 UTC (permalink / raw)
  To: Richard Henderson, sellcey, Joseph Myers, Wilco Dijkstra
  Cc: nd, Ellcey, Steve, libc-alpha

On 28/08/17 15:52, Richard Henderson wrote:
> Surely it is simply better to do the conversion in one step, getting the proper
> flags set the first time.  Like so.
> 

hm true, for ilp32, but on lp64 you don't want
unnecessary fpcr access and dispatch on the rounding mode.

^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2017-08-29 10:33 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-08-03 15:36 [PATCH 3/4] Add ILP32 support to aarch64 Steve Ellcey
2017-08-03 17:47 ` Joseph Myers
2017-08-03 18:22   ` Steve Ellcey
2017-08-03 19:48     ` Joseph Myers
2017-08-03 22:33 Wilco Dijkstra
2017-08-04  0:12 ` Joseph Myers
2017-08-04 23:15   ` Steve Ellcey
2017-08-08 15:02     ` Szabolcs Nagy
2017-08-08 15:23       ` Szabolcs Nagy
2017-08-08 17:22       ` Joseph Myers
2017-08-08 18:10       ` Steve Ellcey
2017-08-28 14:53         ` Richard Henderson
2017-08-29 10:33           ` Szabolcs Nagy

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).