public inbox for libc-alpha@sourceware.org
 help / color / mirror / Atom feed
* [PATCH 3/4] Add ILP32 support to aarch64
@ 2017-08-03 15:36 Steve Ellcey
  2017-08-03 17:47 ` Joseph Myers
  0 siblings, 1 reply; 13+ messages in thread
From: Steve Ellcey @ 2017-08-03 15:36 UTC (permalink / raw)
  To: libc-alpha

[-- Attachment #1: Type: text/plain, Size: 1202 bytes --]

Here are some fixes to the floating point to integer conversion
routines for aarch64.

Steve Ellcey
sellcey@cavium.com

2017-08-03  Steve Ellcey  <sellcey@cavium.com>

	* sysdeps/aarch64/fpu/math_private.h (libc_feclearexcept_aarch64):
	New function.
	(libc_feclearexcept, libc_feclearexceptf, libc_feclearexceptl):
	New defines.
	* sysdeps/aarch64/fpu/s_llrint.c (OREG_SIZE): New macro.
	* sysdeps/aarch64/fpu/s_llround.c (OREG_SIZE): Likewise.
	* sysdeps/aarch64/fpu/s_llrintf.c (OREGS, IREGS): Remove.
	(IREG_SIZE, OREG_SIZE): New macros.
	* sysdeps/aarch64/fpu/s_llroundf.c: (OREGS, IREGS): Remove.
	(IREG_SIZE, OREG_SIZE): New macros.
	* sysdeps/aarch64/fpu/s_lrintf.c (IREGS): Remove.
	(IREG_SIZE): New macro.
	* sysdeps/aarch64/fpu/s_lroundf.c (IREGS): Remove.
	(IREG_SIZE): New macro.
	* sysdeps/aarch64/fpu/s_lrint.c (math_private.h): New include.
	(IREG_SIZE, OREG_SIZE): Initialize if not already set.
	(OREGS, IREGS): Set based on IREG_SIZE and OREG_SIZE.
	(__CONCATX): Clear INEXACT if INVALID is set.
	* sysdeps/aarch64/fpu/s_lround.c (IREG_SIZE, OREG_SIZE):
	Initialize if not already set.
        (OREGS, IREGS): Set based on IREG_SIZE and OREG_SIZE.

[-- Attachment #2: aarch64-ilp32-math.patch --]
[-- Type: text/x-patch, Size: 5073 bytes --]

diff --git a/sysdeps/aarch64/fpu/math_private.h b/sysdeps/aarch64/fpu/math_private.h
index 807111e..d5a141d5 100644
--- a/sysdeps/aarch64/fpu/math_private.h
+++ b/sysdeps/aarch64/fpu/math_private.h
@@ -134,6 +134,20 @@ libc_fetestexcept_aarch64 (int ex)
 #define libc_fetestexceptl libc_fetestexcept_aarch64
 
 static __always_inline void
+libc_feclearexcept_aarch64 (int ex)
+{
+  fpu_fpsr_t fpsr;
+
+  _FPU_GETFPSR (fpsr);
+  fpsr &= ~((fpu_fpsr_t) ex);
+  _FPU_SETFPSR (fpsr);
+}
+
+#define libc_feclearexcept  libc_feclearexcept_aarch64
+#define libc_feclearexceptf libc_feclearexcept_aarch64
+#define libc_feclearexceptl libc_feclearexcept_aarch64
+
+static __always_inline void
 libc_fesetenv_aarch64 (const fenv_t *envp)
 {
   fpu_control_t fpcr;
diff --git a/sysdeps/aarch64/fpu/s_llrint.c b/sysdeps/aarch64/fpu/s_llrint.c
index c0d0d0e..57821c0 100644
--- a/sysdeps/aarch64/fpu/s_llrint.c
+++ b/sysdeps/aarch64/fpu/s_llrint.c
@@ -18,4 +18,5 @@
 
 #define FUNC llrint
 #define OTYPE long long int
+#define OREG_SIZE 64
 #include <s_lrint.c>
diff --git a/sysdeps/aarch64/fpu/s_llrintf.c b/sysdeps/aarch64/fpu/s_llrintf.c
index 67724c6..98ed4f8 100644
--- a/sysdeps/aarch64/fpu/s_llrintf.c
+++ b/sysdeps/aarch64/fpu/s_llrintf.c
@@ -18,6 +18,7 @@
 
 #define FUNC llrintf
 #define ITYPE float
-#define IREGS "s"
+#define IREG_SIZE 32
 #define OTYPE long long int
+#define OREG_SIZE 64
 #include <s_lrint.c>
diff --git a/sysdeps/aarch64/fpu/s_llround.c b/sysdeps/aarch64/fpu/s_llround.c
index ed4b192..ef7aedf 100644
--- a/sysdeps/aarch64/fpu/s_llround.c
+++ b/sysdeps/aarch64/fpu/s_llround.c
@@ -18,4 +18,5 @@
 
 #define FUNC llround
 #define OTYPE long long int
+#define OREG_SIZE 64
 #include <s_lround.c>
diff --git a/sysdeps/aarch64/fpu/s_llroundf.c b/sysdeps/aarch64/fpu/s_llroundf.c
index 360ce8b..294f0f4 100644
--- a/sysdeps/aarch64/fpu/s_llroundf.c
+++ b/sysdeps/aarch64/fpu/s_llroundf.c
@@ -18,6 +18,7 @@
 
 #define FUNC llroundf
 #define ITYPE float
-#define IREGS "s"
+#define IREG_SIZE 32
 #define OTYPE long long int
+#define OREG_SIZE 64
 #include <s_lround.c>
diff --git a/sysdeps/aarch64/fpu/s_lrint.c b/sysdeps/aarch64/fpu/s_lrint.c
index 8c61a03..9f90385 100644
--- a/sysdeps/aarch64/fpu/s_lrint.c
+++ b/sysdeps/aarch64/fpu/s_lrint.c
@@ -16,7 +16,9 @@
    License along with the GNU C Library; if not, see
    <http://www.gnu.org/licenses/>.  */
 
+#include <math_private.h>
 #include <math.h>
+#include <fenv.h>
 
 #ifndef FUNC
 # define FUNC lrint
@@ -24,18 +26,37 @@
 
 #ifndef ITYPE
 # define ITYPE double
-# define IREGS "d"
+# define IREG_SIZE 64
 #else
-# ifndef IREGS
-#  error IREGS not defined
+# ifndef IREG_SIZE
+#  error IREG_SIZE not defined
 # endif
 #endif
 
 #ifndef OTYPE
 # define OTYPE long int
+# ifdef __ILP32__
+#  define OREG_SIZE 32
+# else
+#  define OREG_SIZE 64
+# endif
+#else
+# ifndef OREG_SIZE
+#  error OREG_SIZE not defined
+# endif
 #endif
 
-#define OREGS "x"
+#if IREG_SIZE == 32
+# define IREGS "s"
+#else
+# define IREGS "d"
+#endif
+
+#if OREG_SIZE == 32
+# define OREGS "w"
+#else
+# define OREGS "x"
+#endif
 
 #define __CONCATX(a,b) __CONCAT(a,b)
 
@@ -47,6 +68,13 @@ __CONCATX(__,FUNC) (ITYPE x)
   asm ( "frintx" "\t%" IREGS "1, %" IREGS "2\n\t"
         "fcvtzs" "\t%" OREGS "0, %" IREGS "1"
         : "=r" (result), "=w" (temp) : "w" (x) );
+#if OREG_SIZE == 32
+  /* The rounding step may set FE_INEXEXACT and converting to a 32 bit
+     value may set FE_INVALID.  We do not want FE_INEXACT set when
+     FE_INVALID has been set.  */
+  if (libc_fetestexcept_aarch64 (FE_INVALID))
+    libc_feclearexcept_aarch64 (FE_INEXACT);
+#endif
   return result;
 }
 
diff --git a/sysdeps/aarch64/fpu/s_lrintf.c b/sysdeps/aarch64/fpu/s_lrintf.c
index a995e4b..2e73271 100644
--- a/sysdeps/aarch64/fpu/s_lrintf.c
+++ b/sysdeps/aarch64/fpu/s_lrintf.c
@@ -18,5 +18,5 @@
 
 #define FUNC lrintf
 #define ITYPE float
-#define IREGS "s"
+#define IREG_SIZE 32
 #include <s_lrint.c>
diff --git a/sysdeps/aarch64/fpu/s_lround.c b/sysdeps/aarch64/fpu/s_lround.c
index 9be9e7f..1f77d82 100644
--- a/sysdeps/aarch64/fpu/s_lround.c
+++ b/sysdeps/aarch64/fpu/s_lround.c
@@ -24,18 +24,37 @@
 
 #ifndef ITYPE
 # define ITYPE double
-# define IREGS "d"
+# define IREG_SIZE 64
 #else
-# ifndef IREGS
-#  error IREGS not defined
+# ifndef IREG_SIZE
+#  error IREG_SIZE not defined
 # endif
 #endif
 
 #ifndef OTYPE
 # define OTYPE long int
+# ifdef __ILP32__
+#  define OREG_SIZE 32
+# else
+#  define OREG_SIZE 64
+# endif
+#else
+# ifndef OREG_SIZE
+#  error OREG_SIZE not defined
+# endif
+#endif
+
+#if IREG_SIZE == 32
+# define IREGS "s"
+#else
+# define IREGS "d"
 #endif
 
-#define OREGS "x"
+#if OREG_SIZE == 32
+# define OREGS "w"
+#else
+# define OREGS "x"
+#endif
 
 #define __CONCATX(a,b) __CONCAT(a,b)
 
diff --git a/sysdeps/aarch64/fpu/s_lroundf.c b/sysdeps/aarch64/fpu/s_lroundf.c
index 4a066d4..b30ddb6 100644
--- a/sysdeps/aarch64/fpu/s_lroundf.c
+++ b/sysdeps/aarch64/fpu/s_lroundf.c
@@ -18,5 +18,5 @@
 
 #define FUNC lroundf
 #define ITYPE float
-#define IREGS "s"
+#define IREG_SIZE 32
 #include <s_lround.c>

^ permalink raw reply	[flat|nested] 13+ messages in thread
* Re: [PATCH 3/4] Add ILP32 support to aarch64
@ 2017-08-03 22:33 Wilco Dijkstra
  2017-08-04  0:12 ` Joseph Myers
  0 siblings, 1 reply; 13+ messages in thread
From: Wilco Dijkstra @ 2017-08-03 22:33 UTC (permalink / raw)
  To: Ellcey, Steve, Joseph Myers; +Cc: nd, libc-alpha

Steve Ellecy wrote:
> I hadn't considered that.  So maybe I could save the environment
> (feholdexcept), do the calculation and see which exceptions, if any,
> got raised.  Then restore the original environment (fesetenv) and raise
> one or the other exceptions if needed.  Does that sound like a workable
> solution?

The fenv calls are slow on fast OoO cores so are a bad idea in fast paths.
The generic implementation may well be faster... I'm not sure where the
requirement of not raising inexact comes from (I don't see it in the definition
of lrint, and we generally don't care since inexact is set by almost every FP
calculation), but if it is absolutely required you'd special case values larger
than LONG_MAX.

Wilco

^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2017-08-29 10:33 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-08-03 15:36 [PATCH 3/4] Add ILP32 support to aarch64 Steve Ellcey
2017-08-03 17:47 ` Joseph Myers
2017-08-03 18:22   ` Steve Ellcey
2017-08-03 19:48     ` Joseph Myers
2017-08-03 22:33 Wilco Dijkstra
2017-08-04  0:12 ` Joseph Myers
2017-08-04 23:15   ` Steve Ellcey
2017-08-08 15:02     ` Szabolcs Nagy
2017-08-08 15:23       ` Szabolcs Nagy
2017-08-08 17:22       ` Joseph Myers
2017-08-08 18:10       ` Steve Ellcey
2017-08-28 14:53         ` Richard Henderson
2017-08-29 10:33           ` Szabolcs Nagy

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).