public inbox for libc-ports@sourceware.org
 help / color / mirror / Atom feed
* [PATCH] Speed up libm on MIPS
@ 2013-09-19 22:58 Steve Ellcey
  2013-09-20  3:32 ` Carlos O'Donell
                   ` (2 more replies)
  0 siblings, 3 replies; 9+ messages in thread
From: Steve Ellcey @ 2013-09-19 22:58 UTC (permalink / raw)
  To: libc-ports

[-- Attachment #1: Type: text/plain, Size: 1809 bytes --]

This patch defines various inline routines and macros used by the math
library in order to speed up libm on MIPS.  It does not affect
soft-float builds but for hard-float builds 'make bench' shows a
speed-up.  With an o32 little-endian glibc, sin() went from 27792.6
iter/s to 31293.6 iter/s.  On n32 it went from 32955.2 to 36179.7 and on
n64 from 33074.7 to 36242.  On n32, exp() went from 45742.4 to 56511.2 and
pow() went from 19008.8 to 20508.7.  I have attached the original and
new bench.out files for o32, n32, and n64 ABIs in case you want to see
more of the data.  These are all little-endian hard-float runs.

I ran 'make check' and 'make bench' using the o32, n32, and n64 ABIs
with big and little endian and with hard and soft float to verify there
were no failures.  I did run into an unrelated problem that is being
fixed (https://sourceware.org/ml/libc-alpha/2013-09/msg00601.html) but
there were no other failures except the expected ones for MIPS.

OK for checkin?

Steve Ellcey
sellcey@mips.com


2013-09-18  Steve Ellcey  <sellcey@mips.com>

	* sysdeps/mips/math_private.h (libc_feholdexcept_mips): New function.
	(libc_feholdexcept): New macro.
	(libc_feholdexceptf): New macro.
	(libc_feholdexceptl): New macro.
	(libc_fesetround_mips): New function.
	(libc_fesetround): New macro.
	(libc_fesetroundf): New macro.
	(libc_fesetroundl): New macro.
	(libc_feholdexcept_setround_mips): New function.
	(libc_feholdexcept_setround): New macro.
	(libc_feholdexcept_setroundf): New macro.
	(libc_feholdexcept_setroundl): New macro.
	(libc_fesetenv_mips): New function.
	(libc_fesetenv): New macro.
	(libc_fesetenvf): New macro.
	(libc_fesetenvl): New macro.
	(libc_feupdateenv_mips): New function.
	(libc_feupdateenv): New macro.
	(libc_feupdateenvf): New macro.
	(libc_feupdateenvl): New macro.


[-- Attachment #2: mips-libm.patch --]
[-- Type: text/x-patch, Size: 3370 bytes --]

diff --git a/ports/sysdeps/mips/math_private.h b/ports/sysdeps/mips/math_private.h
index 6b99957..0ac18fd 100644
--- a/ports/sysdeps/mips/math_private.h
+++ b/ports/sysdeps/mips/math_private.h
@@ -26,6 +26,119 @@
 # define HIGH_ORDER_BIT_IS_SET_FOR_SNAN
 #endif
 
+/* Inline functions to speed up the math library implementation.  The
+   default versions of these routines are in generic/math_private.h
+   and call fesetround, feholdexcept, etc.  These routines use inlined
+   code instead.  */
+
+#ifdef __mips_hard_float
+
+#include <fenv.h>
+#include <fenv_libc.h>
+#include <fpu_control.h>
+
+static __always_inline void
+libc_feholdexcept_mips (fenv_t *envp)
+{
+  fpu_control_t cw;
+
+  /* Save the current control word in *ENVP before modifying it.  */
+  _FPU_GETCW (cw);
+  envp->__fp_control_register = cw;
+
+  /* Clear all exception enable bits and flags, then install the result.  */
+  cw &= ~(_FPU_MASK_V|_FPU_MASK_Z|_FPU_MASK_O|_FPU_MASK_U|_FPU_MASK_I|FE_ALL_EXCEPT);
+  _FPU_SETCW (cw);
+}
+#define libc_feholdexcept libc_feholdexcept_mips
+#define libc_feholdexceptf libc_feholdexcept_mips
+#define libc_feholdexceptl libc_feholdexcept_mips
+
+static __always_inline void
+libc_fesetround_mips (int round)
+{
+  fpu_control_t cw;
+
+  /* Read the current control word.  */
+  _FPU_GETCW (cw);
+
+  /* Set rounding bits (low two bits); ROUND is ORed in unmasked.  */
+  cw &= ~0x3;
+  cw |= round;
+
+  /* Write the updated control word back.  */
+  _FPU_SETCW (cw);
+}
+#define libc_fesetround libc_fesetround_mips
+#define libc_fesetroundf libc_fesetround_mips
+#define libc_fesetroundl libc_fesetround_mips
+
+static __always_inline void
+libc_feholdexcept_setround_mips (fenv_t *envp, int round)
+{
+  fpu_control_t cw;
+
+  /* Save the current control word in *ENVP before modifying it.  */
+  _FPU_GETCW (cw);
+  envp->__fp_control_register = cw;
+
+  /* Clear all exception enable bits and flags.  */
+  cw &= ~(_FPU_MASK_V|_FPU_MASK_Z|_FPU_MASK_O|_FPU_MASK_U|_FPU_MASK_I|FE_ALL_EXCEPT);
+
+  /* Set rounding bits (low two bits); ROUND is ORed in unmasked.  */
+  cw &= ~0x3;
+  cw |= round;
+
+  /* Install both changes with a single control-word write.  */
+  _FPU_SETCW (cw);
+}
+#define libc_feholdexcept_setround libc_feholdexcept_setround_mips
+#define libc_feholdexcept_setroundf libc_feholdexcept_setround_mips
+#define libc_feholdexcept_setroundl libc_feholdexcept_setround_mips
+
+static __always_inline void
+libc_fesetenv_mips (fenv_t *envp)
+{
+  fpu_control_t cw;
+
+  /* Read the current state first to flush the FPU pipeline; CW is unused.  */
+  _FPU_GETCW (cw);
+
+  if (envp == FE_DFL_ENV)
+    _FPU_SETCW (_FPU_DEFAULT);
+  else if (envp == FE_NOMASK_ENV)
+    _FPU_SETCW (_FPU_IEEE);
+  else
+    _FPU_SETCW (envp->__fp_control_register);
+}
+#define libc_fesetenv libc_fesetenv_mips
+#define libc_fesetenvf libc_fesetenv_mips
+#define libc_fesetenvl libc_fesetenv_mips
+
+static __always_inline void
+libc_feupdateenv_mips (fenv_t *envp)
+{
+  int temp;
+
+  /* Read the current control word to capture the raised exceptions.  */
+  _FPU_GETCW (temp);
+
+  /* Keep only the raised flag bits, merge them into the saved state
+     (flags are cumulative), and *also* set the matching cause bits.
+     Setting a cause bit is what makes the hardware generate the
+     exception when the corresponding enable bit is set as well.  */
+  temp &= FE_ALL_EXCEPT;
+  temp |= envp->__fp_control_register | (temp << CAUSE_SHIFT);
+
+  /* Install the merged state.  */
+  _FPU_SETCW (temp);
+}
+#define libc_feupdateenv libc_feupdateenv_mips
+#define libc_feupdateenvf libc_feupdateenv_mips
+#define libc_feupdateenvl libc_feupdateenv_mips
+
+#endif
+
 #include_next <math_private.h>
 
 #endif

[-- Attachment #3: bench.out.n32.new --]
[-- Type: text/plain, Size: 1934 bytes --]

acos(): ITERS:532000: TOTAL:10.0683s, MAX:25054.1ns, MIN:17428.4ns, 52839.1 iter/s
acosh(): ITERS:2.04e+06: TOTAL:9.99298s, MAX:4928.31ns, MIN:4892.42ns, 204143 iter/s
asin(): ITERS:686000: TOTAL:10.0062s, MAX:18001.7ns, MIN:13154ns, 68557.4 iter/s
asinh(): ITERS:280000: TOTAL:10.1287s, MAX:40636.9ns, MIN:32055.1ns, 27644.1 iter/s
atan(): ITERS:105000: TOTAL:10.0616s, MAX:96043.1ns, MIN:95542.6ns, 10435.7 iter/s
atan(768bits): ITERS:12000: TOTAL:12.5019s, MAX:1.04221e+06ns, MIN:1.04131e+06ns, 959.856 iter/s
atanh(): ITERS:405000: TOTAL:10.0183s, MAX:27907.3ns, MIN:20119.8ns, 40426.1 iter/s
cos(): ITERS:475000: TOTAL:10.0102s, MAX:21549.6ns, MIN:20940.9ns, 47451.8 iter/s
cos(768bits): ITERS:15000: TOTAL:12.821s, MAX:856673ns, MIN:853644ns, 1169.95 iter/s
cosh(): ITERS:525000: TOTAL:10.0263s, MAX:20817.1ns, MIN:17965.7ns, 52362.1 iter/s
exp(): ITERS:566000: TOTAL:10.0136s, MAX:17762.1ns, MIN:17662ns, 56523.1 iter/s
exp(768bits): ITERS:12000: TOTAL:10.822s, MAX:901916ns, MIN:901784ns, 1108.85 iter/s
log(): ITERS:345000: TOTAL:10.0168s, MAX:29081.5ns, MIN:29018.1ns, 34442.1 iter/s
modf(): ITERS:4.578e+06: TOTAL:9.98804s, MAX:2209.36ns, MIN:2180.49ns, 458348 iter/s
pow(): ITERS:205000: TOTAL:9.99997s, MAX:48842.1ns, MIN:48755ns, 20500.1 iter/s
pow(768bits): ITERS:4000: TOTAL:10.6838s, MAX:2.67106e+06ns, MIN:2.67088e+06ns, 374.399 iter/s
rint(): ITERS:4.664e+06: TOTAL:9.98899s, MAX:2603.1ns, MIN:1694.6ns, 466914 iter/s
sin(): ITERS:364000: TOTAL:10.0675s, MAX:43930.5ns, MIN:16965.2ns, 36156 iter/s
sin(768bits): ITERS:14000: TOTAL:12.0352s, MAX:872409ns, MIN:841528ns, 1163.26 iter/s
sinh(): ITERS:480000: TOTAL:10.0587s, MAX:27480.9ns, MIN:17896.2ns, 47719.9 iter/s
tan(): ITERS:106000: TOTAL:10.003s, MAX:94573ns, MIN:94216.6ns, 10596.8 iter/s
tan(768bits): ITERS:12000: TOTAL:10.0388s, MAX:836644ns, MIN:836541ns, 1195.36 iter/s
tanh(): ITERS:530000: TOTAL:10.0419s, MAX:19923.8ns, MIN:15258.4ns, 52778.9 iter/s

[-- Attachment #4: bench.out.n32.orig --]
[-- Type: text/plain, Size: 1932 bytes --]

acos(): ITERS:532000: TOTAL:10.0494s, MAX:24988.5ns, MIN:17393.8ns, 52938.3 iter/s
acosh(): ITERS:2.045e+06: TOTAL:10.0148s, MAX:4926.44ns, MIN:4892.35ns, 204197 iter/s
asin(): ITERS:686000: TOTAL:10.0047s, MAX:17996.6ns, MIN:13151.9ns, 68568 iter/s
asinh(): ITERS:250000: TOTAL:10.1422s, MAX:44901.1ns, MIN:36586.6ns, 24649.5 iter/s
atan(): ITERS:105000: TOTAL:10.0577s, MAX:96028.1ns, MIN:95510.1ns, 10439.8 iter/s
atan(768bits): ITERS:12000: TOTAL:12.4969s, MAX:1.04183e+06ns, MIN:1.04087e+06ns, 960.242 iter/s
atanh(): ITERS:405000: TOTAL:10.0205s, MAX:27924.3ns, MIN:20120.6ns, 40417.3 iter/s
cos(): ITERS:450000: TOTAL:10.1085s, MAX:22945.7ns, MIN:22329ns, 44516.8 iter/s
cos(768bits): ITERS:15000: TOTAL:12.8936s, MAX:861549ns, MIN:858153ns, 1163.37 iter/s
cosh(): ITERS:485000: TOTAL:10.0718s, MAX:24978.4ns, MIN:17966ns, 48154.1 iter/s
exp(): ITERS:458000: TOTAL:10.0126s, MAX:21926.5ns, MIN:21835.2ns, 45742.4 iter/s
exp(768bits): ITERS:12000: TOTAL:10.8563s, MAX:904721ns, MIN:904659ns, 1105.35 iter/s
log(): ITERS:345000: TOTAL:10.0135s, MAX:29085.3ns, MIN:28987.7ns, 34453.4 iter/s
modf(): ITERS:4.578e+06: TOTAL:9.98804s, MAX:2207.71ns, MIN:2180.49ns, 458348 iter/s
pow(): ITERS:191000: TOTAL:10.048s, MAX:52698.4ns, MIN:52553.9ns, 19008.8 iter/s
pow(768bits): ITERS:4000: TOTAL:10.6961s, MAX:2.67403e+06ns, MIN:2.67399e+06ns, 373.97 iter/s
rint(): ITERS:4.66e+06: TOTAL:9.9863s, MAX:2612.42ns, MIN:1692.47ns, 466639 iter/s
sin(): ITERS:336000: TOTAL:10.1956s, MAX:46684.9ns, MIN:19560.2ns, 32955.2 iter/s
sin(768bits): ITERS:14000: TOTAL:12.0577s, MAX:874345ns, MIN:842891ns, 1161.09 iter/s
sinh(): ITERS:480000: TOTAL:10.0587s, MAX:27492.4ns, MIN:17885ns, 47719.7 iter/s
tan(): ITERS:103000: TOTAL:10.0194s, MAX:97472.8ns, MIN:97119.2ns, 10280 iter/s
tan(768bits): ITERS:12000: TOTAL:10.1011s, MAX:841801ns, MIN:841724ns, 1187.99 iter/s
tanh(): ITERS:530000: TOTAL:10.0356s, MAX:19909ns, MIN:15249.3ns, 52812 iter/s

[-- Attachment #5: bench.out.n64.new --]
[-- Type: text/plain, Size: 1940 bytes --]

acos(): ITERS:532000: TOTAL:10.0954s, MAX:25351.3ns, MIN:17378.1ns, 52697.1 iter/s
acosh(): ITERS:2.045e+06: TOTAL:10.0105s, MAX:4929.58ns, MIN:4887.36ns, 204285 iter/s
asin(): ITERS:693000: TOTAL:10.0979s, MAX:17990.6ns, MIN:13145.9ns, 68628.1 iter/s
asinh(): ITERS:280000: TOTAL:10.1205s, MAX:40593.1ns, MIN:32045.6ns, 27666.6 iter/s
atan(): ITERS:105000: TOTAL:10.0638s, MAX:96113.2ns, MIN:95533.1ns, 10433.4 iter/s
atan(768bits): ITERS:12000: TOTAL:12.4383s, MAX:1.03693e+06ns, MIN:1.03598e+06ns, 964.765 iter/s
atanh(): ITERS:405000: TOTAL:10.0074s, MAX:27854.8ns, MIN:20098.2ns, 40470 iter/s
cos(): ITERS:465000: TOTAL:10.0621s, MAX:22119ns, MIN:21497.7ns, 46213 iter/s
cos(768bits): ITERS:15000: TOTAL:12.2595s, MAX:819220ns, MIN:815965ns, 1223.54 iter/s
cosh(): ITERS:525000: TOTAL:10.025s, MAX:20816.1ns, MIN:17947.1ns, 52369.1 iter/s
exp(): ITERS:565000: TOTAL:10.0036s, MAX:17767.2ns, MIN:17670.7ns, 56479.8 iter/s
exp(768bits): ITERS:12000: TOTAL:10.1473s, MAX:845673ns, MIN:845566ns, 1182.58 iter/s
log(): ITERS:353000: TOTAL:10.0114s, MAX:28404.5ns, MIN:28342.7ns, 35259.9 iter/s
modf(): ITERS:4.582e+06: TOTAL:9.99066s, MAX:2210.94ns, MIN:2179.14ns, 458629 iter/s
pow(): ITERS:204000: TOTAL:9.9986s, MAX:49098.2ns, MIN:48929.1ns, 20402.9 iter/s
pow(768bits): ITERS:4000: TOTAL:10.2081s, MAX:2.55206e+06ns, MIN:2.55202e+06ns, 391.844 iter/s
rint(): ITERS:4.684e+06: TOTAL:9.98779s, MAX:2594.65ns, MIN:1684.21ns, 468973 iter/s
sin(): ITERS:364000: TOTAL:10.0356s, MAX:43426.2ns, MIN:16959.4ns, 36270.7 iter/s
sin(768bits): ITERS:14000: TOTAL:11.4962s, MAX:835972ns, MIN:804764ns, 1217.79 iter/s
sinh(): ITERS:480000: TOTAL:10.0494s, MAX:27467.6ns, MIN:17871.2ns, 47764.2 iter/s
tan(): ITERS:106000: TOTAL:10.0145s, MAX:94658.5ns, MIN:94262.7ns, 10584.6 iter/s
tan(768bits): ITERS:13000: TOTAL:10.3794s, MAX:798457ns, MIN:798377ns, 1252.48 iter/s
tanh(): ITERS:530000: TOTAL:10.0379s, MAX:19949.7ns, MIN:15252.3ns, 52800 iter/s

[-- Attachment #6: bench.out.n64.orig --]
[-- Type: text/plain, Size: 1940 bytes --]

acos(): ITERS:532000: TOTAL:10.0871s, MAX:25293.6ns, MIN:17373.6ns, 52740.8 iter/s
acosh(): ITERS:2.04e+06: TOTAL:9.99365s, MAX:4934.46ns, MIN:4893.03ns, 204130 iter/s
asin(): ITERS:686000: TOTAL:9.99681s, MAX:18011.1ns, MIN:13145.5ns, 68621.9 iter/s
asinh(): ITERS:250000: TOTAL:10.1391s, MAX:44865.5ns, MIN:36601.9ns, 24657 iter/s
atan(): ITERS:105000: TOTAL:10.0623s, MAX:96109.1ns, MIN:95512.9ns, 10435 iter/s
atan(768bits): ITERS:12000: TOTAL:12.4621s, MAX:1.03888e+06ns, MIN:1.03804e+06ns, 962.919 iter/s
atanh(): ITERS:405000: TOTAL:10.0084s, MAX:27863.6ns, MIN:20098.1ns, 40466.1 iter/s
cos(): ITERS:435000: TOTAL:10.0854s, MAX:23695.9ns, MIN:23030.3ns, 43131.6 iter/s
cos(768bits): ITERS:15000: TOTAL:12.2231s, MAX:816993ns, MIN:813610ns, 1227.19 iter/s
cosh(): ITERS:485000: TOTAL:10.0618s, MAX:24978.3ns, MIN:17940.9ns, 48202.1 iter/s
exp(): ITERS:458000: TOTAL:10.003s, MAX:21899.9ns, MIN:21814.4ns, 45786.5 iter/s
exp(768bits): ITERS:12000: TOTAL:10.1867s, MAX:848999ns, MIN:848807ns, 1178.01 iter/s
log(): ITERS:353000: TOTAL:10.0201s, MAX:28448.3ns, MIN:28356.9ns, 35229.2 iter/s
modf(): ITERS:4.582e+06: TOTAL:9.99066s, MAX:2206.73ns, MIN:2179.14ns, 458629 iter/s
pow(): ITERS:189000: TOTAL:10.001s, MAX:52994.9ns, MIN:52847.3ns, 18898.2 iter/s
pow(768bits): ITERS:4000: TOTAL:10.2235s, MAX:2.55596e+06ns, MIN:2.55583e+06ns, 391.256 iter/s
rint(): ITERS:4.684e+06: TOTAL:9.98718s, MAX:2598.14ns, MIN:1684.21ns, 469001 iter/s
sin(): ITERS:336000: TOTAL:10.1588s, MAX:46169.6ns, MIN:19511.5ns, 33074.7 iter/s
sin(768bits): ITERS:14000: TOTAL:11.4866s, MAX:832865ns, MIN:802973ns, 1218.82 iter/s
sinh(): ITERS:480000: TOTAL:10.046s, MAX:27424.4ns, MIN:17864.3ns, 47780.2 iter/s
tan(): ITERS:103000: TOTAL:10.0269s, MAX:97522.4ns, MIN:97150.2ns, 10272.4 iter/s
tan(768bits): ITERS:13000: TOTAL:10.3771s, MAX:798328ns, MIN:798048ns, 1252.75 iter/s
tanh(): ITERS:530000: TOTAL:10.0417s, MAX:19968.3ns, MIN:15264.2ns, 52780 iter/s

[-- Attachment #7: bench.out.o32.new --]
[-- Type: text/plain, Size: 1934 bytes --]

acos(): ITERS:469000: TOTAL:10.1016s, MAX:27283.1ns, MIN:20192.5ns, 46428.3 iter/s
acosh(): ITERS:1.225e+06: TOTAL:9.99691s, MAX:8189.19ns, MIN:8145.06ns, 122538 iter/s
asin(): ITERS:581000: TOTAL:10.0315s, MAX:20378.8ns, MIN:15523.3ns, 57917.7 iter/s
asinh(): ITERS:245000: TOTAL:10.0956s, MAX:45475.1ns, MIN:37275.3ns, 24268 iter/s
atan(): ITERS:93000: TOTAL:10.1256s, MAX:109072ns, MIN:108675ns, 9184.61 iter/s
atan(768bits): ITERS:12000: TOTAL:12.597s, MAX:1.05011e+06ns, MIN:1.04928e+06ns, 952.608 iter/s
atanh(): ITERS:345000: TOTAL:10.1009s, MAX:32196.4ns, MIN:24779.9ns, 34155.3 iter/s
cos(): ITERS:350000: TOTAL:10.0385s, MAX:29354.6ns, MIN:28487.7ns, 34865.9 iter/s
cos(768bits): ITERS:15000: TOTAL:11.8981s, MAX:794589ns, MIN:789894ns, 1260.7 iter/s
cosh(): ITERS:470000: TOTAL:10.0629s, MAX:22804.4ns, MIN:20493.5ns, 46706.3 iter/s
exp(): ITERS:521000: TOTAL:10.0052s, MAX:19237.1ns, MIN:19192.7ns, 52072.7 iter/s
exp(768bits): ITERS:12000: TOTAL:10.2866s, MAX:857315ns, MIN:857154ns, 1166.56 iter/s
log(): ITERS:262000: TOTAL:10.0202s, MAX:38301.7ns, MIN:38230.6ns, 26147.1 iter/s
modf(): ITERS:3.124e+06: TOTAL:9.99642s, MAX:3224.4ns, MIN:3195.4ns, 312512 iter/s
pow(): ITERS:182000: TOTAL:10.0499s, MAX:55302.1ns, MIN:55183ns, 18109.6 iter/s
pow(768bits): ITERS:4000: TOTAL:10.1846s, MAX:2.54641e+06ns, MIN:2.54601e+06ns, 392.751 iter/s
rint(): ITERS:4.724e+06: TOTAL:9.99597s, MAX:2579.64ns, MIN:1674.89ns, 472591 iter/s
sin(): ITERS:315000: TOTAL:10.0621s, MAX:47002.5ns, MIN:17693.3ns, 31305.7 iter/s
sin(768bits): ITERS:14000: TOTAL:11.0583s, MAX:799640ns, MIN:769023ns, 1266.01 iter/s
sinh(): ITERS:430000: TOTAL:10.1115s, MAX:30142.2ns, MIN:20424.6ns, 42526 iter/s
tan(): ITERS:98000: TOTAL:10.0428s, MAX:102592ns, MIN:102427ns, 9758.25 iter/s
tan(768bits): ITERS:14000: TOTAL:10.4189s, MAX:744311ns, MIN:744172ns, 1343.71 iter/s
tanh(): ITERS:460000: TOTAL:10.0976s, MAX:22994.1ns, MIN:18007.3ns, 45555.5 iter/s

[-- Attachment #8: bench.out.o32.orig --]
[-- Type: text/plain, Size: 1935 bytes --]

acos(): ITERS:469000: TOTAL:10.1029s, MAX:27322.6ns, MIN:20194.9ns, 46422.2 iter/s
acosh(): ITERS:1.23e+06: TOTAL:10.035s, MAX:8215.86ns, MIN:8143.5ns, 122571 iter/s
asin(): ITERS:581000: TOTAL:10.0239s, MAX:20383.9ns, MIN:15514.3ns, 57961.3 iter/s
asinh(): ITERS:220000: TOTAL:10.2001s, MAX:50684.3ns, MIN:42365.5ns, 21568.3 iter/s
atan(): ITERS:93000: TOTAL:10.1136s, MAX:109034ns, MIN:108434ns, 9195.55 iter/s
atan(768bits): ITERS:12000: TOTAL:12.5784s, MAX:1.0487e+06ns, MIN:1.04774e+06ns, 954.017 iter/s
atanh(): ITERS:345000: TOTAL:10.0996s, MAX:32227.5ns, MIN:24747.8ns, 34159.7 iter/s
cos(): ITERS:310000: TOTAL:10.1436s, MAX:33298.5ns, MIN:32524.1ns, 30561 iter/s
cos(768bits): ITERS:15000: TOTAL:11.9489s, MAX:798020ns, MIN:793229ns, 1255.35 iter/s
cosh(): ITERS:420000: TOTAL:10.0282s, MAX:28992.1ns, MIN:20488.2ns, 41882 iter/s
exp(): ITERS:394000: TOTAL:9.99972s, MAX:25426.7ns, MIN:25348.8ns, 39401.1 iter/s
exp(768bits): ITERS:12000: TOTAL:10.3538s, MAX:862910ns, MIN:862726ns, 1158.99 iter/s
log(): ITERS:262000: TOTAL:10.0177s, MAX:38285.9ns, MIN:38207.8ns, 26153.8 iter/s
modf(): ITERS:3.122e+06: TOTAL:9.99342s, MAX:3261.05ns, MIN:3195.4ns, 312406 iter/s
pow(): ITERS:159000: TOTAL:10.0406s, MAX:63219.5ns, MIN:63069.7ns, 15835.8 iter/s
pow(768bits): ITERS:4000: TOTAL:10.2002s, MAX:2.55022e+06ns, MIN:2.54979e+06ns, 392.149 iter/s
rint(): ITERS:4.728e+06: TOTAL:9.99821s, MAX:2586.03ns, MIN:1674.89ns, 472885 iter/s
sin(): ITERS:280000: TOTAL:10.0746s, MAX:51663.2ns, MIN:21503.7ns, 27792.6 iter/s
sin(768bits): ITERS:14000: TOTAL:11.1083s, MAX:802937ns, MIN:772717ns, 1260.31 iter/s
sinh(): ITERS:430000: TOTAL:10.1115s, MAX:30117.6ns, MIN:20413.7ns, 42525.9 iter/s
tan(): ITERS:94000: TOTAL:10.0103s, MAX:106672ns, MIN:106263ns, 9390.33 iter/s
tan(768bits): ITERS:14000: TOTAL:10.4675s, MAX:747732ns, MIN:747644ns, 1337.47 iter/s
tanh(): ITERS:460000: TOTAL:10.0919s, MAX:22996.1ns, MIN:17996.8ns, 45580.9 iter/s

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2013-09-23 16:42 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-09-19 22:58 [PATCH] Speed up libm on MIPS Steve Ellcey
2013-09-20  3:32 ` Carlos O'Donell
2013-09-20 16:51   ` Steve Ellcey
2013-09-20 17:06     ` Carlos O'Donell
2013-09-21 18:47       ` Maciej W. Rozycki
2013-09-22 17:40         ` Carlos O'Donell
2013-09-23 16:42           ` Steve Ellcey
2013-09-20 15:01 ` Richard Henderson
2013-09-20 15:35 ` Joseph S. Myers

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).