#include /* must define a data model */ #ifndef __LP64__ #define __LP32__ 1 #endif /* found out 24 Jan 2019 from new computer build */ /* gcc does not define __LITTLE_ENDIAN__ or __BIG_ENDIAN__ */ #ifndef __LITTLE_ENDIAN__ #if (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) #define __LITTLE_ENDIAN__ #else #define __BIG_ENDIAN__ #endif #endif typedef signed char int8_t; typedef unsigned char uint8_t; typedef short int16_t; typedef unsigned short uint16_t; typedef int int32_t; typedef unsigned int uint32_t; typedef long long int64_t; typedef unsigned long long uint64_t; #if __SIZEOF_INT128__ == 16 typedef __int128 int128_t; typedef unsigned __int128 uint128_t; #endif typedef union { #if __SIZEOF_INT128__ == 16 uint128_t uo; #endif #ifdef __BIG_ENDIAN__ struct { uint64_t ull1, ull0; } ulls; struct { uint32_t ul3, ul2, ul1, ul0; } uls; struct { uint16_t us7, us6, us5, us4, us3, us2, us1, us0; } uss; #else /* __LITTLE_ENDIAN__ */ struct { uint64_t ull0, ull1; } ulls; struct { uint32_t ul0, ul1, ul2, ul3; } uls; struct { uint16_t us0, us1, us2, us3, us4, us5, us6, us7; } uss; #endif } octle; /* defined in as_stdio.h */ #define quadruple octle /* gcc appears to only defines _LP64 __LP64__ (2019) */ /* only models with 64bit ints */ #if defined (__ILP64__) || defined (__SILP64__) typedef int64_t intmax_t; typedef uint64_t uintmax_t; #elif defined (__LP32__) typedef int16_t intmax_t; typedef uint16_t uintmax_t; #else /* __LP64__ has 32bit int */ typedef int32_t intmax_t; typedef uint32_t uintmax_t; #endif /* these are all register size based */ #if defined (__LP32__) || defined (__ILP32__) typedef int32_t intptr_t; typedef uint32_t uintptr_t; #define __SIZEOF_REGISTER_T__ 4 #define __SIZEOF_SIZE_T__ 4 #else typedef int64_t intptr_t; typedef uint64_t uintptr_t; #define __SIZEOF_REGISTER_T__ 8 #define __SIZEOF_SIZE_T__ 8 #endif typedef intptr_t register_t; typedef intptr_t _intptr_t; typedef uintptr_t u_intptr_t; typedef intptr_t ssize_t; /* Used for a count of bytes or an error indication. */ typedef uintptr_t size_t; /* Used for sizes of objects. */ typedef intptr_t ptrdiff_t; /* Used for subtracting two pointers. */ typedef uintptr_t clock_t; /* Used for system times in clock ticks or CLOCKS_PER_SEC . */ typedef intptr_t time_t; /* Used for time in seconds. 
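 *
 * Note: all of the object-size and count types above (size_t, ssize_t,
 * ptrdiff_t, clock_t, time_t) deliberately track the register width chosen
 * by the data-model test: 4 bytes under __LP32__/__ILP32__, 8 bytes
 * otherwise (see __SIZEOF_REGISTER_T__).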
*/ #define NULL ((void *)0) #ifdef __APPLE__ #define FILE FILE_mac #elif defined __linux /* !__APPLE__ */ typedef struct _IO_FILE _IO_FILE; #define FILE _IO_FILE #else /* !__APPLE__ !__CYGWIN__ !__linux */ #error: unknown FILE structure #endif /* FILE * */ extern void *malloc(size_t); extern void free(void *); extern int puts(const char *); size_t fwrite(const void *restrict, size_t, size_t, FILE *restrict); extern FILE* stdout; /* in ldtostr.c */ extern unsigned char * realtostr(uint32_t [5], unsigned char **, int, int, int, unsigned); extern unsigned char * realtohex(uint32_t [5], unsigned char **, int, int, int, unsigned); /* guarentee use of this printf */ int phx_printf(const char *format, ...); /* local defines */ #define ALT_PREFIX 0002 #define PREC_PREFIX 0400 /* converts to w160 type used by realtostr, * current use as 63-63 with implicit '1"-implicit nil */ static void qtow(quadruple qdbl, uint32_t w160[5]) { /* required strict aliasing conversion */ octle *iqdbl; unsigned char *cqdbl = (unsigned char*)&qdbl; iqdbl = (octle*)cqdbl; /* long double shifted to high bit of uint128_t, with implicit * bit becoming explicit at bit 127 */ w160[4] = iqdbl->uss.us7; *(uint64_t*)&w160[2] = (iqdbl->ulls.ull1 << 15) | (iqdbl->uls.ul1 >> 17); /* lower 64 set same position as upper, upper gets 'implicit '1' */ iqdbl->uss.us3 &= 1; *(uint64_t*)&w160[0] = (iqdbl->ulls.ull0 << 14); /* place implicit bit for all but denormal */ w160[3] |= (w160[4] != 0) << 31; } /* converts to w160 type used by realtostr */ static void ldtow(long double ldbl, uint32_t w160[5]) { /* required strict aliasing conversion */ octle *ildbl; unsigned char *cldbl = (unsigned char*)&ldbl; ildbl = (octle*)cldbl; w160[4] = ildbl->uss.us4; *(uint64_t*)&w160[2] = ildbl->ulls.ull0; *(uint64_t*)w160 = 0; } /* converts to w160 type used by realtostr */ static void dtow(double dbl, uint32_t w160[5]) { /* required strict aliasing conversion */ uint64_t *idbl; unsigned char *cdbl = (unsigned char*)&dbl; idbl = (uint64_t*)cdbl; *(uint64_t*)&w160[2] = *idbl << 11; w160[4] = (uint32_t)(*idbl >> 52); if ((w160[4] & 0x7FF) == 0x7FF) w160[4] = (w160[4] << 4) | 0x000F; else if (((w160[4] & 0x7FF) == 0) && (*(uint64_t*)&w160[2] == 0)) { w160[4] <<= 4; } else { w160[4] = ((w160[4] & 0x7FF) + 16383 - 1023) | ((w160[4] & 0x800) << 4); /* place implicit bit for all but denormal */ w160[3] |= ((w160[4] & 0x7FFF) != 0x3C00) << 31; } *(uint64_t*)w160 = 0; } /* not sure usefulness, gcc's code forces doubles in printf. Not sure of overhead * created by gcc and/or POSIX requirement of double argument. 
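 *
 * For reference, a rough worked example of the w160[] layout that dtow()
 * above should produce on a little-endian build: the double 1.0 has bit
 * pattern 0x3FF0000000000000; rebiasing the exponent from 1023 to 16383
 * gives w160[4] = 0x3FFF, and the mantissa is shifted to the top 64 bits
 * with the implicit bit made explicit, so w160[3] = 0x80000000 and
 * w160[2] = w160[1] = w160[0] = 0, the same shape an x87 extended value uses.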
static void ftow(float flt, uint32_t w160[5])
{
    *(uint64_t*)w160 = 0;
    *(uint64_t*)&w160[2] = 0;
    w160[4] = (*(uint32_t*)&flt) >> 23;
    w160[3] = (*(uint32_t*)&flt) << 9;
    if ((w160[4] & 0xFF) == 0xFF)
        w160[4] = (w160[4] << 8) | 0x00FF;
    else
        w160[4] = ((w160[4] & 0xFF) + 16383 - 127) | ((w160[4] & 0x100) << 7);
} */

/* locale dependent utf8 loading of decimal point */
/* for newlib demo return ascii */
struct __params {
    unsigned char **s;
    unsigned char *slimit;
    //locale_t ld;
};

static unsigned __decimalLoad(unsigned char *s, struct __params *params)
{
    (void)params;
    *s = '.';
    return (1);
}

#define BUFSTRSIZE 64

unsigned char *
__format(const unsigned char *format, struct __params *params, va_list arg)
{
    unsigned char buffer[BUFSTRSIZE];  /* size of 64bit binary output; no prec,
                                        * field nor alt goes here */
    unsigned char **s = params->s;
    unsigned char *slimit = params->slimit;
    unsigned char *strBuf = buffer, *strPtr, *strEnd, *sPtr = *s;
    uint64_t val_ull;                  /* for any sizeof() > 4 bytes; if
                                        * unsigned is 64bits it will bypass
                                        * val_ul to go here */
    unsigned val_ul;                   /* need unsigned instead of unsigned long
                                        * in case unsigned is 64bits */
    uint32_t arg_flags;                /* arg_flags needs to be 32bits??? */
    int arg_prec, arg_field, n;
    unsigned char ch, sign, arg_size;  /* arg_size max value is sizeof(long
                                        * double); if int goes 128bits it is
                                        * still within char size */
    uintptr_t *vPtr = NULL;

    --format;
    goto L500;
    do {
        strEnd = strPtr = &strBuf[BUFSTRSIZE];
        arg_flags = 0;
        arg_size = sizeof(int);
        arg_prec = arg_field = -1;
        sign = 0;
next_ch:
        ch = *++format;
EO_EXTENSIONS:
        if (!ch)
            break;
        switch (ch) {
        /* Removed Flags for demo */
        /* Removed some Length modifiers for demo */
        case 'l':
            if ((ch = *++format) != 'l') {  /* and ll */
                arg_size = sizeof(long);
                goto EO_EXTENSIONS;
            }
            arg_size = sizeof(long long);
            goto next_ch;
        case 'q': arg_size = sizeof(octle); goto next_ch;        /* 128bit */
        case 'L': arg_size = sizeof(long double); goto next_ch;  /* long double */
        /* Removed some Field-Width / Precision for demo */
        case '.':
            ch = *++format;
            arg_flags |= PREC_PREFIX;
        case '1': case '2': case '3': case '4': case '5':
        case '6': case '7': case '8': case '9':
            n = ch - 48;
            while ((unsigned)((ch = *++format) - '0') <= 9)
                n = (n * 10) + (ch - '0');
            if (ch != '$') {
                if (arg_flags & PREC_PREFIX)
                    arg_prec = n;
                else
                    arg_field = n;
                arg_flags &= ~PREC_PREFIX;
                goto EO_EXTENSIONS;
            }
            goto next_ch;
        /* Removed all Formats... but real */
        case 'a': case 'e': case 'f': case 'g':
        case 'A': case 'E': case 'F': case 'G':
            /* gcc doesn't permit floats, it converts to doubles */
            {
                unsigned char *(*toStr)(uint32_t [5], unsigned char **, int, int, int, unsigned);
                uint32_t w160[5];
                uint32_t dp;        /* 4 byte decimal point param */
                int32_t sch;
                __decimalLoad((unsigned char*)&dp, params);
                if ((ch | 0x20) == 'a')
                    ch -= 'A', toStr = realtohex;
                else
                    ch -= 'E', toStr = realtostr;
                sch = format[-1];
                /* force 128bit long double into quad parsing */
                if (sch == 'q') {
#if (__LDBL_MANT_DIG__ != 64)
quad_parse:
#endif
                    sch = (('q' << 8) | ch);
                    qtow(((vPtr == NULL) ? va_arg(arg, octle) : *(octle *)vPtr), w160);
                } else if (sch == 'L') {
#if (__LDBL_MANT_DIG__ != 64)
                    goto quad_parse;
#endif
                    sch = (('L' << 8) | ch);
                    ldtow(((vPtr == NULL) ? va_arg(arg, long double) : *(long double *)vPtr), w160);
                } else {
                    sch = (('l' << 8) | ch);
                    dtow(((vPtr == NULL) ?
                          va_arg(arg, double) : *(double *)vPtr), w160);
                }
                arg_flags |= (strBuf != buffer) << 7;
                strPtr = toStr(w160, &strBuf, sch, arg_prec, (int)arg_flags, dp);
            }
            if (strPtr == NULL)
                goto error;
            val_ul = (n = (int)(strPtr - strBuf));
            strPtr = strBuf;
            arg_prec = 0;
            arg_flags &= ~ALT_PREFIX;
            /* grouping code removed for demo */
        } /* end switch */

        /* main output section */
        /* adjust the different output factors */
        if ((arg_prec -= n) < 0)
            arg_prec = 0;
        if ((arg_field -= (arg_prec + val_ul)) < 0)
            arg_field = 0;
        /* is there output space? removed for demo */
        /* different formatting removed for demo */
        if (sign != 0)
            *sPtr++ = sign;
        if (arg_prec)
            do *sPtr++ = '0'; while ((--arg_prec));
        if (n & 1)
            *sPtr++ = *strPtr++;
        if (n & 2) {
            *(int16_t *)sPtr = *(int16_t *)strPtr;
            sPtr += sizeof(int16_t);
            strPtr += sizeof(int16_t);
        }
        if ((n >>= 2))
            do {
                *(int32_t *)sPtr = *(int32_t *)strPtr;
                sPtr += sizeof(int32_t);
                strPtr += sizeof(int32_t);
            } while ((--n));
        if (arg_field)
            do *sPtr++ = ' '; while ((--arg_field));

        /* parsing of format string */
        do {
L500:
            ch = *++format;
            if ((sPtr >= slimit)) {
                --format;
                /* removed buffer extreme */
                puts("this is demo, don't be cute!");
                goto end;
            }
            *sPtr = ch;
            if (!ch)
                goto end;
            if (ch == '%')
                break;
            sPtr++;
        } while (1);
    } while (1);
    *sPtr = 0;
end:
    if (strBuf != buffer)
        free(strBuf);
    return sPtr;
error:
    if (strBuf != buffer)
        free(strBuf);
    *--sPtr = 0;
    return (*s - 12);
}

#define FORMAT_LINE_MAX 4096

static int phx_vfprintf(FILE *stream, const char *format, va_list arg) {
//_vfprintf(FILE *stream, const char *format, va_list arg, locale_t ld) {
    unsigned char *sPtr, *wrPtr, *rdPtr;
    ssize_t to_write;
    size_t u8Sz = FORMAT_LINE_MAX;
    struct __params params;

    /* make sure writable stream; writable test removed for demo */
    if ( (stream == NULL)
         //|| (!_io_writable(stream))
         || (format == NULL) )
        return -1;
    /* create buffer to 'write' into */
    if ((wrPtr = (unsigned char*)malloc(u8Sz)) == NULL)
        return -1;
    /* label rdPtr as the format. If conversion, rdPtr will be allocated
     * and filled with UTF-8 version of the format */
    rdPtr = (unsigned char*)format;     /* UTF-8 input */
    params.s = &wrPtr;
    params.slimit = (wrPtr + u8Sz);
    //params.ld = ld;
    sPtr = __format((const unsigned char *)rdPtr, &params, arg);
    /* UTF-8 text output */
    if ((to_write = (ssize_t)(sPtr - wrPtr)) > 0)
        to_write = fwrite((const void *)wrPtr, (size_t)1, (size_t)to_write, stream);
    free(wrPtr);
    return (int)to_write;
}

int phx_printf(const char *format, ...)
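/* public entry point of the demo: gathers the variadic arguments into a
 * va_list and forwards them to phx_vfprintf() on stdout */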
{ int written; va_list arg; va_start(arg, format); written = phx_vfprintf((FILE *)stdout, format, arg); //written = _vfprintf((FILE *)stdout, format, arg, NULL); va_end(arg); return written; } /* * realtostr - convert real numbers to formated latn numerical string * realtohex - convert real numbers to formated hexadecimal string * qtold - convert quadruple-precision number to a system defined long double * ldtod - convert a system defined long double to a double-precision number */ /* C langauge - string literals */ #ifdef __BIG_ENDIAN__ #define cconst16_t(a,b) (uint16_t)(((uint8_t)(a) << 8) | (uint8_t)(b)) #define cconst32_t(a,b,c,d) (uint32_t)(((uint8_t)(a) << 24) | ((uint8_t)(b) << 16) | ((uint8_t)(c) << 8) | (uint8_t)(d)) #else #define cconst16_t(a,b) (uint16_t)(((uint8_t)(b) << 8) | (uint8_t)(a)) #define cconst32_t(a,b,c,d) (uint32_t)(((uint8_t)(d) << 24) | ((uint8_t)(c) << 16) | ((uint8_t)(b) << 8) | (uint8_t)(a)) #endif /* compiler convience macro */ #ifndef __GNUC_PREREQ #if defined __GNUC__ && defined __GNUC_MINOR__ #define __GNUC_PREREQ(major,minor) \ (((__GNUC__ << 16) + __GNUC_MINOR__) >= (((major) << 16) + (minor))) #else #define __GNUC_PREREQ(major,minor) 0 #endif #endif /* __GNUC_PREREQ */ #ifndef __LDBL_MANT_DIG__ #define __LDBL_MANT_DIG__ LDBL_MANT_DIG #endif /* endian break down of uint64_t */ #ifdef __BIG_ENDIAN__ typedef struct { uint16_t w3; uint16_t w2; uint16_t w1; uint16_t w0; } _ull_ws; typedef struct { uint32_t hi; uint32_t lo; } _ull_ls; #elif defined (__LITTLE_ENDIAN__) typedef struct { uint16_t w0; uint16_t w1; uint16_t w2; uint16_t w3; } _ull_ws; typedef struct { uint32_t lo; uint32_t hi; } _ull_ls; #else #error: undefined endianness #endif typedef union { uint64_t ull; _ull_ls uls; _ull_ws uss; } _ull_t; #ifdef __ppc__ #define CNTLZW(x,y) \ do __asm__("cntlzw %0,%1" : "=r" (x) : "r" (y)); while(0) #elif defined (__i386__) #define CNTLZW(x,y) \ do { __asm__("bsr %1,%0" : "=r" (x) : "r" (y)); x = 31 - x; } while(0) #elif defined (__x86_64__) #define CNTLZW(x,y) \ do { __asm__("bsr %1,%0" : "=r" (x) : "r" (y)); x = 31 - x; } while(0) #define CNTLZD(x,y) \ do { __asm__("bsr %1,%0" : "=r" (x) : "r" (y)); x = 63 - x; } while(0) #else /* about 50% slower, 3 instructions vs 31 instructions */ #define CNTLZW(x,y) \ do { unsigned t = y; \ t |= t >> 1; \ t |= t >> 2; \ t |= t >> 4; \ t |= t >> 8; \ t |= t >> 16; \ t = t - ((t >> 1) & 0x55555555); \ t = (t & 0x33333333) + ((t >> 2) & 0x33333333); \ x = 32 - (((t + (t >> 4) & 0xF0F0F0F) * 0x1010101) >> 24); \ } while (0); #endif /* used for __fractional128 stack zeroing */ extern void *memset(void *, int, size_t); long double qtold(quadruple); double ldtod(long double); #define _CARRY 1000000000000000000ULL /* 0x 0DE0B6B3 A7640000 */ /* for rounding */ static const uint64_t _fcarry[] = { 5, 50, 500, 5000, 50000, 500000, 5000000, 50000000, 500000000, 5000000000ULL, 50000000000ULL, 500000000000ULL, 5000000000000ULL, 50000000000000ULL, 500000000000000ULL, 5000000000000000ULL, 50000000000000000ULL, 500000000000000000ULL, /*18*/ 1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000, 10000000000ULL, 100000000000ULL, 1000000000000ULL, 10000000000000ULL, 100000000000000ULL, 1000000000000000ULL, 10000000000000000ULL, 100000000000000000ULL, _CARRY }; /* for cases: nan, inf, hex */ static const char *_const_outStr = "0123456789abcdefnanpinfe0x0.00000123456789ABCDEFNANPINFE0X0.0000"; /*** internal routines ***/ /* generic, div() moved to 64_t to address endianess */ static uint64_t divlu2r(uint64_t) __attribute__ 
((noinline)); static void _ulltobuffer_18lr(uint64_t, unsigned char *) __attribute__ ((noinline)); static void _ulltobuffer_nlr(uint64_t, unsigned char *, int) __attribute__ ((noinline)); static int _first_digit(uint64_t); /* ldtostr specific */ static unsigned char *__n_char_sequence(uint32_t [5], unsigned char *, int); static uint32_t __fractional(_ull_t *, _ull_t); static uint32_t __fractional_128(_ull_t *, _ull_t); static uint32_t __positive_exponent_fp(_ull_t *, int); static uint32_t __negative_exponent_fp(_ull_t *, int); /* from fprintf... needs to be the same */ #define ZERO_PREFIX 0001 #define ALT_PREFIX 0002 #define LEFT_PREFIX 0004 #define SIGN_PREFIX 0010 #define SPC_PREFIX 0020 #define STAR_PREFIX 0040 #define GRP_PREFIX 0100 #define USGN_PREFIX 0200 #define PREC_PREFIX 0400 #define ALLOC_BUF USGN_PREFIX #define BUFSTRSIZE 64 /* XXX using upper 16 to pass 'unit' max from calcation prior to entry of x_exponent */ #define UPPER_TYPE 0x20 #define IS_UPPER_TYPE(a) (!((a) & UPPER_TYPE)) #define IS_QUAD(a) ((uint8_t)((a) >> 8) == (uint8_t)'q') #define IS_LDBL(a) ((uint8_t)((a) >> 8) == (uint8_t)'L') #define IS_DBL(a) ((uint8_t)((a) >> 8) == (uint8_t)'l') #define IS_FLT(a) ((uint8_t)((a) >> 8) == 0) #define TYPE_ZERO 0x40 #define IS_TYPE_ZERO(a) ((a) & TYPE_ZERO) #define TYPE_E 0 #define TYPE_F 1 #define TYPE_G 2 #define IS_TYPE_E(a) (((a) & 3) == 0) #define IS_TYPE_F(a) ((a) & TYPE_F) #define IS_TYPE_G(a) ((a) & TYPE_G) #define SURPRESS_NSEQ(a) (((a) & 4) != 0) #pragma mark *** Support Mathematics *** static uint64_t divlu2r(uint64_t c) { _ull_t q; uint32_t mConst = 2305843009U; // 2^61 / 10^9 uint32_t tConst = 1000000000U; q.uls.hi = (uint32_t)(c >> 32); q.uls.lo = ((uint32_t)c); if (q.uls.hi != 0) { uint32_t shift; CNTLZW(shift, q.uls.hi); q.uls.hi = (uint32_t)((((q.uls.hi << shift) | (q.uls.lo >> (32 - shift))) * (uint64_t)mConst) >> 32); if (shift > 3) q.uls.hi >>= (shift - 3); } else { q.uls.hi = (uint32_t)(((q.uls.lo | 1) * (uint64_t)mConst) >> (32 + 29)); } q.uls.lo = (uint32_t)(c - ((uint64_t)q.uls.hi * (uint32_t)tConst)); if (q.uls.lo >= tConst) q.uls.hi += 1, q.uls.lo -= tConst; return q.ull; } static void _ulltobuffer_18lr(uint64_t c, unsigned char *buf) { _ull_t a, b; int n; --buf; /* in 4.60, 2.30 fixed point integers */ b.ull = (uint64_t)(((uint32_t)c) << 2) * 2882303762U; a.ull = (uint64_t)(((uint32_t)(c >> 32)) << 2) * 2882303762U; n = 9; goto L1; do { b.uls.hi = (((b.uls.hi << 4) >> 4) * 10) + (b.uls.lo >> 28); a.uls.hi = (((a.uls.hi << 4) >> 4) * 10) + (a.uls.lo >> 28); b.uls.lo <<= 4; a.uls.lo <<= 4; L1: *++buf = (a.uls.hi >> 28) + '0'; buf[9] = (b.uls.hi >> 28) + '0'; } while ((--n)); } /* went for speed */ static void _ulltobuffer_nlr(uint64_t c, unsigned char *buf, int m) { _ull_t b; int n; --buf; /* in 4.60, 2.30 fixed point integers */ c <<= 2; b.ull = (uint64_t)((uint32_t)(c >> 32)) * 2882303762U; n = (m <= 9) ? 
                                         m : 9;
    m -= 9;
    goto L1;
    do {
        b.uls.hi = (((b.uls.hi << 4) >> 4) * 10) + (b.uls.lo >> 28);
        b.uls.lo <<= 4;
L1:
        *++buf = (b.uls.hi >> 28) + '0';
    } while ((--n));
    if (m > 0) {
        n = m;
        b.ull = (uint64_t)((uint32_t)c) * 2882303762U;
        goto L2;
        do {
            b.uls.hi = (((b.uls.hi << 4) >> 4) * 10) + (b.uls.lo >> 28);
            b.uls.lo <<= 4;
L2:
            *++buf = (b.uls.hi >> 28) + '0';
        } while ((--n));
    }
}

/*#pragma GCC diagnostic ignored "-Wmisleading-indentation"*/
#if __GNUC_PREREQ(4,5)
#pragma GCC diagnostic ignored "-Wmisleading-indentation"
#endif
/* uses 2 64s as swap loads, this prevents stalls */
/* returns 1 less than actual digits, the power of 10, or log10 value */
static int _first_digit(uint64_t a)
{
    _ull_t reg;
    const uint64_t *fc;
    uint64_t b;

    reg.ull = a;
    fc = _fcarry;
    if (reg.uls.hi) {
        a = fc[35]; b = fc[34];
        if (reg.ull >= a) return 17;
        a = fc[33];
        if (reg.ull >= b) return 16;
        b = fc[32];
        if (reg.ull >= a) return 15;
        a = fc[31];
        if (reg.ull >= b) return 14;
        b = fc[30];
        if (reg.ull >= a) return 13;
        a = fc[29];
        if (reg.ull >= b) return 12;
        b = fc[28];
        if (reg.ull >= a) return 11;
        if (reg.ull >= b) return 10;
        return 9;
    } else {
        uint32_t c, d;
        c = (uint32_t)fc[27]; d = (uint32_t)fc[26];
        if (reg.uls.lo >= c) return 9;
        c = (uint32_t)fc[25];
        if (reg.uls.lo >= d) return 8;
        d = (uint32_t)fc[24];
        if (reg.uls.lo >= c) return 7;
        c = (uint32_t)fc[23];
        if (reg.uls.lo >= d) return 6;
        if (reg.uls.lo >= c) return 5;
        if (reg.uls.lo >= 10000U) return 4;
        if (reg.uls.lo >= 1000U) return 3;
        if (reg.uls.lo >= 100U) return 2;
        if (reg.uls.lo >= 10U) return 1;
        return 0;
    }
}
/*#pragma GCC diagnostic pop*/
#if __GNUC_PREREQ(4,5)
#pragma GCC diagnostic pop
#endif

#pragma mark *** Support Subroutines ***

static unsigned char *
__n_char_sequence(uint32_t w160[5], unsigned char *sPtr, int type)
{
    const char *_outvalues;
    unsigned is_zero;
    /* e80 long double type, 63rd bit required set regardless of nan or inf */
    /* if is_zero is zero, then the value is infinity, else nan */
    /* any info in nan is considered n-char-sequence */
    /* bit 62 of nan, when 0, and others [61-0] set, is considered signaling nan.
     * bit 62 of nan, when 1, if others 0, special quiet nan, others [61-0] set,
     * considered quiet signaling nan.
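     *
     * As a rough illustration: for an x87 quiet NaN whose 64-bit mantissa is
     * 0xC000000000000000, the code below should emit "nan(0xc000000000000000)"
     * when the n-char-sequence is not suppressed, and just "nan" when it is
     * (SURPRESS_NSEQ is driven by the ALT_PREFIX flag at the call site).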
*/ is_zero = ((w160[3] & 0x7FFFFFFF) | w160[2] | w160[1] | w160[0]); _outvalues = _const_outStr + 16; if (IS_UPPER_TYPE(type)) _outvalues += 32; if (!is_zero){ *((uint32_t *)sPtr) = ((int32_t *)_outvalues)[1]; return (sPtr + 3); } *((uint32_t *)sPtr) = ((int32_t *)_outvalues)[0]; sPtr += 3; if (SURPRESS_NSEQ(type)) return sPtr; /* for doubles 53bits exposed, shift off hidden */ if (!IS_LDBL(type)) *(uint64_t*)&w160[2] <<= 1; if (IS_QUAD(type)) { w160[2] |= (w160[1] & 0x40000000) >> 30; *(uint64_t*)&w160[0] <<= 2; } /* now output the n-char-sequence */ /* (0x hexdigits ) * e80,16nibbles, doubles,13nibbles, floats,5.5nibbles, */ *((uint16_t *)sPtr) = cconst16_t('(','0'); sPtr += 2; *sPtr = _outvalues[9]; /* 'x' or 'X' */ sPtr++; _outvalues -= 16; sPtr[ 0] = _outvalues[(w160[3] >> 28)]; sPtr[ 1] = _outvalues[((w160[3] << 4) >> 28)]; sPtr[ 2] = _outvalues[((w160[3] << 8) >> 28)]; sPtr[ 3] = _outvalues[((w160[3] << 12) >> 28)]; sPtr[ 4] = _outvalues[((w160[3] << 16) >> 28)]; sPtr[ 5] = _outvalues[((w160[3] << 20) >> 28)]; if (IS_FLT(type)) { sPtr += 6; goto end_brace; } sPtr[ 6] = _outvalues[((w160[3] << 24) >> 28)]; sPtr[ 7] = _outvalues[((w160[3] << 28) >> 28)]; sPtr[ 8] = _outvalues[(w160[2] >> 28)]; sPtr[ 9] = _outvalues[((w160[2] << 4) >> 28)]; sPtr[10] = _outvalues[((w160[2] << 8) >> 28)]; sPtr[11] = _outvalues[((w160[2] << 12) >> 28)]; sPtr[12] = _outvalues[((w160[2] << 16) >> 28)]; if (IS_DBL(type)) { sPtr += 13; goto end_brace; } sPtr[13] = _outvalues[((w160[2] << 20) >> 28)]; sPtr[14] = _outvalues[((w160[2] << 24) >> 28)]; sPtr[15] = _outvalues[((w160[2] << 28) >> 28)]; sPtr += 16; if (IS_LDBL(type)) goto end_brace; /* undo w160 formatting of '[0]' for number */ sPtr[ 0] = _outvalues[(w160[1] >> 28)]; sPtr[ 1] = _outvalues[((w160[1] << 4) >> 28)]; sPtr[ 2] = _outvalues[((w160[1] << 8) >> 28)]; sPtr[ 3] = _outvalues[((w160[1] << 12) >> 28)]; sPtr[ 4] = _outvalues[((w160[1] << 16) >> 28)]; sPtr[ 5] = _outvalues[((w160[1] << 20) >> 28)]; sPtr[ 6] = _outvalues[((w160[1] << 24) >> 28)]; sPtr[ 7] = _outvalues[((w160[1] << 28) >> 28)]; sPtr[ 8] = _outvalues[(w160[0] >> 28)]; sPtr[ 9] = _outvalues[((w160[0] << 4) >> 28)]; sPtr[10] = _outvalues[((w160[0] << 8) >> 28)]; sPtr[11] = _outvalues[((w160[0] << 12) >> 28)]; sPtr += 12; end_brace: *sPtr = ')'; return (sPtr + 1); } /* converts fractional portion of IEEE754 value to * base 10 representation, in 10^18 ull format. */ static uint32_t __fractional(_ull_t *uPtr, _ull_t reg) { uint32_t last = 1; uint64_t mlConst = 3814697265625ULL; /* 5^18 (42bits) 0x378 2DACE9D9 */ /* 0x7FFFFFFF FFFFFFFF start 0x7FFFE 1FFF FFFFFFFF split */ uPtr[0].ull = (reg.uls.hi >> 13) * mlConst; /* 18 + 45 == 63 */ /* test added, rather than just '&='. This is guess that small 'standard' * fractions might be used */ if (((reg.uls.hi &= 0x00001FFF) | reg.uls.lo) != 0) { /* the left over 13bits from hi with 5bits from lo (45-18=>27). 
0x00001FFF FFFFFFFF start 0x00001FFFF8 7FFFFFF split */ uint64_t u64; last += 1; uPtr[0].ull += (u64 = ((uint32_t)(reg.ull >> 27)) * mlConst) >> 18; uPtr[1].ull = ((uint32_t)u64 & 0x3FFFF) * mlConst; if ((reg.uls.lo &= 0x7FFFFFF) != 0) { /* 0x00000000 07FFFFFF start 0x 7FFFE 1FF split */ _ull_t scratch; last += 1; uPtr[0].ull += (u64 = (reg.uls.lo >> 9) * mlConst) >> 36; scratch.ull = (uint32_t)((uint32_t)u64 & 0x3FFFF) * mlConst; uPtr[2].ull = (scratch.uls.lo & 0x3FFFF) * mlConst; u64 = uPtr[1].ull + (scratch.ull >> 18) + (((uint32_t)(u64 >> 18)) & 0x3FFFF) * mlConst; if (u64 >= _CARRY) u64 -= _CARRY, uPtr[0].ull += 1ULL; uPtr[1].ull = u64; if ((reg.uls.lo &= 0x1FF) != 0) { /* 0x00000000 000001FF end of 63bit resolution */ uint32_t u32; last += 1; // 54*10^18 u32 = (uint32_t)((scratch.ull = reg.uls.lo * mlConst) >> 27); uPtr[0].ull += (uint64_t)(scratch.uls.hi >> 13); // 54*10^18*10^18 /* first 18 of 45 remainder */ u64 = ((u32 & 0x3FFFF) * mlConst) + uPtr[1].ull; /* second 18 of 45 remainder, scratch.uls.lo &= 0x07FFFFFF; */ u32 = scratch.uls.lo & 0x07FFFFFF; scratch.ull = (u32 >> 9) * mlConst; u64 += (scratch.ull >> 18); uPtr[2].ull += (scratch.uls.lo & 0x3FFFF) * mlConst; if (uPtr[2].ull >= _CARRY) uPtr[2].ull -= _CARRY, u64 += 1ULL; uPtr[1].ull = u64; /* last 9 of 45 remainder */ scratch.ull = ((u32 & 0x000001FF) << 9) * mlConst; uPtr[1].ull += (uint64_t)(scratch.uls.hi >> 4); if (uPtr[1].ull >= _CARRY) uPtr[1].ull -= _CARRY, uPtr[0].ull += 1ULL; /* of scratch2 36 remainder */ scratch.uls.hi &= 0x0000000F; uPtr[2].ull += ((uint32_t)(scratch.ull >> 18)) * mlConst; uPtr[2].ull += ((scratch.uls.lo & 0x3FFFF) * mlConst) >> 18; if (uPtr[2].ull >= _CARRY) uPtr[2].ull -= _CARRY, uPtr[1].ull += 1ULL; /* of 18 remainder */ scratch.uls.lo = (uint32_t)((scratch.uls.lo & 0x3FFFF) * mlConst) & 0x3FFFF; uPtr[3].ull = scratch.uls.lo * mlConst; } } } return last; } /* this is for second register of frational */ /* expected entry use: __fractional(uPtr, reg2); * expected range of reg2: 0x7fffffffffffc000 thru 0x0000000000004000 * reg2 expectation: shifted right 1, matching reg 1, where high bit is bit 62 * to avoid initial allocation problems, create stack use instead of uPtr. 
* also avoids re-zeroing of uPtr regs * uPtr parameter for adding to reg1's converted fractional, the final result */ static uint32_t __fractional_128(_ull_t *uPtr, _ull_t reg) { uint32_t last; _ull_t sreg[15]; /* StackREGisters */ memset(sreg, 0, sizeof(sreg)); /* create regs of multiplly by 5^63 (3 max) */ __fractional(sreg, reg); /* creates sr3-sr6, affects uPtr[4] */ last = __fractional(&sreg[ 3], sreg[0]); if ((sreg[1].ull)) { last = 5; // creates uPtr[5] value /* creates sr7-sr10 */ __fractional(&sreg[ 7], sreg[1]); sreg[4].ull += sreg[7].ull; sreg[5].ull += sreg[8].ull; sreg[6].ull += sreg[9].ull; sreg[7].ull = sreg[10].ull; if ((sreg[2].ull)) { /* creates sr11-sr14*/ __fractional(&sreg[11], sreg[2]); sreg[5].ull += sreg[11].ull; sreg[6].ull += sreg[12].ull; sreg[7].ull += sreg[13].ull; uPtr[6].ull = sreg[14].ull; } if (sreg[7].ull >= _CARRY) sreg[6].ull += 1ULL, sreg[7].ull -= _CARRY; uPtr[5].ull = sreg[7].ull; while (sreg[6].ull >= _CARRY) sreg[5].ull += 1ULL, sreg[6].ull -= _CARRY; } /* sreg3-sreg8 set for addin transfer to uPtr */ uPtr[4].ull = sreg[6].ull; uPtr[3].ull += sreg[5].ull; while (uPtr[3].ull >= _CARRY) uPtr[2].ull += 1ULL, uPtr[3].ull -= _CARRY; uPtr[2].ull += sreg[4].ull; if (uPtr[2].ull >= _CARRY) uPtr[1].ull += 1ULL, uPtr[2].ull -= _CARRY; uPtr[1].ull += sreg[3].ull; if (uPtr[1].ull >= _CARRY) uPtr[0].ull += 1ULL, uPtr[1].ull -= _CARRY; return last; } static uint32_t __positive_lowexp128(_ull_t *uPtr, unsigned pw2, _ull_t reg0, _ull_t reg1) { uint32_t last, pw10 = 0; uint32_t unit_18 = 0; if (pw2 < 64) { /* create shifted reg0, reg1 */ /* special case: reg1 has 49bits, a shift of >= 49 bits (*2^49+) * means only 1 reg used for __fractional() */ uint32_t shiftb = 63 - pw2; if (pw2 <= 48) { /* lowest drop of 15 bits, max of 63 */ uPtr[0].ull = reg0.ull >> shiftb; /* pw2 == 0 clears explicit '1' */ reg0.ull = reg0.ull << pw2, reg0.uls.hi &= 0x7fffffff; /* pw2 == 0 ORs high bit explicit '0' */ reg0.ull |= reg1.ull >> shiftb; reg1.ull = reg1.ull << pw2, reg1.uls.hi &= 0x7fffffff; last = __fractional(&uPtr[1], reg0); if (reg1.ull != 0) last = __fractional_128(&uPtr[1], reg1); } else { uint32_t hi; /* @49 shiftb = 14, @63 shiftb = 0 */ if (shiftb != 0) { reg1.ull = (reg1.ull >> shiftb); reg1.uls.hi |= (reg0.uls.lo << (32 - shiftb)) >> 1; reg0.ull >>= shiftb; } else { /* values 8-15 possible, reducing >= _CARRY loop * 8 9 10 11 12 13 14 15 * actual 9, 10, 11, 12, 13, 14, 16, 17 e18 starting values */ hi = (uint32_t)((reg0.uss.w3 >> 12) + 1); goto hi_adjusted; } /* reg0 holds shifted whole value */ /* default action */ uPtr[0].ull = reg0.ull; if ((shiftb < 4) || ((shiftb == 4) && (reg0.ull >= _CARRY))) { /* values 0-7 possible, reducing >= _CARRY loop * 0 1 2 3 4 5 6 7 * actual 1, 1, 2, 3, 4, 5, 6, 8, e18 starting values */ hi = (uint32_t)(reg0.uss.w3 >> 12); if ((hi)) hi_adjusted: reg0.ull -= hi * _CARRY; while (reg0.ull >= _CARRY) hi += 1, reg0.ull -= _CARRY; uPtr[0].ull = (uint64_t)hi, ++uPtr; uPtr[0].ull = reg0.ull; pw10 = 18; } last = __fractional(&uPtr[1], reg1); unit_18 = 1; } } else { uint32_t n28s; _ull_t reg; /* most used 64 bit gpr */ /* 64 <= pw2 < 112, 1 fractional reg, multiple whole regs */ uint32_t hi = (uint32_t)((reg0.uss.w3 >> 12) + 1); reg0.ull -= hi * _CARRY; while (reg0.ull >= _CARRY) hi += 1, reg0.ull -= _CARRY; uPtr[0].ull = (uint64_t)hi; uPtr[1].ull = reg0.ull; /* max 50 ((111-63) = 49) [0-49] */ pw2 -= 63; uint32_t shiftb = 63 - pw2; if ((n28s = (pw2 >= 28))) pw2 -= 28; if ((pw2)) { /* shift range [27-1] */ _ull_t scratch; /* shift [18-9] left 
[27-1] */ uPtr[0].uls.lo <<= pw2; /* shift remainder (uPtr[1], size < 10^19) left [27-1] */ reg.ull = divlu2r(uPtr[1].ull); scratch.ull = divlu2r((uint64_t)reg.uls.hi << pw2); uPtr[0].ull += scratch.uls.hi; reg.ull = ((uint64_t)scratch.uls.lo * 1000000000U) + ((uint64_t)reg.uls.lo << pw2); /* do carry, set uPtr[1] */ if (reg.ull >= _CARRY) uPtr[0].ull += 1ULL, reg.ull -= _CARRY; uPtr[1].ull = reg.ull; } unit_18 = 1; pw10 = 18; if (n28s) { /* shift of 28 multiples */ /* set initial limiter to current regs valid (<= unit_max) */ _ull_t *cap = &uPtr[unit_18]; _ull_t *input, *output, next; reg.ull = (output = (input = uPtr))->ull; output->ull = 0; next.ull = input[1].ull; unit_18++; if (reg.ull >= 3725290298ULL) goto L1; unit_18--; output->ull = reg.ull << 28; output--; do { _ull_t scratch0; uint32_t u32; reg.ull = next.ull; next.ull = (++input)[1].ull; output++; L1: scratch0.ull = divlu2r(reg.ull); u32 = (uint32_t)(((uint64_t)scratch0.uls.hi * 2305843009) >> 32) >> 1; scratch0.ull = ((((uint64_t)scratch0.uls.hi << 28) - ((uint64_t)u32 * 1000000000U)) * 1000000000U) + ((uint64_t)scratch0.uls.lo << 28); if (scratch0.ull >= _CARRY) u32 += 1, scratch0.ull -= _CARRY; output[1].ull = scratch0.ull; if ((output->ull += u32) >= _CARRY) output->ull -= _CARRY, output[-1].ull += 1; } while (input < cap); } pw10 += ((unit_18 - 1) * 18); /* reg0 has addin to shifted whole value */ pw2 = 63 - shiftb; reg0.ull = (reg1.ull >> shiftb); reg0.ull += uPtr[1].ull; if (reg0.ull >= _CARRY) uPtr[0].ull += 1, reg0.ull -= _CARRY; uPtr[1].ull = reg0.ull; /* put fractional bits into place */ reg1.ull = reg1.ull << pw2, reg1.uls.hi &= 0x7fffffff; last = __fractional(&uPtr[2], reg1); last++; } return ((pw10 << 16) | ((last + unit_18) << 4) | last); } static uint32_t __positive_lowexp(_ull_t *uPtr, unsigned pw2, _ull_t reg0) { /* do less than 63 code */ uint16_t last, whole_incr, pw10; uint64_t up0; pw10 = (whole_incr = 0); if (pw2 < 31) { up0 = (uint64_t)(reg0.uls.hi >> (31 - pw2)); reg0.uls.hi = (reg0.uls.hi << (pw2 + 1)) >> (pw2 + 1); } else if (pw2 == 31) { up0 = (uint64_t)reg0.uls.hi; reg0.uls.hi = 0; } else { uint32_t shift = pw2 - 32; up0 = reg0.ull >> (31 - shift); reg0.uls.lo = (reg0.uls.lo << (shift + 1)) >> (shift + 1); if ((shift > 27) || ((shift == 27) && (up0 >= _CARRY))) { do uPtr[0].ull += 1ULL; while ((up0 -= _CARRY) >= _CARRY); uPtr++, whole_incr++, pw10 = 18; } } uPtr[0].ull = up0; last = 1; if ((reg0.ull <<= pw2) != 0) last += __fractional(&uPtr[1], reg0); return ((pw10 << 16) | ((last + whole_incr) << 4) | last); } /* now grows low address to high address, or left to right numerically */ /* expects implicit/explicit 1 as 63rd bit */ /* multiplies by 2^28, 2^28 used to keep large values within 10^18 ulls */ /* addition of such values create at max 'overflow' of 1 */ /* valid division range on /28 [0-16383] */ static uint32_t __positive_exponent_fp(_ull_t *uPtr, int type) { unsigned pw2, pw10, n28s; /* power of 2, power of 10, shifts of 28 */ _ull_t reg0; /* most used 64 bit gpr */ /* track reg expansion, initial is to read 2 registers (&uPtr[unit_18]) */ uint32_t unit_18 = 1; /* load 'passed' data */ pw2 = uPtr[0].uss.w0 - 16383; /* note: retain explicit '1' */ reg0.ull = uPtr[1].ull; uPtr[1].ull = (uPtr[0].ull = 0); if (uPtr[2].ull != 0) { _ull_t reg1; uint32_t hi; reg1.ull = uPtr[2].ull; uPtr[2].ull = 0; if (pw2 < 112) return __positive_lowexp128(uPtr, pw2, reg0, reg1); /* set up with all whole */ hi = (uint32_t)((reg0.uss.w3 >> 12) + 1); reg0.ull -= hi * _CARRY; while (reg0.ull >= _CARRY) hi += 1, 
reg0.ull -= _CARRY; uPtr[0].ull = (uint64_t)hi << 49; uPtr[1].ull = reg1.ull >> 14; /* reg1 free to use as gpr */ reg0.ull = divlu2r(reg0.ull); reg1.ull = divlu2r((uint64_t)reg0.uls.hi << 21); uPtr[0].ull += (uint64_t)reg1.uls.hi << 28; reg0.ull = ((uint64_t)reg1.uls.lo * 1000000000U) + ((uint64_t)reg0.uls.lo << 21); reg0.ull = divlu2r(reg0.ull); reg1.ull = divlu2r((uint64_t)reg0.uls.hi << 28); uPtr[0].ull += (uint64_t)reg1.uls.hi; uPtr[1].ull += ((uint64_t)reg1.uls.lo * 1000000000U) + ((uint64_t)reg0.uls.lo << 28); if (uPtr[1].ull >= _CARRY) uPtr[0].ull += 1, uPtr[1].ull -= _CARRY; pw10 = 18; pw2 -= 112; /* uPtr[0] range [10384593717069655-5192296858534827] */ if ((pw2 -= ((n28s = (pw2 * 18725U) >> 19) * 28)) != 0) { _ull_t next, *output; uint32_t u32; next.ull = uPtr[1].ull; reg0.ull = divlu2r(uPtr[0].ull); /* 2^61 / 10^9 based on << 28 forces all into higher register */ u32 = (uint32_t)(((uint64_t)reg0.uls.hi * 2305843009U) >> 32) >> (29 - pw2); reg0.ull = ((((uint64_t)reg0.uls.hi << pw2) - ((uint64_t)u32 * 1000000000U)) * 1000000000U) + ((uint64_t)reg0.uls.lo << pw2); if (reg0.ull >= _CARRY) u32 += 1, reg0.ull -= _CARRY; if ((u32)) { uPtr[1].ull = reg0.ull; uPtr[0].ull = (uint64_t)u32; /* creates new register, increases uPtr[unit_18] limit (3) */ /* adds pw10 += 18 by return math addition of unit_18 */ unit_18++; output = &uPtr[1]; } else { /* no new register, still uPtr[unit_18] limit (2) */ uPtr[0].ull = reg0.ull; output = &uPtr[0]; } reg0.ull = divlu2r(next.ull); u32 = (uint32_t)(((uint64_t)reg0.uls.hi * 2305843009U) >> 32) >> (29 - pw2); reg0.ull = ((((uint64_t)reg0.uls.hi << pw2) - ((uint64_t)u32 * 1000000000U)) * 1000000000U) + ((uint64_t)reg0.uls.lo << pw2); if (reg0.ull >= _CARRY) u32 += 1, reg0.ull -= _CARRY; output[1].ull = reg0.ull; /* case: * w160[4] = 0x4092, w160[3] = 0x8001a8fb, w160[2] = 0xec8ed23f; */ if ((output[0].ull += u32) >= _CARRY) output[-1].ull += 1, output[0].ull -= _CARRY; } } else { uint32_t hi; if (pw2 < 63) return __positive_lowexp(uPtr, pw2, reg0); /* below, all use min of 2 regs (uPtr) */ /* creates new register, still uPtr[unit_18] limit (2) */ /* values 8-15 possible, reducing >= _CARRY loop * 8 9 10 11 12 13 14 15 * actual 9, 10, 11, 12, 13, 14, 16, 17 e18 starting values */ hi = (uint32_t)((reg0.uss.w3 >> 12) + 1); reg0.ull -= hi * _CARRY; while (reg0.ull >= _CARRY) hi += 1, reg0.ull -= _CARRY; uPtr[0].ull = (uint64_t)hi; uPtr[1].ull = reg0.ull; pw10 = 18; pw2 -= 63; if ((pw2 -= ((n28s = (pw2 * 18725U) >> 19) * 28)) != 0) { /* shift range [27-1] */ _ull_t scratch; /* shift [18-9] left [27-1] */ uPtr[0].uls.lo <<= pw2; /* shift remainder (uPtr[1], size < 10^19) left [27-1] */ reg0.ull = divlu2r(uPtr[1].ull); scratch.ull = divlu2r((uint64_t)reg0.uls.hi << pw2); uPtr[0].ull += scratch.uls.hi; reg0.ull = ((uint64_t)scratch.uls.lo * 1000000000U) + ((uint64_t)reg0.uls.lo << pw2); /* do carry, set uPtr[1] */ if (reg0.ull >= _CARRY) uPtr[0].ull += 1ULL, reg0.ull -= _CARRY; uPtr[1].ull = reg0.ull; } } if (n28s != 0) { /* shift of 28 multiples */ /* set max regs use, cap used as pointer compare limiter */ uint32_t unit_max = type >> 16; /* set initial limiter to current regs valid (<= unit_max) */ _ull_t *cap = &uPtr[unit_18]; do { /* start at base */ /* create loop counter, each 28 loop expects increase of new * reg created. A test used to verify if one is created. 
loop counter * then used to increase both pw10, and possibly the ending of * last math reg used (cap) */ _ull_t *input, *output, next; reg0.ull = (output = (input = uPtr))->ull; output->ull = 0; next.ull = input[1].ull; /* critical point where addin to reg0.ull can cause overflow * and propagation into a prior ull */ unit_18++; if (reg0.ull >= 3725290298ULL) goto L1; unit_18--; /* shift of reg0.ull with addin remains below 10^19 */ output->ull = reg0.ull << 28; /* undo loop increment of first addin location, * and lcnt (new reg increase) initial assumption */ output--; do { _ull_t scratch0; uint32_t u32; reg0.ull = next.ull; next.ull = (++input)[1].ull; output++; L1: scratch0.ull = divlu2r(reg0.ull); u32 = (uint32_t)(((uint64_t)scratch0.uls.hi * 2305843009) >> 32) >> 1; scratch0.ull = ((((uint64_t)scratch0.uls.hi << 28) - ((uint64_t)u32 * 1000000000U)) * 1000000000U) + ((uint64_t)scratch0.uls.lo << 28); if (scratch0.ull >= _CARRY) u32 += 1, scratch0.ull -= _CARRY; output[1].ull = scratch0.ull; if ((output->ull += u32) >= _CARRY) output->ull -= _CARRY, output[-1].ull += 1; } while (input < cap); /* tail pointer update based on new reg add vs needs for precision */ /* limit allowed to increase above cap for pw10 counter */ cap = &uPtr[((unit_18 > unit_max) ? unit_max : unit_18)]; } while ((--n28s)); /* convert regs used into pw10. add to pre-28 value */ } pw10 += ((unit_18 - 1) * 18); return ((pw10 << 16) | ((unit_18 + 1) << 4)); } static uint32_t __negative_exponent_fp(_ull_t *uPtr, int type) { int pw2, pw10; /* power of 2, power of 10 */ uint32_t index; _ull_t reg0; pw10 = -18; pw2 = 16383 - uPtr[0].uls.lo; pw2 -= (index = ((uint32_t)pw2 == ((IS_DBL(type)) ? 1023U : 16383U))); /* remove explicit '1'. add back later as 1e18 if not denormal */ reg0.ull = uPtr[1].ull; reg0.uls.hi &= 0x7FFFFFFF; uPtr[1].ull = 0; if (uPtr[2].ull != 0) { _ull_t reg1; reg1.ull = uPtr[2].ull; uPtr[2].ull = 0; __fractional(uPtr, reg0); __fractional_128(uPtr, reg1); if (index == 0) uPtr->ull += _CARRY; index = 8; } else if (reg0.ull != 0) { /* convert fractional to base 10 */ __fractional(uPtr, reg0); /* 2 cases: denormal, normal */ if (index == 0) uPtr->ull += _CARRY; index = 5; } else { if (pw2 < 18) { /* straight power of 2 */ /* first 17, [-1, -17]... shift of 10^18, * 10^18/18446744073709551616 = * 0.0542101086242752217003726400434970855712 * 0.0542101086242752217003726400434970855712 * 2^64 = * 10^18 = 0x232830643 * 2^32 + 2808348672 */ /* no whole, all fraction */ uPtr->uls.lo = ((uint32_t)232830643U << (32 - pw2)) | ((uint32_t)2808348672U >> pw2); uPtr->uls.hi = (uint32_t)232830643U >> pw2; return (uint32_t)((-(18 << 16)) | (1 << 4) | 8); } /* there are 18 trailing zeroes to _CARRY */ uPtr[0].ull = _CARRY >> 18; pw2 -= 18; index = 5; } /* 5^18 * 2^2 * 2^16 = 10^18 */ /* each shift into new ull is multiplied by 10^18. 
* or 2^-16 * 10^18 = shift * 5^18 * 2^2 */ if (pw2 & 0xf) { /* partial 2^-16, use of shorter, quicker multiply */ /* cant use shift on ulls, when partial gcc adds extra coding, test of 32 */ /* use this code, smaller, faster than using unsigned long gcc code */ uint32_t m = pw2 & 0xf; uint32_t shift = (16 - m); uint32_t shiftb = shift + 16; uint64_t fConst = 15258789062500ULL; if (index == 8) { uPtr[7].ull = ((uint16_t)(uPtr[6].uls.lo << shift)) * fConst; uPtr[6].uls.lo = (uPtr[6].uls.hi << shiftb) | (uPtr[6].uls.lo >> m); uPtr[6].uls.hi >>= m; uPtr[6].ull += ((uint16_t)(uPtr[5].uls.lo << shift)) * fConst; uPtr[5].uls.lo = (uPtr[5].uls.hi << shiftb) | (uPtr[5].uls.lo >> m); uPtr[5].uls.hi >>= m; uPtr[5].ull += ((uint16_t)(uPtr[4].uls.lo << shift)) * fConst; uPtr[4].uls.lo = (uPtr[4].uls.hi << shiftb) | (uPtr[4].uls.lo >> m); uPtr[4].uls.hi >>= m; uPtr[4].ull += ((uint16_t)(uPtr[3].uls.lo << shift)) * fConst; } else { uPtr[4].ull = ((uint16_t)(uPtr[3].uls.lo << shift)) * fConst; } uPtr[3].uls.lo = (uPtr[3].uls.hi << shiftb) | (uPtr[3].uls.lo >> m); uPtr[3].uls.hi >>= m; uPtr[3].ull += ((uint16_t)(uPtr[2].uls.lo << shift)) * fConst; uPtr[2].uls.lo = (uPtr[2].uls.hi << shiftb) | (uPtr[2].uls.lo >> m); uPtr[2].uls.hi >>= m; uPtr[2].ull += ((uint16_t)(uPtr[1].uls.lo << shift)) * fConst; uPtr[1].uls.lo = (uPtr[1].uls.hi << shiftb) | (uPtr[1].uls.lo >> m); uPtr[1].uls.hi >>= m; uPtr[1].ull += ((uint16_t)(uPtr[0].uls.lo << shift)) * fConst; uPtr[0].uls.lo = (uPtr[0].uls.hi << shiftb) | (uPtr[0].uls.lo >> m); uPtr[0].uls.hi >>= m; /* address case of denormals, below 'flag' will write pre-regs otherwise */ if ((uPtr[0].uls.hi | uPtr[0].uls.lo) == 0) { pw10 -= 18; uPtr[0].ull = uPtr[1].ull, uPtr[1].ull = uPtr[2].ull; uPtr[2].ull = uPtr[3].ull, uPtr[3].ull = uPtr[4].ull; if (index == 8) { uPtr[4].ull = uPtr[5].ull, uPtr[5].ull = uPtr[6].ull; uPtr[6].ull = uPtr[7].ull, uPtr[7].ull = 0; } else { uPtr[4].ull = 0; } } } /* 2^-16, multiples */ if ((pw2 >> 4) != 0) { _ull_t *regPtr, *limit; pw2 >>= 4; limit = uPtr + (unsigned)(type >> 16); int16_t fcnt = 0; /* code approaches best written assembler, addressed most stalls * inotherwords, use caution rewording, huge penalties. */ do { uint64_t work; uint32_t w0; int32_t flag; regPtr = uPtr; w0 = (uint32_t)regPtr->uss.w0; fcnt += (flag = -((regPtr->ull = regPtr->ull >> 16) == 0)); work = (++regPtr)->ull; do { uint64_t m0 = w0 * 15258789062500ULL; w0 = (uint16_t)work; regPtr[flag].ull = (work = m0 + (work >> 16)); ++regPtr; } while ((regPtr <= limit) && ((work = regPtr->ull) != 0)); regPtr[flag].ull = w0 * 15258789062500ULL; } while ((--pw2) != 0); index = (int)(regPtr - uPtr) + 1; pw10 += fcnt * 18; } /* only pw10 used in function as exponent of power 10 */ return ((pw10 << 16) | (index << 4) | 8); } /* enters with 'sSize = strPtr - sPtr;'. * s, sPtr could have been malloc'd on entry, but expected to have been * cleared, output used, prior to entry. Possibly previous real allocation, * output to 'buffer', FILE or supplied, and not free'd. * printf() supplies BUFSTRSIZE on entry, realtostr supplies 45 ulls. * BUFSTRSIZE considered large for normal requests, * 45 ulls covers all needs for double reals. * all real types generally need 8, [±][0][.](precision)[e±000[0]] * IS_TYPE_F adds exponent to count, [±][0](exponent)[.](precision)[e±000[0]] * IS_TYPE_F has possiblity of grouping: * GRP_PREFIX assumes thousands, xxx,xxx,xxx,xxx etc. = 6 per ull * can't reuse precision 'reg's as double duty... 
18 bytes / 8 byte ull * bufS(string) must come first for free'ing of allocation */ #pragma mark *** External Interfaces *** #if (!__GNUC_PREREQ(4,5)) #pragma GCC diagnostic ignored "-Wuninitialized" #endif unsigned char * realtostr(uint32_t w160[5], unsigned char **s, int type, int arg_prec, int arg_flags, unsigned decimal) { /*smash_stack();*/ int exponent, rpos; /* Rounding POSition */ uint32_t (*math_func)(_ull_t *, int); unsigned char *sPtr, dblt_sign; /* set up for percision of 792 digits, 18/ull * 44ull */ /* malloc if exponent and percision dictate, 44ull = 352 bytes */ /* reg: static reserved, regs: actual reg use address (stack or allocated) */ _ull_t reg[45], *regs; /* supress warning due to zero jumps */ #if __GNUC_PREREQ(4,5) #pragma GCC diagnostic ignored "-Wuninitialized" #endif /* utPtr: top of stack, ubPtr: bottom of stack, ufPtr: fractional start */ _ull_t *utPtr, *ubPtr, *ufPtr; int d0; /* Digit0 (first digit) */ #if __GNUC_PREREQ(4,5) #pragma GCC diagnostic pop #endif uint64_t a; unsigned strsz, pw2, packed_return; /*** input defaults ***/ char *decimalPtr = (char*)&decimal; if (arg_prec < 0) arg_prec = 6; /* CASE 'g': initial zero(or first digit) is significant digit */ if ((arg_prec) && (IS_TYPE_G(type))) --arg_prec; /*** sign ***/ if ((w160[4] & 0x8000) != 0) dblt_sign = '-'; else { dblt_sign = 0; if ((arg_flags & (SIGN_PREFIX | SPC_PREFIX)) != 0) dblt_sign = (arg_flags & SPC_PREFIX) ? ' ' : '+'; } /*** exponent ***/ /* extra math used for limiting register updating in math units. Also * allows allocation size when precision exceeds static buffersize */ if ((exponent = w160[4] & 0x7FFF) == 0x7FFF) { /* inf or nan, nan(n-char-sequence) */ *(sPtr = *s) = dblt_sign, sPtr += (dblt_sign != 0); return __n_char_sequence(w160, sPtr, (type | ((arg_flags & ALT_PREFIX) << 1))); } if (exponent < 0x3FFF) { uint32_t reg_min = (IS_QUAD(type)) ? 7 : 4; math_func = __negative_exponent_fp; pw2 = reg_min; /* these 'need' pw10 pre-calculations for extra decisions */ /* !E can result in '0', no caculation needed, * >> 4 == 0 results in use of default, * default pw2 == 4 covers precision < 64 */ /* XXX with !e80, does pw2 cover precision < (NEG_REG_BITS + 1)? */ if ( (!IS_TYPE_E(type)) || ( (((uint32_t)(0x3FFF - exponent) >> 4) != 0) && (arg_prec >= 63) ) ) { pw2 = 0x3FFF - exponent; /* number of prefixing zeroes */ pw2 = (((uint32_t)(((uint64_t)((uint32_t)pw2) * (uint32_t)2585827973U) >> 32)) >> 1); /* check for all zeroes on a 'f' request */ if ( (!IS_TYPE_E(type)) && ((unsigned)arg_prec < pw2) ) { if (pw2 > reg_min) { pw2 = reg_min; exponent = 0; *(uint64_t*)&w160[2] = 0; /* XXX do I need? */ *(uint64_t*)&w160[0] = 0; if (IS_TYPE_G(type)) type |= TYPE_F; type |= TYPE_ZERO; /* must not see as denormal for doubles */ /* remove 'double' label, keep or set 'long double' */ type &= ~(UPPER_TYPE << 8); } else { pw2 = reg_min; } } else { /* subtract zeroes from significant digits */ uint32_t bits = (IS_QUAD(type)) ? 
112 : 63; pw2 = (0x3FFF - exponent) + bits - pw2; if (pw2 > (unsigned)arg_prec) pw2 = arg_prec; /* divide by 18 for number ulls needed */ if (pw2 <= 38) pw2 = reg_min; else pw2 = reg_min + (uint32_t)((((uint64_t)(uint32_t)(pw2 - 38)) * (uint32_t)238609295U) >> 32); } } } else { math_func = __positive_exponent_fp; pw2 = 2; /* when 'f', need maximum value */ if ( ((exponent >= 0x40ce) && (arg_prec >= 27)) || (IS_TYPE_F(type)) ) { pw2 = exponent - 0x3FFF; pw2 = (((uint32_t)(((uint64_t)((uint32_t)pw2) * 2585827973U) >> 32)) >> 1); /* 'e' doesn't need full precision */ if ((IS_TYPE_E(type)) && ((unsigned)arg_prec < pw2)) pw2 = arg_prec; /* divide by 18 for number ulls needed */ pw2 = 2 + (uint32_t)(((uint64_t)((uint32_t)pw2) * 252645135U) >> 32); } } /*** allocation ***/ /* allocate reg here based on exponent/precision, see above notes */ /* group allocation, fprintf() to free, returning we allocating 's' */ strsz = arg_prec + 8; type |= pw2 << 16; /* type F has special requirements, if positive exponent */ if ((type << 31) & ~(exponent - 0x3FFF)) { /* grouping assumption of thousands: 1,000,000 */ /* add in exponent: [±][0](exponent)[.](precision)[e±000[0]] */ /* assume all pw2 as 'whole' */ /* more accuracy, for less allocations, is faster */ uint32_t triplets = (exponent - 0x3FFF); strsz += 1 + (((uint32_t)(((uint64_t)((uint32_t)triplets) * 2585827973U) >> 32)) >> 1); if ((arg_flags & GRP_PREFIX) != 0) strsz += ((uint16_t)21845 * (uint32_t)triplets) >> 16; } regs = reg; if ((pw2 > 44) || (strsz > BUFSTRSIZE)) { /* if either static exceeds limit, allocate both */ strsz = (strsz + ((1 << 6) - 1)) & ~((1 << 6) - 1); /* test for non-static string buffer */ if (arg_flags & ALLOC_BUF) free(*s); /* assignment to sPtr, sPtr is one tested and free'd in fprintf() */ if ((sPtr = malloc((size_t)(strsz + ((pw2 + 1) * sizeof(_ull_t))))) == NULL) return sPtr; *s = sPtr; regs = (_ull_t*)(sPtr + strsz); } /* zero regs, no longer part of math units */ do regs[pw2].uls.hi = 0, regs[pw2].uls.lo = 0; while ((--pw2)); /*** fractional load ***/ /* assign 'fraction', 'exponent' info for passage to math units */ /* w160[0,1] used on requested quadruple, or long double 128bit (not e80) */ regs[0].uls.lo = exponent; regs[1].ull = *(uint64_t*)&w160[2]; regs[2].ull = *(uint64_t*)&w160[0]; if ((regs[1].ull | regs[2].ull) == 0) { exponent = 0; if (IS_TYPE_G(type)) type |= TYPE_F; type |= TYPE_ZERO; goto skip_round; } /*** math ***/ packed_return = (*math_func)(regs, type); /* transfer info returned into addresses, exponent */ ubPtr = (utPtr = regs) + ((packed_return >> 4) & 0x3FF); exponent = (d0 = _first_digit(utPtr->ull)); exponent += (int32_t)packed_return >> 16; packed_return &= 0x0F; ufPtr = ((packed_return == 0) ? ubPtr : ((packed_return == 0x08) ? 
regs : ubPtr - (packed_return - 1))); /*** rounding ***/ /* assign position 1 pass display precision */ rpos = arg_prec + 1; if (IS_TYPE_F(type)) { /* this excludes positive exponents, they will not jump to skip */ if (arg_prec < ~exponent) goto skip_round; /* positive exponents must count pre-radix as precision */ if (~exponent >> 31) arg_prec += exponent; /* when negative, remove pre-zero count, positive add in whole portion */ rpos += exponent; } /* rounding position <= first digit position, has front end zeroes */ if ((rpos -= d0) > 0) { _ull_t *aPtr = utPtr; /* test rounding for second ull or higher, assume not large arg_prec */ while (((++aPtr) < ubPtr) && (rpos > 18)) rpos -= 18; /* verify position has digits to round */ if (aPtr < ubPtr) { /* set last ull, if rpos == 1 then exclude current aPtr */ ubPtr = aPtr + (rpos != 1); if (ufPtr > ubPtr) ufPtr = ubPtr; const uint64_t *fc = _fcarry; if ((aPtr->ull += fc[18 - rpos]) >= _CARRY) { uint64_t t = aPtr[-1].ull + 1ULL; /* carry forced propagation, makes ull == 0 */ (ubPtr = aPtr)->ull = 0; (--aPtr)->ull = t; if ((aPtr == utPtr) && (t >= fc[19 + d0])) { exponent++; /* type 'f' has increase in precision if positive exponent */ /* condition of == 18 never occurs */ arg_prec += (type & ((uint32_t)~exponent >> 31)); ++d0; } } } } else { /* first ull rounding */ const uint64_t *fc = _fcarry; /* limit reads of ulls to next register */ ubPtr = utPtr + 1; if (ufPtr > ubPtr) ufPtr = ubPtr; /* add '5' to rounding location */ a = utPtr->ull + fc[-rpos]; utPtr->ull = a; /* test for propagation to new first digit */ if (a >= fc[19 + d0]) { exponent++; if ((++d0) == 18) { /* rare case. inserted here to avoid alter of ulltobuffer for odd case * and testing all cases for this condition. */ /* occurs @ 1e18, 1e-18 9.9999950000000026e+17 first precision 6 9.9999999999999987e+17 last 9.9999950000000012e-19 first precision 6 9.9999999999999988e-19 last */ *(sPtr = *s) = dblt_sign, sPtr += (dblt_sign != 0); /* type f and negative exponent */ if (((uint32_t)exponent >> 31) & type) { unsigned j, k; *sPtr++ = '0'; *(uint16_t*)sPtr = *(uint16_t*)decimalPtr; j = 1 + (decimalPtr[1] != 0); sPtr += j, decimalPtr += j; if ((j = -exponent - 1)) { if (j >= (unsigned)arg_prec) goto L24; arg_prec -= j; k = j & 3; if ((j = (int)((unsigned)j >> 2))) do *(uint32_t*)sPtr = 0x30303030U, sPtr += 4; while ((--j)); *(uint32_t*)sPtr = 0x30303030U; sPtr += k; } *sPtr++ = '1'; if ((arg_prec -= 1) > 0) goto L245; return sPtr; } *sPtr++ = '1'; if ( (IS_TYPE_G(type)) && (((unsigned)exponent) <= (unsigned)arg_prec) ) type |= TYPE_F; if ( (arg_prec) && (!(IS_TYPE_G(type)) || (arg_flags & ALT_PREFIX)) ) { unsigned dflag = (decimalPtr[1] != 0); *(uint16_t*)sPtr = *(uint16_t*)decimalPtr; sPtr += dflag + 1, decimalPtr += dflag + 1; goto L245; } goto L25; } } } /*** adjust for entered type... 
g ***/ /* rounding can adjust exponent, so now can test if conditions met */ if (IS_TYPE_G(type)) { /* this should exclude negatives */ if (((unsigned)exponent) <= (unsigned)arg_prec) goto L23; if (((unsigned)(exponent + 4)) <= 4) { arg_prec -= exponent; L23: type |= TYPE_F; } } /*** output section ***/ skip_round: *(sPtr = *s) = dblt_sign, sPtr += (dblt_sign != 0); /* type f and negative exponent, */ if ((((uint32_t)exponent >> 31) & type) | (IS_TYPE_ZERO(type))) { unsigned j, k; *sPtr++ = '0'; *(uint16_t*)sPtr = *(uint16_t*)decimalPtr; j = 1 + (decimalPtr[1] != 0); if (!arg_prec) { if (IS_TYPE_E(type)) goto L11; if (arg_flags & ALT_PREFIX) return (sPtr + j); return sPtr; } sPtr += j, decimalPtr += j; if ((j = -exponent - 1)) { if (j >= (unsigned)arg_prec) goto L24; arg_prec -= j; k = j & 3; if ((j = (int)((unsigned)j >> 2))) do *(uint32_t*)sPtr = 0x30303030U, sPtr += 4; while ((--j)); *(uint32_t*)sPtr = 0x30303030U; sPtr += k; } } { /* first ull if utPtr == ufPtr then all digits are part of precision. d0 does not count digits but returns log10... log10(1-9) = 0 if 'f', data starts at sPtr. Otherwise starts sPtr + size of decimal. will write on last char of decimal, which then that char copies to sPtr then overwritten by decimal. (!IS_TYPE_F(type)) stringPtr = sPtr + decimalsz To determine if first digit is used as precision use (decimalPtr == (char*)&decimal) Two-byte decimal determined by (((char*)&decimal)[1] != 0) */ /* deal with d0 */ /* _fcarry multiplier based on d0, then can adjust d0 */ const uint64_t *mConst = &_fcarry[35 - d0]; if ((d0 += ((utPtr == ufPtr) & IS_TYPE_F(type))) > arg_prec) d0 = arg_prec; unsigned char *stringPtr = sPtr; if (!IS_TYPE_F(type)) stringPtr += 1 + (((char*)&decimal)[1] != 0); /* traps d0 = [0,9] or all ull multipliers */ if (utPtr->ull < 1000000000) { _ull_t b; /* buf output use pre-incr (++) */ unsigned char *buf = stringPtr - 1; uint32_t f = (uint32_t)((uint64_t)utPtr->uls.lo * (uint32_t)mConst[(-9)]); uint32_t n = d0 + (decimalPtr == (char*)&decimal); b.ull = (uint64_t)(f << 2) * 2882303762U; goto L2; do { b.uls.hi = (((b.uls.hi << 4) >> 4) * 10) + (b.uls.lo >> 28); b.uls.lo <<= 4; L2: *++buf = (b.uls.hi >> 28) + '0'; } while ((--n)); } else { _ulltobuffer_nlr((divlu2r(utPtr->ull * (uint32_t)*mConst)), stringPtr, (d0 + (decimalPtr == (char*)&decimal))); } /* type 'e' requires radix after first digit */ if (!IS_TYPE_F(type)) { /* move first digit to first spot */ *sPtr = *stringPtr; /* can not use short write here, * will overwrite any digits following if sz == 1 */ *++sPtr = *decimalPtr; if (*++decimalPtr != 0) *++sPtr = *decimalPtr, decimalPtr++; /* a must for if arg_prec */ sPtr++; if (!arg_prec) { if (arg_flags & ALT_PREFIX) sPtr += (((char*)&decimal)[1] != 0); else sPtr -= 1 + (((char*)&decimal)[1] != 0); goto L11; } } sPtr += d0 + (decimalPtr == (char*)&decimal); if ((arg_prec -= d0) == 0) goto L25; } do { /* case where has whole/fraction */ if ((++utPtr) >= ufPtr) { if (decimalPtr == (char*)&decimal) { /* this also used as delimiter for whole part, stop point for * trailing zero removal */ unsigned dflag = (decimalPtr[1] != 0); *(uint16_t*)sPtr = *(uint16_t*)decimalPtr; sPtr += dflag + 1, decimalPtr += dflag + 1; } /* set to last ull, if not already at */ if (ufPtr == ubPtr) break; ufPtr = ubPtr; } if ((arg_prec - 18) < 0) { _ulltobuffer_nlr(divlu2r(utPtr->ull), sPtr, arg_prec); sPtr += arg_prec; goto L25; } arg_prec -= 18; _ulltobuffer_18lr(divlu2r(utPtr->ull), sPtr); sPtr += 18; } while (1); L24: if ( (arg_prec) && (!(IS_TYPE_G(type)) || 
                                                   (arg_flags & ALT_PREFIX)) ) {
        /* safe because of added room for exponent in allocation */
        unsigned k;
L245:
        k = arg_prec & 3;
        arg_prec = (int)((unsigned)arg_prec >> 2);
        *(uint32_t*)sPtr = 0x30303030U;
        if (arg_prec)
            do sPtr += 4, *(uint32_t*)sPtr = 0x30303030U; while ((--arg_prec));
        sPtr += k;
    }
L25:
    /* case 'g', remove any trailing zeroes */
    if (IS_TYPE_E(type))
        goto L11;
    if (!(arg_flags & ALT_PREFIX)) {
        /* test only 'last' byte of decimal point.
         * '== 0' only occurs if written. decimal is at max 2 bytes */
        if (*decimalPtr == 0) {
            unsigned char ch;
            ch = *--sPtr;
            if (IS_TYPE_G(type))
                while (ch == '0') ch = *--sPtr;
            /* decimal point is at max 2 bytes */
            if (ch != (unsigned char)decimalPtr[-1])
                ++sPtr;
            else if (&decimalPtr[-1] != (char*)&decimal)
                --sPtr;
        }
    } else if (*decimalPtr != 0) {
        unsigned dflag = (decimalPtr[1] != 0);
        *(uint16_t*)sPtr = *(uint16_t*)decimalPtr;
        sPtr += dflag + 1, decimalPtr += dflag + 1;
    }

    /*** exponent output ***/
    if (IS_TYPE_F(type))
        return sPtr;
L11:
    /* converts g,G to e,E and adds sign (portable character set) */
    *(uint16_t*)sPtr = cconst16_t(('e' ^ ((type & 0x20) ^ UPPER_TYPE)),
                                  ('+' - ((exponent >> 31) << 1)));
    /* absolute value */
    exponent = (exponent ^ (exponent >> 31)) - (exponent >> 31);
    if (exponent < 10) {
        /* exponent output requires 2 digit display */
        *(uint16_t*)(&sPtr[2]) = cconst16_t('0',('0' + exponent));
        return (sPtr + 4);
    }
    /* quadruple, long doubles range [10,4966] */
    uint32_t ecd = (uint32_t)(((uint64_t)((uint32_t)exponent << 19) * 2199023989U) >> 32);
    ++sPtr;
    if (exponent >= 1000)
        *++sPtr = (ecd >> 28) + '0';
    ecd = ((ecd << 4) >> 4) * 10;
    if (exponent >= 100)
        *++sPtr = (ecd >> 28) + '0';
    sPtr[1] = ((ecd = (((ecd << 4) >> 4) * 10)) >> 28) + '0';
    sPtr[2] = ((((ecd << 4) >> 4) * 10) >> 28) + '0';
    return (sPtr + 3);
}

/* "[±]0xh.hhhhp±d"
 * normalized:   h. force to high nibble for higher precision
 * denormalized: .h force highest nibble as above
 * output precision based on 'g' style */
unsigned char *
realtohex(uint32_t w160[5], unsigned char **s, int type, int arg_prec,
          int arg_flags, unsigned decimal)
{
    unsigned char *sPtr = *s;
    int exponent;
    uint64_t frac;

    /*** input defaults ***/
    char *decimalPtr = (char*)&decimal;
    if (arg_prec < 0) {
        arg_prec = (IS_DBL(type)) ? 13 : 15;
        type |= TYPE_G;
    }
    /* determine upper or lower case output */
    const char *_outvalues = _const_outStr;
    if (IS_UPPER_TYPE(type))
        _outvalues += 32;

    /*** sign && exponent ***/
    exponent = w160[4] & 0x7FFF;
    if ((w160[4] & 0x8000) != 0)
        *sPtr++ = '-';
    else {
        if ((arg_flags & (SIGN_PREFIX | SPC_PREFIX)) != 0)
            *sPtr++ = (arg_flags & SPC_PREFIX) ?
                      ' ' : '+';
    }
    if (exponent == 0x7FFF)
        return __n_char_sequence(w160, sPtr, (type | ((arg_flags & ALT_PREFIX) << 1)));
    exponent -= 16383;

    /*** prefix 0x or 0X ***/
    *((uint16_t *)sPtr) = ((uint16_t *)_outvalues)[12];
    sPtr += 2;

    /*** fractional load ***/
    if ((frac = *(uint64_t*)&w160[2]) == 0) {
        exponent = 0;
        *sPtr++ = '0';
        if (arg_flags & ALT_PREFIX) {
            unsigned dflag = (decimalPtr[1] != 0);
            *(uint16_t*)sPtr = *(uint16_t*)decimalPtr;
            sPtr += dflag + 1, decimalPtr += dflag + 1;
        }
        goto out_exp;
    }
    /* alter exponent by 'shift' amount used to normalize */
    if ((frac & 0x8000000000000000) != 0)
        exponent -= 3;

    /*** rounding ***/
    if (arg_prec < 15) {
        uint64_t rmask = 0x0FFFFFFFFFFFFFFFULL >> (arg_prec << 2);
        uint64_t radd = (rmask + 1) >> 1;
        /* not sure we need to mask the result */
        if ((frac & rmask) > radd)
            frac = (frac + radd) & ~rmask;
    }
    /* rotate high nibble to low nibble */
    uint32_t snfct = 0;
    if ((frac & 0x8000000000000000ULL) == 0) {
        frac <<= 1;
        snfct = 1;
        *sPtr++ = '0';
    } else {
        frac = (frac >> 60) | (frac << 4);
        *sPtr++ = _outvalues[((uint32_t)frac & 0x0000000F)];
    }
    /* decimal point */
    unsigned dflag = (decimalPtr[1] != 0);
    *(uint16_t*)sPtr = *(uint16_t*)decimalPtr;
    sPtr += dflag + 1, decimalPtr += dflag + 1;
    /* precision based on type, adjust if denormal */
    dflag = (arg_prec < ((IS_DBL(type)) ? 13 : 15)) ? arg_prec : ((IS_DBL(type)) ? 13 : 15);
    arg_prec -= dflag;
    if (snfct == 1)
        dflag++;
    do {
        /* rotate into position */
        frac = (frac >> 60) | (frac << 4);
        *sPtr++ = _outvalues[((uint32_t)frac & 0x0000000F)];
    } while ((--dflag));
    if (IS_TYPE_G(type)) {
        unsigned char ch = *--sPtr;
        while (ch == '0') ch = *--sPtr;
        if (ch != (unsigned char)decimalPtr[-1])
            ++sPtr;
        else if (&decimalPtr[-1] != (char*)&decimal)
            --sPtr;
    } else if (arg_prec) {
        dflag = arg_prec & 3;
        arg_prec = (int)((unsigned)arg_prec >> 2);
        *(uint32_t*)sPtr = 0x30303030U;
        if (arg_prec)
            do sPtr += 4, *(uint32_t*)sPtr = 0x30303030U; while ((--arg_prec));
        sPtr += dflag;
    }

    /*** exponent output (p±d p[-16494,+16383]) ***/
out_exp:
    *sPtr++ = _outvalues[19];   /* based on portable character set */
    *sPtr++ = '+' - ((exponent >> 31) << 1);
    /* absolute value */
    exponent = (exponent ^ (exponent >> 31)) - (exponent >> 31);
    if (exponent < 10) {
        *sPtr = exponent + '0';
        return (sPtr + 1);
    }
    /* divide exponent by 10000 3518437209 */
    uint32_t ecd = (uint32_t)(((uint64_t)(uint32_t)exponent * 3518437209U) >> 17);
    ecd += 1 << 14;
    if (exponent >= 10000)
        *sPtr++ = (ecd >> 28) + '0';
    ecd = ((ecd << 4) >> 4) * 10;
    if (exponent >= 1000)
        *sPtr++ = (ecd >> 28) + '0';
    ecd = ((ecd << 4) >> 4) * 10;
    if (exponent >= 100)
        *sPtr++ = (ecd >> 28) + '0';
    sPtr[0] = ((ecd = (((ecd << 4) >> 4) * 10)) >> 28) + '0';
    sPtr[1] = ((((ecd << 4) >> 4) * 10) >> 28) + '0';
    return (sPtr + 2);
}

#pragma mark *** Utility External Interfaces ***

/* need different long double definitions 106,64,53 */
long double qtold(quadruple qdbl)
{
    octle iqdbl;
    unsigned char *cdbl = (unsigned char*)&iqdbl;

    iqdbl = qdbl;
#if (__LDBL_MANT_DIG__ == 64)
    iqdbl.ulls.ull0 = (iqdbl.ulls.ull1 << 15) | (iqdbl.uss.us3 >> 1);
    iqdbl.ulls.ull1 >>= 48;
    iqdbl.uls.ul1 = (((iqdbl.uss.us4 & 0x7fff) != 0) << 31) | (iqdbl.uls.ul1 & 0x7fffffff);
#else
    int16_t sign = (iqdbl.uss.us7 & 0x8000) >> 4;
    int16_t e16 = (iqdbl.uss.us7 & 0x7fff) - 16383 + 1023;
    iqdbl.uss.us7 = 0;
    iqdbl.ulls.ull1 <<= 4;
    iqdbl.uss.us4 |= iqdbl.uss.us3 >> 12;
    /* double-double and double */
#if (__LDBL_MANT_DIG__ == 106)
    /* assumes low double exponent different by 2^-52, keeps sign
     * of course this doesn't make sense on denormals
     * thus no extra precision, low double
has to be zero */ /* mantissas into correct positions */ iqdbl.ulls.ull0 >>= 8; iqdbl.uss.us3 |= ((e16 - 52) | sign) << 4; if (e16 <= 52) { iqdbl.ulls.ull0 = 0; /* unknown if benefits hardware when 0 */ iqdbl.uss.us3 = sign << 4; } #endif if (e16 <= 0) { if (e16 <= -52) iqdbl.ulls.ull1 = 0; else iqdbl.ulls.ull1 >>= (12 - e16); e16 = 0; } else if (e16 > 2047) { /* set to inf */ iqdbl.ulls.ull1 = 0; e16 = 2047; } iqdbl.uss.us7 |= (e16 | sign) << 4; #endif return (*(long double*)cdbl); } double ldtod(long double ldbl) { unsigned char *wc = (unsigned char *)&ldbl; #if ((__LDBL_MANT_DIG__ == 53) || (__LDBL_MANT_DIG__ == 106)) /* ppc double-double XXX untested, just drop lower 64, big endian */ /* wc will point to highest byte of 128 bit */ return (*(double*)wc); #else /* since padding occurs, non gpr registers tend towards 128bit use: */ octle ildbl; int16_t e16; int16_t sign; ildbl = *(octle*)wc; wc = (unsigned char *)&ildbl; /* 3 known, use IEEE-754 16bit sign/exp, biased */ #if (__LDBL_MANT_DIG__ > 65) /* trap binary128 */ e16 = ildbl.uss.us7; ildbl.ulls.ull0 = ildbl.ulls.ull1 >> 12; #else /* trap binary64, e80 (64/63 mantissas) */ e16 = ildbl.uss.us4; #if (__LDBL_MANT_DIG__ == 64) ildbl.uss.us3 &= 0x7fff; ildbl.ulls.ull0 >>= 11; #else ildbl.ulls.ull0 = (((ildbl.ulls.ull1 << 4) | (uint64_t)(ildbl.uss.us3 >> 12))) >> 12; #endif #endif sign = (e16 & 0x8000) >> 4; e16 = (e16 & 0x7FFF) - 16383 + 1023; if (e16 <= 0) { if (e16 <= -52) ildbl.ulls.ull0 = 0; else ildbl.ulls.ull0 >>= (12 - e16); e16 = 0; } else if (e16 > 2047) { /* set to inf */ ildbl.ulls.ull0 = 0; e16 = 2047; } ildbl.uss.us3 |= (e16 | sign) << 4; return (*(double*)wc); #endif } int main(void) { phx_printf("%.43f\n", 0x1.52f8a8e32e982p-140); phx_printf("%g\n", 355.0/113.0); return 0; }
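
/* A rough usage note (illustrative): __format() packs the conversion letter
 * and its length modifier into the 'type' word handed to realtostr() and
 * realtohex().  A "%f" applied to a plain double arrives as
 *
 *     type = ('l' << 8) | ('f' - 'E');    // IS_DBL(type) and IS_TYPE_F(type) hold
 *
 * "%qa" arrives as (('q' << 8) | ('a' - 'A')), and "%Lg" as
 * (('L' << 8) | ('g' - 'E')) on an x87 long-double build.  For the second
 * demo line above, 355.0/113.0 is about 3.14159292, so "%g" with its default
 * precision of 6 significant digits should print "3.14159".
 */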