If you are bold enough to try something different, here is some code to examine. I could butcher something up to create a full implementation outside my code. Testing showed it to be faster than gdtoa. Low-precision routines around a 0 exponent show a near tie. I have added code to show how I connected it to my printf: case 'a': case 'e': case 'f': case 'g': case 'A': case 'E': case 'F': case 'G': /* gcc doesn't permit floats, it converts to doubles */ { unsigned char *(*toStr)(uint32_t [5], unsigned char **, int, int, int, unsigned); uint32_t w160[5]; uint32_t dp; /* 4 byte decimal point param */ int32_t sch; __decimalLoad((unsigned char*)&dp, params); if ((ch | 0x20) == 'a') ch -= 'A', toStr = realtohex; else ch -= 'E', toStr = realtostr; sch = format[-1]; /* force 128bit long double into quad parsing */ if (sch == 'q') { #if (__LDBL_MANT_DIG__ != 64) quad_parse: #endif sch = (('q' << 8) | ch); qtow(((vPtr == NULL) ? va_arg(arg, octle) : *(octle *)vPtr), w160); } else if (sch == 'L') { #if (__LDBL_MANT_DIG__ != 64) goto quad_parse; #endif sch = (('L' << 8) | ch); ldtow(((vPtr == NULL) ? va_arg(arg, long double) : *(long double *)vPtr), w160); } else { sch = (('l' << 8) | ch); dtow(((vPtr == NULL) ? 
va_arg(arg, double) : *(double *)vPtr), w160); } arg_flags |= (strBuf != buffer) << 7; strPtr = toStr(w160, &strBuf, sch, arg_prec, (int)arg_flags, dp); } if (strPtr == NULL) goto error; val_ul = (n = (int)(strPtr - strBuf)); strPtr = strBuf; arg_prec = 0; arg_flags &= ~ALT_PREFIX; if ((arg_flags & GRP_PREFIX) != 0) strPtr = __localizeReal(strPtr, &n), val_ul = n; goto L300; This is part of my sys/types so above 'octle' doesn't look so strange: typedef union { #if __SIZEOF_INT128__ == 16 uint128_t uo; #endif #ifdef __BIG_ENDIAN__ struct { uint64_t ull1, ull0; } ulls; struct { uint32_t ul3, ul2, ul1, ul0; } uls; struct { uint16_t us7, us6, us5, us4, us3, us2, us1, us0; } uss; #else /* __LITTLE_ENDIAN__ */ struct { uint64_t ull0, ull1; } ulls; struct { uint32_t ul0, ul1, ul2, ul3; } uls; struct { uint16_t us0, us1, us2, us3, us4, us5, us6, us7; } uss; #endif } octle; And from sysdep.h for attached ldtostr: /* endian break down of uint64_t */ #ifdef __BIG_ENDIAN__ typedef struct { uint16_t w3; uint16_t w2; uint16_t w1; uint16_t w0; } _ull_ws; typedef struct { uint32_t hi; uint32_t lo; } _ull_ls; #elif defined (__LITTLE_ENDIAN__) typedef struct { uint16_t w0; uint16_t w1; uint16_t w2; uint16_t w3; } _ull_ws; typedef struct { uint32_t lo; uint32_t hi; } _ull_ls; #else #error: undefined endianness #endif typedef union { uint64_t ull; _ull_ls uls; _ull_ws uss; } _ull_t; Steve