Re: gcc 11.1.0: printf("%.43f\n", 0x1.52f8a8e32e982p-140): printed value is incorrectly rounded

public inbox for newlib@sourceware.org
 help / color / mirror / Atom feed

From: Steven Abner <pheonix.sja@att.net>
To: Newlib <newlib@sourceware.org>
Subject: Re: gcc 11.1.0: printf("%.43f\n", 0x1.52f8a8e32e982p-140): printed value is incorrectly rounded
Date: Sat, 06 Nov 2021 08:06:05 -0400	[thread overview]
Message-ID: <1636200365.25270.0@smtp.mail.att.net> (raw)
In-Reply-To: <1636128049.25340.0@smtp.mail.att.net>

[-- Attachment #1: Type: text/plain, Size: 936 bytes --]


  I didn't mean to stop the conversion! Here is a butchered, self 
contained use.
Hopefully this provides better means to evaluate. Low precision code 
removed,
since I assume embedded systems won't use. In case of not obvious, 
entry from
printf() into realto functions expected a 64 byte buffer to write to. 
realto functions
has a static buffer to cover double range. It uses malloc() if it 
determines that
with localization and precision, if it needs to increase past 64 byte 
return. realto
functions includes decimal encoding of up to 4 byte utf8 encoding, 
which at the
time of writing no script came close to, think 2 bytes was max?
  There is commented out code for 32 bit float, but not sure would ever 
get used.

  sample use:
steven@steven-ryzen:~/Development/Projects$ gcc  newlib_printf_demo.c 
-o nldemo
steven@steven-ryzen:~/Development/Projects$ ./nldemo
0.0000000000000000000000000000000000000000009
3.14159

Steve

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: newlib_printf_demo.c --]
[-- Type: text/x-csrc, Size: 72783 bytes --]

#include <stdarg.h>

/* must define a data model */
#ifndef __LP64__
 #define __LP32__  1
#endif
/* found out 24 Jan 2019 from new computer build */
/* gcc does not define __LITTLE_ENDIAN__ or __BIG_ENDIAN__ */
#ifndef __LITTLE_ENDIAN__
 #if (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
  #define __LITTLE_ENDIAN__
 #else
  #define __BIG_ENDIAN__
 #endif
#endif

typedef signed char         int8_t;
typedef unsigned char       uint8_t;
typedef short               int16_t;
typedef unsigned short      uint16_t;
typedef int                 int32_t;
typedef unsigned int        uint32_t;
typedef long long           int64_t;
typedef unsigned long long  uint64_t;

#if __SIZEOF_INT128__ == 16
typedef __int128            int128_t;
typedef unsigned __int128   uint128_t;
#endif

typedef union {
#if __SIZEOF_INT128__ == 16
 uint128_t   uo;
#endif
#ifdef __BIG_ENDIAN__
 struct { uint64_t ull1, ull0; } ulls;
 struct { uint32_t ul3, ul2, ul1, ul0; } uls;
 struct { uint16_t us7, us6, us5, us4, us3, us2, us1, us0; } uss;
#else  /* __LITTLE_ENDIAN__ */
 struct { uint64_t ull0, ull1; } ulls;
 struct { uint32_t ul0, ul1, ul2, ul3; } uls;
 struct { uint16_t us0, us1, us2, us3, us4, us5, us6, us7; } uss;
#endif
} octle;
/* defined in as_stdio.h */
#define quadruple octle

/* gcc appears to only defines _LP64 __LP64__ (2019) */
/* only models with 64bit ints */
#if defined (__ILP64__) || defined (__SILP64__)
typedef  int64_t      intmax_t;
typedef uint64_t     uintmax_t;
#elif defined (__LP32__)
typedef  int16_t      intmax_t;
typedef uint16_t     uintmax_t;
#else /* __LP64__ has 32bit int */
typedef  int32_t      intmax_t;
typedef uint32_t     uintmax_t;
#endif

/* these are all register size based */
#if defined (__LP32__) || defined (__ILP32__)
typedef  int32_t      intptr_t;
typedef uint32_t     uintptr_t;
#define __SIZEOF_REGISTER_T__ 4
#define __SIZEOF_SIZE_T__ 4
#else
typedef  int64_t      intptr_t;
typedef uint64_t     uintptr_t;
#define __SIZEOF_REGISTER_T__ 8
#define __SIZEOF_SIZE_T__ 8
#endif

typedef  intptr_t   register_t;
typedef  intptr_t    _intptr_t;
typedef uintptr_t   u_intptr_t;
typedef  intptr_t      ssize_t;   /* Used for a count of bytes or an error indication. */
typedef uintptr_t       size_t;   /* Used for sizes of objects. */
typedef  intptr_t    ptrdiff_t;   /* Used for subtracting two pointers. */
typedef uintptr_t      clock_t;   /* Used for system times in clock ticks or CLOCKS_PER_SEC . */
typedef  intptr_t       time_t;   /* Used for time in seconds. */

#define NULL        ((void *)0)

#ifdef __APPLE__
  #define FILE FILE_mac
#elif defined __linux  /* !__APPLE__ */
  typedef struct _IO_FILE _IO_FILE;
  #define FILE _IO_FILE
#else    /* !__APPLE__ !__CYGWIN__ !__linux */
  #error: unknown FILE structure
#endif  /* FILE * */
extern void *malloc(size_t);
extern void free(void *);
extern int puts(const char *);
size_t fwrite(const void *restrict, size_t, size_t, FILE *restrict);
extern FILE* stdout;

  /* in ldtostr.c */
extern unsigned char *
realtostr(uint32_t [5], unsigned char **, int, int, int, unsigned);
extern unsigned char *
realtohex(uint32_t [5], unsigned char **, int, int, int, unsigned);

  /* guarentee use of this printf */
int phx_printf(const char *format, ...);

/* local defines */
#define ALT_PREFIX   0002
#define PREC_PREFIX  0400

/* converts to w160 type used by realtostr,
 * current use as 63-63 with implicit '1"-implicit nil */
static void
qtow(quadruple qdbl, uint32_t w160[5]) {

    /* required strict aliasing conversion */
  octle *iqdbl;
  unsigned char *cqdbl = (unsigned char*)&qdbl;
  iqdbl = (octle*)cqdbl;

    /* long double shifted to high bit of uint128_t, with implicit
     * bit becoming explicit at bit 127 */
  w160[4] = iqdbl->uss.us7;
  *(uint64_t*)&w160[2] = (iqdbl->ulls.ull1 << 15)
                       | (iqdbl->uls.ul1 >> 17);
    /* lower 64 set same position as upper, upper gets 'implicit '1' */
  iqdbl->uss.us3 &= 1;
  *(uint64_t*)&w160[0] = (iqdbl->ulls.ull0 << 14);
    /* place implicit bit for all but denormal */
  w160[3] |= (w160[4] != 0) << 31;
}

/* converts to w160 type used by realtostr */
static void
ldtow(long double ldbl, uint32_t w160[5]) {

    /* required strict aliasing conversion */
  octle *ildbl;
  unsigned char *cldbl = (unsigned char*)&ldbl;
  ildbl = (octle*)cldbl;

  w160[4] = ildbl->uss.us4;
  *(uint64_t*)&w160[2] = ildbl->ulls.ull0;
  *(uint64_t*)w160 = 0;
}

/* converts to w160 type used by realtostr */
static void
dtow(double dbl, uint32_t w160[5]) {

    /* required strict aliasing conversion */
  uint64_t *idbl;
  unsigned char *cdbl = (unsigned char*)&dbl;
  idbl = (uint64_t*)cdbl;
  *(uint64_t*)&w160[2] = *idbl << 11;
  w160[4] = (uint32_t)(*idbl >> 52);

  if ((w160[4] & 0x7FF) == 0x7FF)
    w160[4] = (w160[4] << 4) | 0x000F;
  else if (((w160[4] & 0x7FF) == 0) && (*(uint64_t*)&w160[2] == 0)) {
    w160[4] <<= 4;
  } else {
    w160[4] = ((w160[4] & 0x7FF) + 16383 - 1023) | ((w160[4] & 0x800) << 4);
      /* place implicit bit for all but denormal */
    w160[3] |= ((w160[4] & 0x7FFF) != 0x3C00) << 31;
  }
  *(uint64_t*)w160 = 0;
}

/* not sure usefulness, gcc's code forces doubles in printf. Not sure of overhead
 * created by gcc and/or POSIX requirement of double argument.
static void
ftow(float flt, uint32_t w160[5]) {

  *(uint64_t*)w160 = 0;
  *(uint64_t*)&w160[2] = 0;
  w160[4] = (*(uint32_t*)&flt) >> 23;
  w160[3] = (*(uint32_t*)&flt) << 9;
  if ((w160[4] & 0xFF) == 0xFF)
    w160[4] = (w160[4] << 8) | 0x00FF;
  else
    w160[4] = ((w160[4] & 0xFF) + 16383 - 127) | ((w160[4] & 0x100) << 7);
}
*/


/* locate dependent utf8 loading of decimal point */
/* for newlib demo return ascii */
struct __params {
  unsigned char **s;
  unsigned char *slimit;
  //locale_t ld;
};

static unsigned
__decimalLoad(unsigned char *s, struct __params *params) {
  (void)params;
  *s = '.';
  return (1);
}

#define BUFSTRSIZE     64
unsigned char *
__format(const unsigned char *format, struct __params *params, va_list arg) {

  unsigned char   buffer[BUFSTRSIZE]; /* size of 64bit binary ouput  no prec,
                                       * field nor alt goes here */
  unsigned char  **s = params->s;
  unsigned char  *slimit = params->slimit;
  unsigned char  *strBuf = buffer, *strPtr, *strEnd, *sPtr = *s;
  uint64_t        val_ull;            /* for any sizeof() > 4 bytes, if
                                       * unsigned is 64bits it will bypass
                                       * val_ul to go here */
  unsigned        val_ul;             /* need unsigned instead of unsigned long
                                       * incase unsigned 64bits */
  uint32_t        arg_flags;          /* arg_flags needs to be 32bits??? */
  int             arg_prec, arg_field, n;
  unsigned char   ch, sign, arg_size; /* arg_size max value is sizeof(long
                                       * double) if int goes 128bits still
                                       * within char size */
  uintptr_t      *vPtr = NULL;

  --format;
  goto L500;
  do {

    strEnd = strPtr = &strBuf[BUFSTRSIZE];
    arg_flags = 0;
    arg_size  = sizeof(int);
    arg_prec  = arg_field = -1;
    sign = 0;

  next_ch:
    ch = *++format;
  EO_EXTENSIONS:
    if (!ch) break;
    switch (ch) {

        /* Removed Flags for demo */
        /* Removed some Length modifiers for demo */
      case 'l':
        if ((ch = *++format) != 'l') {   /* and ll */
                  arg_size = sizeof(long);         goto EO_EXTENSIONS;  }
                  arg_size = sizeof(long long);    goto next_ch;
      case 'q':   arg_size = sizeof(octle);        goto next_ch;	/* 128bit */
      case 'L':   arg_size = sizeof(long double);  goto next_ch;	/* long double */

        /* Removed some Field-Width / Precision for demo */
      case '.':
        ch = *++format;
        arg_flags |= PREC_PREFIX;

      case '1': case '2': case '3':
      case '4': case '5': case '6':
      case '7': case '8': case '9':
        n = ch - 48;
        while ((unsigned)((ch = *++format) - '0') <= 9)
          n = (n * 10) + (ch - '0');
        if (ch != '$') {
          if (arg_flags & PREC_PREFIX)  arg_prec = n;
          else  arg_field = n;
          arg_flags &= ~PREC_PREFIX;
          goto EO_EXTENSIONS;
        }
        goto next_ch;

        /* Removed all Formats... but real */
      case 'a':  case 'e':  case 'f':  case 'g':
      case 'A':  case 'E':  case 'F':  case 'G':
          /* gcc doesn't permit floats, it converts to doubles */
      {
        unsigned char *(*toStr)(uint32_t [5], unsigned char **,
                                                      int, int, int, unsigned);
        uint32_t w160[5];
        uint32_t dp;  /* 4 byte decimal point param */
        int32_t sch;
        __decimalLoad((unsigned char*)&dp, params);
        if ((ch | 0x20) == 'a')  ch -= 'A', toStr = realtohex;
        else                     ch -= 'E', toStr = realtostr;
        sch = format[-1];
          /* force 128bit long double into quad parsing */
        if (sch == 'q') {
#if (__LDBL_MANT_DIG__ != 64)
    quad_parse:
#endif
          sch = (('q' << 8) | ch);
          qtow(((vPtr == NULL) ? va_arg(arg, octle) : *(octle *)vPtr), w160);
        } else if (sch == 'L') {
#if (__LDBL_MANT_DIG__ != 64)
          goto quad_parse;
#endif
          sch = (('L' << 8) | ch);
          ldtow(((vPtr == NULL) ? va_arg(arg, long double) : *(long double *)vPtr), w160);
        } else {
          sch = (('l' << 8) | ch);
          dtow(((vPtr == NULL) ? va_arg(arg, double) : *(double *)vPtr), w160);
        }
        arg_flags |= (strBuf != buffer) << 7;
        strPtr = toStr(w160, &strBuf, sch, arg_prec, (int)arg_flags, dp);
      }
        if (strPtr == NULL)  goto error;

        val_ul = (n = (int)(strPtr - strBuf));
        strPtr = strBuf;
        arg_prec = 0;
        arg_flags &= ~ALT_PREFIX;
          /* grouping code removed for demo */
    } /* end switch */
                        /* main output section */
      /* adjust the different output factors */
    if ((arg_prec -= n) < 0) arg_prec = 0;
    if ((arg_field -= (arg_prec + val_ul)) < 0) arg_field = 0;
      /* is there output space? removed for demo */
      /* different formatting removed for demo */
    if (sign != 0) *sPtr++ = sign;
    if (arg_prec)  do *sPtr++ = '0'; while ((--arg_prec));
    if (n & 1)
               *sPtr++ = *strPtr++;
    if (n & 2) {
      *(int16_t *)sPtr = *(int16_t *)strPtr;
                  sPtr += sizeof(int16_t); strPtr += sizeof(int16_t);  }
    if ((n >>= 2))
      do {
      *(int32_t *)sPtr = *(int32_t *)strPtr;
                  sPtr += sizeof(int32_t); strPtr += sizeof(int32_t);
      } while ((--n));
    if (arg_field)  do  *sPtr++ = ' ';  while ((--arg_field));

      /* parsing of format string */
    do {
L500: ch = *++format;
      if ((sPtr >= slimit)) {
        --format;
          /* removed buffer extreme */
        puts("this is demo, don't be cute!");
        goto end;
      }
      *sPtr = ch;
      if (!ch) goto end;
      if (ch == '%')  break;
      sPtr++;
    } while (1);
  } while (1);
  *sPtr = 0;
end:
  if (strBuf != buffer)  free(strBuf);
  return sPtr;
error:
  if (strBuf != buffer)  free(strBuf);
  *--sPtr = 0;
  return (*s - 12);
}

#define FORMAT_LINE_MAX  4096

static int
phx_vfprintf(FILE *stream, const char *format, va_list arg) {
//_vfprintf(FILE *stream, const char *format, va_list arg, locale_t ld) {

  unsigned char  *sPtr, *wrPtr, *rdPtr;
  ssize_t         to_write;
  size_t          u8Sz  = FORMAT_LINE_MAX;
  struct __params params;

    /* make sure writable stream, Remove writable for demo */
  if ( (stream == NULL) //|| (!_io_writable(stream))
      || (format == NULL) )  return -1;

    /* create buffer to 'write' into */
  if ((wrPtr = (unsigned char*)malloc(u8Sz)) == NULL)  return -1;

    /* label rdPtr as <format>. If conversion, rdPtr will be allocated
     * and filled with UTF-8 version of <format> */
  rdPtr = (unsigned char*)format;

                        /* UTF-8 <format> input */
  params.s           = &wrPtr;
  params.slimit      = (wrPtr + u8Sz);
  //params.ld          = ld;
  sPtr = __format((const unsigned char *)rdPtr, &params, arg);

                        /* UTF-8 text ouput */
  if ((to_write = (ssize_t)(sPtr - wrPtr)) > 0)
    to_write = fwrite((const void *)wrPtr, (size_t)1, (size_t)to_write, stream);
  free(wrPtr);
  return (int)to_write;
}

int
phx_printf(const char *format, ...) {

  int     written;
  va_list arg;

  va_start(arg, format);
    written = phx_vfprintf((FILE *)stdout, format, arg);
    //written = _vfprintf((FILE *)stdout, format, arg, NULL);
  va_end(arg);
  return written;
}

/*
 *  realtostr - convert real numbers to formated latn numerical string
 *  realtohex - convert real numbers to formated hexadecimal string
 *  qtold - convert quadruple-precision number to a system defined long double
 *  ldtod - convert a system defined long double to a double-precision number
 */
        /* C langauge - string literals */
#ifdef __BIG_ENDIAN__
 #define cconst16_t(a,b)     (uint16_t)(((uint8_t)(a) << 8) | (uint8_t)(b))
 #define cconst32_t(a,b,c,d) (uint32_t)(((uint8_t)(a) << 24) | ((uint8_t)(b) << 16) | ((uint8_t)(c) << 8) | (uint8_t)(d))
#else
 #define cconst16_t(a,b)     (uint16_t)(((uint8_t)(b) << 8) | (uint8_t)(a))
 #define cconst32_t(a,b,c,d) (uint32_t)(((uint8_t)(d) << 24) | ((uint8_t)(c) << 16) | ((uint8_t)(b) << 8) | (uint8_t)(a))
#endif
        /* compiler convience macro */
#ifndef __GNUC_PREREQ
 #if defined __GNUC__ && defined __GNUC_MINOR__
  #define __GNUC_PREREQ(major,minor) \
    (((__GNUC__ << 16) + __GNUC_MINOR__) >= (((major) << 16) + (minor)))
 #else
  #define __GNUC_PREREQ(major,minor) 0
 #endif
#endif  /* __GNUC_PREREQ */

#ifndef __LDBL_MANT_DIG__
#define __LDBL_MANT_DIG__  LDBL_MANT_DIG
#endif

/* endian break down of uint64_t */
#ifdef __BIG_ENDIAN__
  typedef struct {  uint16_t w3; uint16_t w2; uint16_t w1; uint16_t w0;  } _ull_ws;
  typedef struct {  uint32_t hi; uint32_t lo;  } _ull_ls;
#elif defined (__LITTLE_ENDIAN__)
  typedef struct {  uint16_t w0; uint16_t w1; uint16_t w2; uint16_t w3;  } _ull_ws;
  typedef struct {  uint32_t lo; uint32_t hi;  } _ull_ls;
#else
  #error: undefined endianness
#endif
typedef union {  uint64_t ull;  _ull_ls  uls;  _ull_ws  uss;  } _ull_t;

#ifdef __ppc__
  #define CNTLZW(x,y) \
    do __asm__("cntlzw %0,%1" : "=r" (x) : "r" (y)); while(0)
#elif defined (__i386__)
  #define CNTLZW(x,y) \
    do {  __asm__("bsr %1,%0" : "=r" (x) : "r" (y));  x = 31 - x;  } while(0)
#elif defined (__x86_64__)
  #define CNTLZW(x,y) \
    do {  __asm__("bsr %1,%0" : "=r" (x) : "r" (y));  x = 31 - x;  } while(0)
  #define CNTLZD(x,y) \
    do {  __asm__("bsr %1,%0" : "=r" (x) : "r" (y));  x = 63 - x;  } while(0)
#else
    /* about 50% slower, 3 instructions vs 31 instructions */
  #define CNTLZW(x,y) \
    do {  unsigned t = y;   \
      t |= t >> 1;          \
      t |= t >> 2;          \
      t |= t >> 4;          \
      t |= t >> 8;          \
      t |= t >> 16;         \
      t = t - ((t >> 1) & 0x55555555);                            \
      t = (t & 0x33333333) + ((t >> 2) & 0x33333333);             \
      x  = 32 - (((t + (t >> 4) & 0xF0F0F0F) * 0x1010101) >> 24); \
    } while (0);
#endif

  /* used for __fractional128 stack zeroing */
extern void *memset(void *, int, size_t);

long double qtold(quadruple);
double ldtod(long double);

#define _CARRY  1000000000000000000ULL          /* 0x 0DE0B6B3 A7640000 */

  /* for rounding */
static const uint64_t _fcarry[] = {
  5, 50, 500, 5000, 50000, 500000, 5000000, 50000000, 500000000, 5000000000ULL,
  50000000000ULL, 500000000000ULL, 5000000000000ULL, 50000000000000ULL, 500000000000000ULL,
  5000000000000000ULL, 50000000000000000ULL, 500000000000000000ULL,
  /*18*/
  1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000,
  10000000000ULL, 100000000000ULL, 1000000000000ULL, 10000000000000ULL, 100000000000000ULL,
  1000000000000000ULL, 10000000000000000ULL, 100000000000000000ULL, _CARRY
};
  /* for cases: nan, inf, hex */
static const char *_const_outStr =
  "0123456789abcdefnanpinfe0x0.00000123456789ABCDEFNANPINFE0X0.0000";

      /*** internal routines ***/
  /* generic, div() moved to 64_t to address endianess */
static uint64_t divlu2r(uint64_t) __attribute__ ((noinline));
static void  _ulltobuffer_18lr(uint64_t, unsigned char *) __attribute__ ((noinline));
static void  _ulltobuffer_nlr(uint64_t, unsigned char *, int) __attribute__ ((noinline));
static int   _first_digit(uint64_t);
  /* ldtostr specific */
static unsigned char *__n_char_sequence(uint32_t [5], unsigned char *, int);
static uint32_t __fractional(_ull_t *, _ull_t);
static uint32_t __fractional_128(_ull_t *, _ull_t);
static uint32_t __positive_exponent_fp(_ull_t *, int);
static uint32_t __negative_exponent_fp(_ull_t *, int);

/* from fprintf... needs to be the same */
#define ZERO_PREFIX  0001
#define ALT_PREFIX   0002
#define LEFT_PREFIX  0004
#define SIGN_PREFIX  0010
#define SPC_PREFIX   0020
#define STAR_PREFIX  0040
#define GRP_PREFIX   0100
#define USGN_PREFIX  0200
#define PREC_PREFIX  0400
#define ALLOC_BUF    USGN_PREFIX
#define BUFSTRSIZE     64

/* XXX using upper 16 to pass 'unit' max from
 calcation prior to entry of x_exponent */

#define UPPER_TYPE  0x20
#define IS_UPPER_TYPE(a)  (!((a) & UPPER_TYPE))
#define IS_QUAD(a)    ((uint8_t)((a) >> 8) == (uint8_t)'q')
#define IS_LDBL(a)    ((uint8_t)((a) >> 8) == (uint8_t)'L')
#define IS_DBL(a)     ((uint8_t)((a) >> 8) == (uint8_t)'l')
#define IS_FLT(a)     ((uint8_t)((a) >> 8) == 0)
#define TYPE_ZERO   0x40
#define IS_TYPE_ZERO(a)   ((a) & TYPE_ZERO)
#define TYPE_E  0
#define TYPE_F  1
#define TYPE_G  2
#define IS_TYPE_E(a)  (((a) & 3) == 0)
#define IS_TYPE_F(a)  ((a) & TYPE_F)
#define IS_TYPE_G(a)  ((a) & TYPE_G)
#define SURPRESS_NSEQ(a) (((a) & 4) != 0)

#pragma mark *** Support Mathematics ***

static uint64_t
divlu2r(uint64_t c) {

  _ull_t q;
  uint32_t mConst = 2305843009U;  // 2^61 / 10^9
  uint32_t tConst = 1000000000U;
  q.uls.hi = (uint32_t)(c >> 32);
  q.uls.lo = ((uint32_t)c);
  if (q.uls.hi != 0) {
    uint32_t shift;
    CNTLZW(shift, q.uls.hi);
    q.uls.hi = (uint32_t)((((q.uls.hi << shift) | (q.uls.lo >> (32 - shift)))
                                                    * (uint64_t)mConst) >> 32);
    if (shift > 3)  q.uls.hi >>= (shift - 3);
  } else {
    q.uls.hi = (uint32_t)(((q.uls.lo | 1) * (uint64_t)mConst) >> (32 + 29));
  }
  q.uls.lo = (uint32_t)(c - ((uint64_t)q.uls.hi * (uint32_t)tConst));
  if (q.uls.lo >= tConst)
    q.uls.hi += 1, q.uls.lo -= tConst;
  return q.ull;
}

static void
_ulltobuffer_18lr(uint64_t c, unsigned char *buf) {

  _ull_t a, b;
  int n;
  --buf;
    /* in 4.60, 2.30 fixed point integers */
  b.ull = (uint64_t)(((uint32_t)c) << 2) * 2882303762U;
  a.ull = (uint64_t)(((uint32_t)(c >> 32)) << 2) * 2882303762U;
  n = 9;
  goto L1;
  do {
    b.uls.hi = (((b.uls.hi << 4) >> 4) * 10) + (b.uls.lo >> 28);
    a.uls.hi = (((a.uls.hi << 4) >> 4) * 10) + (a.uls.lo >> 28);
    b.uls.lo <<= 4;
    a.uls.lo <<= 4;
L1: *++buf = (a.uls.hi >> 28) + '0';
    buf[9] = (b.uls.hi >> 28) + '0';
  } while ((--n));
}

/* went for speed */
static void
_ulltobuffer_nlr(uint64_t c, unsigned char *buf, int m) {

  _ull_t b;
  int n;
  --buf;
    /* in 4.60, 2.30 fixed point integers */
  c <<= 2;
  b.ull = (uint64_t)((uint32_t)(c >> 32)) * 2882303762U;
  n = (m <= 9) ? m : 9;
  m -= 9;
  goto L1;
  do {
    b.uls.hi = (((b.uls.hi << 4) >> 4) * 10) + (b.uls.lo >> 28);
    b.uls.lo <<= 4;
L1: *++buf = (b.uls.hi >> 28) + '0';
  } while ((--n));
  if (m > 0) {
    n = m;
    b.ull = (uint64_t)((uint32_t)c) * 2882303762U;
    goto L2;
    do {
      b.uls.hi = (((b.uls.hi << 4) >> 4) * 10) + (b.uls.lo >> 28);
      b.uls.lo <<= 4;
  L2: *++buf = (b.uls.hi >> 28) + '0';
    } while ((--n));
  }
}

/*#pragma GCC diagnostic ignored "-Wmisleading-indentation"*/
#if __GNUC_PREREQ(4,5)
 #pragma GCC diagnostic ignored "-Wmisleading-indentation"
#endif
/* uses 2 64s as swap loads, this prevents stalls */
/* returns 1 less than actual digits, the power of 10, or log10 value */
static int
_first_digit(uint64_t a) {

  _ull_t reg;
  const uint64_t *fc;
  uint64_t b;
  reg.ull = a;
  fc = _fcarry;
  if (reg.uls.hi) {
    a = fc[35];
    b = fc[34];
    if (reg.ull >= a)  return 17;  a = fc[33];
    if (reg.ull >= b)  return 16;  b = fc[32];
    if (reg.ull >= a)  return 15;  a = fc[31];
    if (reg.ull >= b)  return 14;  b = fc[30];
    if (reg.ull >= a)  return 13;  a = fc[29];
    if (reg.ull >= b)  return 12;  b = fc[28];
    if (reg.ull >= a)  return 11;
    if (reg.ull >= b)  return 10;
    return 9;
  } else {
    uint32_t c, d;
    c = (uint32_t)fc[27];
    d = (uint32_t)fc[26];
    if (reg.uls.lo >= c)  return 9;  c = (uint32_t)fc[25];
    if (reg.uls.lo >= d)  return 8;  d = (uint32_t)fc[24];
    if (reg.uls.lo >= c)  return 7;  c = (uint32_t)fc[23];
    if (reg.uls.lo >= d)  return 6;
    if (reg.uls.lo >= c)  return 5;
    if (reg.uls.lo >= 10000U)  return 4;
    if (reg.uls.lo >= 1000U)  return 3;
    if (reg.uls.lo >= 100U)  return 2;
    if (reg.uls.lo >= 10U)  return 1;
    return 0;
  }
}
/*#pragma GCC diagnostic pop*/
#if __GNUC_PREREQ(4,5)
 #pragma GCC diagnostic pop
#endif

#pragma mark *** Support Subroutines ***

static unsigned char *
__n_char_sequence(uint32_t w160[5], unsigned char *sPtr, int type) {

  const char   *_outvalues;
  unsigned is_zero;

    /* e80 long double type, 63rd bit required set reguardless if nan or inf */
    /* if is_zero is <zero>, then is infinity, else nan */
    /* any info in nan is considered n-char-sequence */
    /* bit 62 of nan, when 0, and others [61-0] set, is considered signalling nan.
     * bit 62 of nan, when 1, if others 0, special quiet nan, others [61-0] set,
     * considered quiet signaling nan. */
  is_zero = ((w160[3] & 0x7FFFFFFF) | w160[2] | w160[1] | w160[0]);
  _outvalues = _const_outStr + 16;
  if (IS_UPPER_TYPE(type)) _outvalues += 32;
  if (!is_zero){
    *((uint32_t *)sPtr) = ((int32_t *)_outvalues)[1];
    return (sPtr + 3);
  }

  *((uint32_t *)sPtr) = ((int32_t *)_outvalues)[0];
  sPtr += 3;
  if (SURPRESS_NSEQ(type)) return sPtr;

    /* for doubles 53bits exposed, shift off hidden */
  if (!IS_LDBL(type)) *(uint64_t*)&w160[2] <<= 1;
  if (IS_QUAD(type)) {
    w160[2] |= (w160[1] & 0x40000000) >> 30;
    *(uint64_t*)&w160[0] <<= 2;
  }

    /* now output the n-char-sequence */
    /* (0x  hexdigits )
     * e80,16nibbles, doubles,13nibbles, floats,5.5nibbles, */
  *((uint16_t *)sPtr) = cconst16_t('(','0');
  sPtr += 2;
  *sPtr = _outvalues[9]; /* 'x' or 'X' */
  sPtr++;
  _outvalues -= 16;
  sPtr[ 0] = _outvalues[(w160[3]  >> 28)];
  sPtr[ 1] = _outvalues[((w160[3] <<  4) >> 28)];
  sPtr[ 2] = _outvalues[((w160[3] <<  8) >> 28)];
  sPtr[ 3] = _outvalues[((w160[3] << 12) >> 28)];
  sPtr[ 4] = _outvalues[((w160[3] << 16) >> 28)];
  sPtr[ 5] = _outvalues[((w160[3] << 20) >> 28)];
  if (IS_FLT(type)) {  sPtr +=  6; goto end_brace;  }
  sPtr[ 6] = _outvalues[((w160[3] << 24) >> 28)];
  sPtr[ 7] = _outvalues[((w160[3] << 28) >> 28)];
  sPtr[ 8] = _outvalues[(w160[2]  >> 28)];
  sPtr[ 9] = _outvalues[((w160[2] <<  4) >> 28)];
  sPtr[10] = _outvalues[((w160[2] <<  8) >> 28)];
  sPtr[11] = _outvalues[((w160[2] << 12) >> 28)];
  sPtr[12] = _outvalues[((w160[2] << 16) >> 28)];
  if (IS_DBL(type)) {  sPtr += 13; goto end_brace;  }
  sPtr[13] = _outvalues[((w160[2] << 20) >> 28)];
  sPtr[14] = _outvalues[((w160[2] << 24) >> 28)];
  sPtr[15] = _outvalues[((w160[2] << 28) >> 28)];
  sPtr += 16;
  if (IS_LDBL(type))  goto end_brace;
    /* undo w160 formatting of '[0]' for number */
  sPtr[ 0] = _outvalues[(w160[1]  >> 28)];
  sPtr[ 1] = _outvalues[((w160[1] <<  4) >> 28)];
  sPtr[ 2] = _outvalues[((w160[1] <<  8) >> 28)];
  sPtr[ 3] = _outvalues[((w160[1] << 12) >> 28)];
  sPtr[ 4] = _outvalues[((w160[1] << 16) >> 28)];
  sPtr[ 5] = _outvalues[((w160[1] << 20) >> 28)];
  sPtr[ 6] = _outvalues[((w160[1] << 24) >> 28)];
  sPtr[ 7] = _outvalues[((w160[1] << 28) >> 28)];
  sPtr[ 8] = _outvalues[(w160[0]  >> 28)];
  sPtr[ 9] = _outvalues[((w160[0] <<  4) >> 28)];
  sPtr[10] = _outvalues[((w160[0] <<  8) >> 28)];
  sPtr[11] = _outvalues[((w160[0] << 12) >> 28)];
  sPtr += 12;
end_brace:
  *sPtr = ')';
  return (sPtr + 1);
}

/* converts fractional portion of IEEE754 value to
 * base 10 representation, in 10^18 ull format.
 */
static uint32_t
__fractional(_ull_t *uPtr, _ull_t reg) {

  uint32_t last = 1;
  uint64_t mlConst = 3814697265625ULL;  /* 5^18 (42bits) 0x378 2DACE9D9 */
    /*
  0x7FFFFFFF  FFFFFFFF  start
  0x7FFFE 1FFF FFFFFFFF split
     */
  uPtr[0].ull = (reg.uls.hi >> 13) * mlConst; /* 18 + 45 == 63 */
    /* test added, rather than just '&='. This is guess that small 'standard'
     * fractions might be used */
  if (((reg.uls.hi &= 0x00001FFF) | reg.uls.lo) != 0) {
      /* the left over 13bits from hi with 5bits from lo (45-18=>27).
  0x00001FFF FFFFFFFF  start
  0x00001FFFF8 7FFFFFF split
       */
    uint64_t u64;
    last += 1;
    uPtr[0].ull += (u64 = ((uint32_t)(reg.ull >> 27)) * mlConst) >> 18;
    uPtr[1].ull  = ((uint32_t)u64 & 0x3FFFF) * mlConst;
    if ((reg.uls.lo &= 0x7FFFFFF) != 0) {
        /*
  0x00000000 07FFFFFF   start
  0x          7FFFE 1FF split
         */
      _ull_t scratch;
      last += 1;
      uPtr[0].ull += (u64 = (reg.uls.lo >> 9) * mlConst) >> 36;
      scratch.ull  = (uint32_t)((uint32_t)u64 & 0x3FFFF) * mlConst;
      uPtr[2].ull  = (scratch.uls.lo & 0x3FFFF) * mlConst;
      u64 = uPtr[1].ull + (scratch.ull >> 18)
          + (((uint32_t)(u64 >> 18)) & 0x3FFFF) * mlConst;
      if (u64 >= _CARRY)  u64 -= _CARRY, uPtr[0].ull += 1ULL;
      uPtr[1].ull = u64;
      if ((reg.uls.lo &= 0x1FF) != 0) {
          /*
  0x00000000 000001FF end of 63bit resolution
           */
        uint32_t u32;
        last += 1;
          // 54*10^18
        u32 = (uint32_t)((scratch.ull = reg.uls.lo * mlConst) >> 27);
        uPtr[0].ull += (uint64_t)(scratch.uls.hi >> 13);
          // 54*10^18*10^18
          /* first  18 of 45 remainder */
        u64 = ((u32 & 0x3FFFF) * mlConst) + uPtr[1].ull;
          /* second 18 of 45 remainder, scratch.uls.lo &= 0x07FFFFFF; */
        u32 = scratch.uls.lo & 0x07FFFFFF;
        scratch.ull = (u32 >> 9) * mlConst;
        u64 += (scratch.ull >> 18);
        uPtr[2].ull += (scratch.uls.lo & 0x3FFFF) * mlConst;
        if (uPtr[2].ull >= _CARRY)  uPtr[2].ull -= _CARRY, u64 += 1ULL;
        uPtr[1].ull = u64;
          /* last    9 of 45 remainder */
        scratch.ull = ((u32 & 0x000001FF) << 9) * mlConst;
        uPtr[1].ull += (uint64_t)(scratch.uls.hi >> 4);
        if (uPtr[1].ull >= _CARRY)  uPtr[1].ull -= _CARRY, uPtr[0].ull += 1ULL;
          /* of scratch2 36 remainder */
        scratch.uls.hi &= 0x0000000F;
        uPtr[2].ull += ((uint32_t)(scratch.ull >> 18)) * mlConst;
        uPtr[2].ull += ((scratch.uls.lo & 0x3FFFF) * mlConst) >> 18;
        if (uPtr[2].ull >= _CARRY)  uPtr[2].ull -= _CARRY, uPtr[1].ull += 1ULL;
          /* of 18 remainder */
        scratch.uls.lo = (uint32_t)((scratch.uls.lo & 0x3FFFF) * mlConst) & 0x3FFFF;
        uPtr[3].ull = scratch.uls.lo * mlConst;
  } } }
  return last;
}

/* this is for second register of frational */
/* expected entry use: __fractional(uPtr, reg2);
 * expected range of reg2: 0x7fffffffffffc000 thru 0x0000000000004000
 * reg2 expectation: shifted right 1, matching reg 1, where high bit is bit 62
 * to avoid initial allocation problems, create stack use instead of uPtr.
 * also avoids re-zeroing of uPtr regs
 * uPtr parameter for adding to reg1's converted fractional, the final result */
static uint32_t
__fractional_128(_ull_t *uPtr, _ull_t reg) {

  uint32_t last;
  _ull_t sreg[15]; /* StackREGisters */
  memset(sreg, 0, sizeof(sreg));
    /* create regs of multiplly by 5^63 (3 max) */
  __fractional(sreg, reg);

    /* creates sr3-sr6, affects uPtr[4] */
  last = __fractional(&sreg[ 3], sreg[0]);
  if ((sreg[1].ull)) {
    last = 5;  // creates uPtr[5] value
      /* creates sr7-sr10 */
    __fractional(&sreg[ 7], sreg[1]);
    sreg[4].ull += sreg[7].ull;
    sreg[5].ull += sreg[8].ull;
    sreg[6].ull += sreg[9].ull;
    sreg[7].ull  = sreg[10].ull;
    if ((sreg[2].ull)) {
        /* creates sr11-sr14*/
      __fractional(&sreg[11], sreg[2]);
      sreg[5].ull += sreg[11].ull;
      sreg[6].ull += sreg[12].ull;
      sreg[7].ull += sreg[13].ull;
      uPtr[6].ull  = sreg[14].ull;
    }
    if (sreg[7].ull >= _CARRY)  sreg[6].ull += 1ULL, sreg[7].ull -= _CARRY;
    uPtr[5].ull  = sreg[7].ull;
    while (sreg[6].ull >= _CARRY)  sreg[5].ull += 1ULL, sreg[6].ull -= _CARRY;
  }
    /* sreg3-sreg8 set for addin transfer to uPtr */
  uPtr[4].ull  = sreg[6].ull;
  uPtr[3].ull += sreg[5].ull;
  while (uPtr[3].ull >= _CARRY)  uPtr[2].ull += 1ULL, uPtr[3].ull -= _CARRY;
  uPtr[2].ull += sreg[4].ull;
  if (uPtr[2].ull >= _CARRY)  uPtr[1].ull += 1ULL, uPtr[2].ull -= _CARRY;
  uPtr[1].ull += sreg[3].ull;
  if (uPtr[1].ull >= _CARRY)  uPtr[0].ull += 1ULL, uPtr[1].ull -= _CARRY;
  return last;
}

static uint32_t
__positive_lowexp128(_ull_t *uPtr, unsigned pw2, _ull_t reg0, _ull_t reg1) {

  uint32_t last, pw10 = 0;
  uint32_t  unit_18 = 0;
  if (pw2 < 64) {
    /* create shifted reg0, reg1 */
    /* special case: reg1 has 49bits, a shift of >= 49 bits (*2^49+)
     * means only 1 reg used for __fractional() */
    uint32_t shiftb = 63 - pw2;

    if (pw2 <= 48) {
        /* lowest drop of 15 bits, max of 63 */
      uPtr[0].ull = reg0.ull >> shiftb;
        /* pw2 == 0 clears explicit '1' */
      reg0.ull  = reg0.ull << pw2, reg0.uls.hi &= 0x7fffffff;
        /* pw2 == 0 ORs high bit explicit '0' */
      reg0.ull |= reg1.ull >> shiftb;
      reg1.ull  = reg1.ull << pw2, reg1.uls.hi &= 0x7fffffff;
      last = __fractional(&uPtr[1], reg0);
      if (reg1.ull != 0)
        last = __fractional_128(&uPtr[1], reg1);
    } else {

      uint32_t hi;
        /* @49 shiftb = 14, @63 shiftb = 0 */
      if (shiftb != 0) {
        reg1.ull = (reg1.ull >> shiftb);
        reg1.uls.hi |= (reg0.uls.lo << (32 - shiftb)) >> 1;
        reg0.ull >>= shiftb;
      } else {
          /* values 8-15 possible, reducing >= _CARRY loop
           *        8   9  10  11  12  13  14  15
           * actual 9, 10, 11, 12, 13, 14, 16, 17 e18 starting values */
        hi = (uint32_t)((reg0.uss.w3 >> 12) + 1);
        goto hi_adjusted;
      }
        /* reg0 holds shifted whole value */
        /* default action */
      uPtr[0].ull = reg0.ull;
      if ((shiftb < 4) || ((shiftb == 4) && (reg0.ull >= _CARRY))) {
          /* values 0-7 possible, reducing >= _CARRY loop
           *        0  1  2  3  4  5  6  7
           * actual 1, 1, 2, 3, 4, 5, 6, 8,  e18 starting values */
        hi = (uint32_t)(reg0.uss.w3 >> 12);
        if ((hi))
hi_adjusted:
          reg0.ull -= hi * _CARRY;
        while (reg0.ull >= _CARRY)  hi += 1, reg0.ull -= _CARRY;
        uPtr[0].ull = (uint64_t)hi, ++uPtr;
        uPtr[0].ull = reg0.ull;
        pw10 = 18;
      }
      last = __fractional(&uPtr[1], reg1);
      unit_18 = 1;
    }
  } else {
    uint32_t n28s;
    _ull_t    reg;              /* most used 64 bit gpr */

      /* 64 <= pw2 < 112, 1 fractional reg, multiple whole regs */
    uint32_t hi = (uint32_t)((reg0.uss.w3 >> 12) + 1);
    reg0.ull -= hi * _CARRY;
    while (reg0.ull >= _CARRY)  hi += 1, reg0.ull -= _CARRY;
    uPtr[0].ull = (uint64_t)hi;
    uPtr[1].ull = reg0.ull;
      /* max 50 ((111-63) = 49) [0-49] */
    pw2 -= 63;
    uint32_t shiftb = 63 - pw2;
    if ((n28s = (pw2 >= 28)))
      pw2 -= 28;
    if ((pw2)) {
        /* shift range [27-1] */
      _ull_t  scratch;
        /* shift [18-9] left [27-1] */
      uPtr[0].uls.lo <<= pw2;
        /* shift remainder (uPtr[1], size < 10^19) left [27-1] */
      reg.ull = divlu2r(uPtr[1].ull);
      scratch.ull = divlu2r((uint64_t)reg.uls.hi << pw2);
      uPtr[0].ull += scratch.uls.hi;
      reg.ull = ((uint64_t)scratch.uls.lo * 1000000000U)
              + ((uint64_t)reg.uls.lo << pw2);
        /* do carry, set uPtr[1] */
      if (reg.ull >= _CARRY)  uPtr[0].ull += 1ULL, reg.ull -= _CARRY;
      uPtr[1].ull = reg.ull;
    }
    unit_18 = 1;
    pw10 = 18;
    if (n28s) {
        /* shift of 28 multiples */
        /* set initial limiter to current regs valid (<= unit_max) */
      _ull_t *cap = &uPtr[unit_18];
      _ull_t *input, *output, next;
      reg.ull = (output = (input = uPtr))->ull;
      output->ull = 0;
      next.ull = input[1].ull;

      unit_18++;
      if (reg.ull >= 3725290298ULL)  goto L1;
      unit_18--;
      output->ull = reg.ull << 28;
      output--;

      do {
        _ull_t scratch0;
        uint32_t u32;
        reg.ull = next.ull;
        next.ull = (++input)[1].ull;
        output++;
  L1:   scratch0.ull = divlu2r(reg.ull);
        u32 = (uint32_t)(((uint64_t)scratch0.uls.hi * 2305843009) >> 32) >> 1;
        scratch0.ull = ((((uint64_t)scratch0.uls.hi << 28)
                         - ((uint64_t)u32 * 1000000000U)) * 1000000000U)
                     + ((uint64_t)scratch0.uls.lo << 28);
        if (scratch0.ull >= _CARRY)
          u32 += 1, scratch0.ull -= _CARRY;
        output[1].ull = scratch0.ull;
        if ((output->ull += u32) >= _CARRY)
          output->ull -= _CARRY, output[-1].ull += 1;
      } while (input < cap);
    }
    pw10 += ((unit_18 - 1) * 18);

      /* reg0 has addin to shifted whole value */
    pw2 = 63 - shiftb;
    reg0.ull = (reg1.ull >> shiftb);
    reg0.ull += uPtr[1].ull;
    if (reg0.ull >= _CARRY)  uPtr[0].ull += 1, reg0.ull -= _CARRY;
    uPtr[1].ull = reg0.ull;
      /* put fractional bits into place */
    reg1.ull = reg1.ull << pw2, reg1.uls.hi &= 0x7fffffff;
    last = __fractional(&uPtr[2], reg1);
    last++;
  }
  return ((pw10 << 16) | ((last + unit_18) << 4) | last);
}

static uint32_t
__positive_lowexp(_ull_t *uPtr, unsigned pw2, _ull_t reg0) {

    /* do less than 63 code */
  uint16_t last, whole_incr, pw10;
  uint64_t up0;
  pw10 = (whole_incr = 0);
  if (pw2 < 31) {
    up0 = (uint64_t)(reg0.uls.hi >> (31 - pw2));
    reg0.uls.hi = (reg0.uls.hi << (pw2 + 1)) >> (pw2 + 1);
  } else if (pw2 == 31) {
    up0 = (uint64_t)reg0.uls.hi;
    reg0.uls.hi = 0;
  } else {
    uint32_t shift = pw2 - 32;
    up0 = reg0.ull >> (31 - shift);
    reg0.uls.lo = (reg0.uls.lo << (shift + 1)) >> (shift + 1);
    if ((shift > 27) || ((shift == 27) && (up0 >= _CARRY))) {
      do uPtr[0].ull += 1ULL; while ((up0 -= _CARRY) >= _CARRY);
      uPtr++, whole_incr++, pw10 = 18;
    }
  }
  uPtr[0].ull = up0;
  last = 1;
  if ((reg0.ull <<= pw2) != 0)
    last += __fractional(&uPtr[1], reg0);
  return ((pw10 << 16) | ((last + whole_incr) << 4) | last);
}

/* now grows low address to high address, or left to right numerically */
/* expects implicit/explicit 1 as 63rd bit */
/* multiplies by 2^28, 2^28 used to keep large values within 10^18 ulls */
/* addition of such values create at max 'overflow' of 1 */
/* valid division range on /28 [0-16383] */
static uint32_t
__positive_exponent_fp(_ull_t *uPtr, int type) {

  unsigned  pw2, pw10, n28s;  /* power of 2, power of 10, shifts of 28 */
  _ull_t    reg0;              /* most used 64 bit gpr */
    /* track reg expansion, initial is to read 2 registers (&uPtr[unit_18]) */
  uint32_t  unit_18 = 1;

    /* load 'passed' data */
  pw2 = uPtr[0].uss.w0 - 16383;
    /* note: retain explicit '1' */
  reg0.ull = uPtr[1].ull;
  uPtr[1].ull = (uPtr[0].ull = 0);

  if (uPtr[2].ull != 0) {
    _ull_t   reg1;
    uint32_t hi;
    reg1.ull = uPtr[2].ull;
    uPtr[2].ull = 0;
    if (pw2 < 112)
      return __positive_lowexp128(uPtr, pw2, reg0, reg1);
    /* set up with all whole */
    hi = (uint32_t)((reg0.uss.w3 >> 12) + 1);
    reg0.ull -= hi * _CARRY;
    while (reg0.ull >= _CARRY)  hi += 1, reg0.ull -= _CARRY;
    uPtr[0].ull = (uint64_t)hi << 49;
    uPtr[1].ull = reg1.ull >> 14;
      /* reg1 free to use as gpr */
    reg0.ull = divlu2r(reg0.ull);
    reg1.ull = divlu2r((uint64_t)reg0.uls.hi << 21);
    uPtr[0].ull += (uint64_t)reg1.uls.hi << 28;
    reg0.ull = ((uint64_t)reg1.uls.lo * 1000000000U)
             + ((uint64_t)reg0.uls.lo << 21);
    reg0.ull = divlu2r(reg0.ull);
    reg1.ull = divlu2r((uint64_t)reg0.uls.hi << 28);
    uPtr[0].ull += (uint64_t)reg1.uls.hi;
    uPtr[1].ull += ((uint64_t)reg1.uls.lo * 1000000000U)
                 + ((uint64_t)reg0.uls.lo << 28);
    if (uPtr[1].ull >= _CARRY)  uPtr[0].ull += 1, uPtr[1].ull -= _CARRY;
    pw10 = 18;
    pw2 -= 112;
      /* uPtr[0] range [10384593717069655-5192296858534827] */
    if ((pw2 -= ((n28s = (pw2 * 18725U) >> 19) * 28)) != 0) {
      _ull_t next, *output;
      uint32_t u32;
      next.ull = uPtr[1].ull;
      reg0.ull = divlu2r(uPtr[0].ull);
        /* 2^61 / 10^9  based on << 28 forces all into higher register */
      u32 = (uint32_t)(((uint64_t)reg0.uls.hi * 2305843009U) >> 32) >> (29 - pw2);
      reg0.ull = ((((uint64_t)reg0.uls.hi << pw2)
                  - ((uint64_t)u32 * 1000000000U)) * 1000000000U)
                   + ((uint64_t)reg0.uls.lo << pw2);
      if (reg0.ull >= _CARRY)
        u32 += 1, reg0.ull -= _CARRY;
      if ((u32)) {
        uPtr[1].ull = reg0.ull;
        uPtr[0].ull = (uint64_t)u32;
          /* creates new register, increases uPtr[unit_18] limit (3) */
          /* adds pw10 += 18 by return math addition of unit_18 */
        unit_18++;
        output = &uPtr[1];
      } else {
          /* no new register, still uPtr[unit_18] limit (2) */
        uPtr[0].ull = reg0.ull;
        output = &uPtr[0];
      }
      reg0.ull = divlu2r(next.ull);
      u32 = (uint32_t)(((uint64_t)reg0.uls.hi * 2305843009U) >> 32) >> (29 - pw2);
      reg0.ull = ((((uint64_t)reg0.uls.hi << pw2)
                  - ((uint64_t)u32 * 1000000000U)) * 1000000000U)
                   + ((uint64_t)reg0.uls.lo << pw2);
      if (reg0.ull >= _CARRY)
        u32 += 1, reg0.ull -= _CARRY;
      output[1].ull  = reg0.ull;
        /* case:
         * w160[4] = 0x4092, w160[3] = 0x8001a8fb, w160[2] = 0xec8ed23f;
         */
      if ((output[0].ull += u32) >= _CARRY)
        output[-1].ull += 1, output[0].ull -= _CARRY;
      }
  } else {
    uint32_t hi;
    if (pw2 < 63)
      return __positive_lowexp(uPtr, pw2, reg0);
      /* below, all use min of 2 regs (uPtr) */
      /* creates new register, still uPtr[unit_18] limit (2) */
      /* values 8-15 possible, reducing >= _CARRY loop
       *        8   9  10  11  12  13  14  15
       * actual 9, 10, 11, 12, 13, 14, 16, 17 e18 starting values */
    hi = (uint32_t)((reg0.uss.w3 >> 12) + 1);
    reg0.ull -= hi * _CARRY;
    while (reg0.ull >= _CARRY)  hi += 1, reg0.ull -= _CARRY;
    uPtr[0].ull = (uint64_t)hi;
    uPtr[1].ull = reg0.ull;
    pw10 = 18;
    pw2 -= 63;
    if ((pw2 -= ((n28s = (pw2 * 18725U) >> 19) * 28)) != 0) {
        /* shift range [27-1] */
      _ull_t  scratch;
        /* shift [18-9] left [27-1] */
      uPtr[0].uls.lo <<= pw2;
        /* shift remainder (uPtr[1], size < 10^19) left [27-1] */
      reg0.ull = divlu2r(uPtr[1].ull);
      scratch.ull = divlu2r((uint64_t)reg0.uls.hi << pw2);
      uPtr[0].ull += scratch.uls.hi;
      reg0.ull = ((uint64_t)scratch.uls.lo * 1000000000U)
              + ((uint64_t)reg0.uls.lo << pw2);
        /* do carry, set uPtr[1] */
      if (reg0.ull >= _CARRY)  uPtr[0].ull += 1ULL, reg0.ull -= _CARRY;
      uPtr[1].ull = reg0.ull;
    }
  }

  if (n28s != 0) {
      /* shift of 28 multiples */
      /* set max regs use, cap used as pointer compare limiter */
    uint32_t unit_max = type >> 16;
      /* set initial limiter to current regs valid (<= unit_max) */
    _ull_t *cap = &uPtr[unit_18];

    do {
        /* start at base */
        /* create loop counter, each 28 loop expects increase of new
         * reg created. A test used to verify if one is created. loop counter
         * then used to increase both pw10, and possibly the ending of
         * last math reg used (cap) */
      _ull_t *input, *output, next;
      reg0.ull = (output = (input = uPtr))->ull;
      output->ull = 0;
      next.ull = input[1].ull;

        /* critical point where addin to reg0.ull can cause overflow
         * and propagation into a prior ull */
      unit_18++;
      if (reg0.ull >= 3725290298ULL)  goto L1;
      unit_18--;
        /* shift of reg0.ull with addin remains below 10^19 */
      output->ull = reg0.ull << 28;
        /* undo loop increment of first addin location,
         * and lcnt (new reg increase) initial assumption */
      output--;

      do {
        _ull_t scratch0;
        uint32_t u32;
        reg0.ull = next.ull;
        next.ull = (++input)[1].ull;
        output++;
  L1:   scratch0.ull = divlu2r(reg0.ull);
        u32 = (uint32_t)(((uint64_t)scratch0.uls.hi * 2305843009) >> 32) >> 1;
        scratch0.ull = ((((uint64_t)scratch0.uls.hi << 28)
                         - ((uint64_t)u32 * 1000000000U)) * 1000000000U)
                     + ((uint64_t)scratch0.uls.lo << 28);
        if (scratch0.ull >= _CARRY)
          u32 += 1, scratch0.ull -= _CARRY;
        output[1].ull = scratch0.ull;
        if ((output->ull += u32) >= _CARRY)
          output->ull -= _CARRY, output[-1].ull += 1;
      } while (input < cap);
        /* tail pointer update based on new reg add vs needs for precision */
        /* limit allowed to increase above cap for pw10 counter */
      cap = &uPtr[((unit_18 > unit_max) ? unit_max : unit_18)];
    } while ((--n28s));
      /* convert regs used into pw10. add to pre-28 value */
  }
  pw10 += ((unit_18 - 1) * 18);
  return ((pw10 << 16) | ((unit_18 + 1) << 4));
}

static uint32_t
__negative_exponent_fp(_ull_t *uPtr, int type) {

  int      pw2, pw10;         /* power of 2, power of 10 */
  uint32_t index;
  _ull_t   reg0;

  pw10 = -18;
  pw2  = 16383 - uPtr[0].uls.lo;
  pw2 -= (index = ((uint32_t)pw2 == ((IS_DBL(type)) ? 1023U : 16383U)));

    /* remove explicit '1'. add back later as 1e18 if not denormal */
  reg0.ull = uPtr[1].ull;
  reg0.uls.hi &= 0x7FFFFFFF;
  uPtr[1].ull = 0;

  if (uPtr[2].ull != 0) {
    _ull_t   reg1;
    reg1.ull = uPtr[2].ull;
    uPtr[2].ull = 0;
    __fractional(uPtr, reg0);
    __fractional_128(uPtr, reg1);
    if (index == 0)  uPtr->ull += _CARRY;
    index = 8;
  } else if (reg0.ull != 0) {
      /* convert fractional to base 10 */
    __fractional(uPtr, reg0);
      /* 2 cases: denormal, normal */
    if (index == 0)  uPtr->ull += _CARRY;
    index = 5;
  } else {
    if (pw2 < 18) {
        /* straight power of 2 */
        /* first 17, [-1, -17]... shift of 10^18,
         * 10^18/18446744073709551616 =
         * 0.0542101086242752217003726400434970855712
         * 0.0542101086242752217003726400434970855712 * 2^64 =
         * 10^18 = 0x232830643 * 2^32 + 2808348672 */
        /* no whole, all fraction */
      uPtr->uls.lo = ((uint32_t)232830643U << (32 - pw2))
                     | ((uint32_t)2808348672U >> pw2);
      uPtr->uls.hi =  (uint32_t)232830643U >> pw2;
      return (uint32_t)((-(18 << 16)) | (1 << 4) | 8);
    }
      /* there are 18 trailing zeroes to _CARRY */
    uPtr[0].ull = _CARRY >> 18;
    pw2 -= 18;
    index = 5;
  }
    /* 5^18 * 2^2 * 2^16 = 10^18 */
    /* each shift into new ull is multiplied by 10^18.
     * or 2^-16 * 10^18 = shift * 5^18 * 2^2 */
  if (pw2 & 0xf) {
      /* partial 2^-16, use of shorter, quicker multiply */
      /* cant use shift on ulls, when partial gcc adds extra coding, test of 32 */
      /* use this code, smaller, faster than using unsigned long gcc code */
    uint32_t m = pw2 & 0xf;
    uint32_t shift = (16 - m);
    uint32_t shiftb = shift + 16;
    uint64_t fConst = 15258789062500ULL;
    if (index == 8) {
      uPtr[7].ull = ((uint16_t)(uPtr[6].uls.lo << shift)) * fConst;
      uPtr[6].uls.lo = (uPtr[6].uls.hi << shiftb) | (uPtr[6].uls.lo >> m);
      uPtr[6].uls.hi >>= m;
      uPtr[6].ull += ((uint16_t)(uPtr[5].uls.lo << shift)) * fConst;
      uPtr[5].uls.lo = (uPtr[5].uls.hi << shiftb) | (uPtr[5].uls.lo >> m);
      uPtr[5].uls.hi >>= m;
      uPtr[5].ull += ((uint16_t)(uPtr[4].uls.lo << shift)) * fConst;
      uPtr[4].uls.lo = (uPtr[4].uls.hi << shiftb) | (uPtr[4].uls.lo >> m);
      uPtr[4].uls.hi >>= m;
      uPtr[4].ull += ((uint16_t)(uPtr[3].uls.lo << shift)) * fConst;
    } else {
      uPtr[4].ull = ((uint16_t)(uPtr[3].uls.lo << shift)) * fConst;
    }
    uPtr[3].uls.lo = (uPtr[3].uls.hi << shiftb) | (uPtr[3].uls.lo >> m);
    uPtr[3].uls.hi >>= m;
    uPtr[3].ull += ((uint16_t)(uPtr[2].uls.lo << shift)) * fConst;
    uPtr[2].uls.lo = (uPtr[2].uls.hi << shiftb) | (uPtr[2].uls.lo >> m);
    uPtr[2].uls.hi >>= m;
    uPtr[2].ull += ((uint16_t)(uPtr[1].uls.lo << shift)) * fConst;
    uPtr[1].uls.lo = (uPtr[1].uls.hi << shiftb) | (uPtr[1].uls.lo >> m);
    uPtr[1].uls.hi >>= m;
    uPtr[1].ull += ((uint16_t)(uPtr[0].uls.lo << shift)) * fConst;
    uPtr[0].uls.lo = (uPtr[0].uls.hi << shiftb) | (uPtr[0].uls.lo >> m);
    uPtr[0].uls.hi >>= m;
      /* address case of denormals, below 'flag' will write pre-regs otherwise */
    if ((uPtr[0].uls.hi | uPtr[0].uls.lo) == 0) {
      pw10 -= 18;
      uPtr[0].ull = uPtr[1].ull, uPtr[1].ull = uPtr[2].ull;
      uPtr[2].ull = uPtr[3].ull, uPtr[3].ull = uPtr[4].ull;
      if (index == 8) {
        uPtr[4].ull = uPtr[5].ull, uPtr[5].ull = uPtr[6].ull;
        uPtr[6].ull = uPtr[7].ull, uPtr[7].ull = 0;
      } else {
        uPtr[4].ull = 0;
      }
    }
  }
    /* 2^-16, multiples */
  if ((pw2 >> 4) != 0) {

    _ull_t *regPtr, *limit;
    pw2 >>= 4;
    limit = uPtr + (unsigned)(type >> 16);
    int16_t fcnt = 0;
      /* code approaches best written assembler, addressed most stalls
       * inotherwords, use caution rewording, huge penalties. */
    do {
      uint64_t work;
      uint32_t w0;
      int32_t flag;
      regPtr = uPtr;
      w0 = (uint32_t)regPtr->uss.w0;
      fcnt += (flag = -((regPtr->ull = regPtr->ull >> 16) == 0));
      work = (++regPtr)->ull;
      do {
        uint64_t m0 = w0 * 15258789062500ULL;
        w0 = (uint16_t)work;
        regPtr[flag].ull = (work = m0 + (work >> 16));
        ++regPtr;
      } while ((regPtr <= limit) && ((work = regPtr->ull) != 0));
      regPtr[flag].ull = w0 * 15258789062500ULL;
    } while ((--pw2) != 0);
    index = (int)(regPtr - uPtr) + 1;
    pw10 += fcnt * 18;
  }
    /* only pw10 used in function as exponent of power 10 */
  return ((pw10 << 16) | (index << 4) | 8);
}

/* enters with 'sSize = strPtr - sPtr;'.
 * s, sPtr could have been malloc'd on entry, but expected to have been
 * cleared, output used, prior to entry. Possibly previous real allocation,
 * output to 'buffer', FILE or supplied, and not free'd.
 * printf() supplies BUFSTRSIZE on entry, realtostr supplies 45 ulls.
 * BUFSTRSIZE considered large for normal requests,
 * 45 ulls covers all needs for double reals.
 * all real types generally need 8, [±][0][.](precision)[e±000[0]]
 * IS_TYPE_F adds exponent to count, [±][0](exponent)[.](precision)[e±000[0]]
 * IS_TYPE_F has possiblity of grouping:
 * GRP_PREFIX assumes thousands, xxx,xxx,xxx,xxx etc. = 6 per ull
 * can't reuse precision 'reg's as double duty... 18 bytes / 8 byte ull
 * bufS(string) must come first for free'ing of allocation
 */

#pragma mark *** External Interfaces ***

#if (!__GNUC_PREREQ(4,5))
#pragma GCC diagnostic ignored "-Wuninitialized"
#endif

unsigned char *
realtostr(uint32_t w160[5], unsigned char **s, int type,
          int arg_prec, int arg_flags, unsigned decimal) {

/*smash_stack();*/

  int exponent, rpos;   /* Rounding POSition */
  uint32_t (*math_func)(_ull_t *, int);
  unsigned char  *sPtr, dblt_sign;
    /* set up for percision of 792 digits, 18/ull * 44ull */
    /* malloc if exponent and percision dictate, 44ull = 352 bytes */
    /* reg: static reserved, regs: actual reg use address (stack or allocated) */
  _ull_t reg[45], *regs;
  /* supress warning due to zero jumps */
#if __GNUC_PREREQ(4,5)
#pragma GCC diagnostic ignored "-Wuninitialized"
#endif
    /* utPtr: top of stack, ubPtr: bottom of stack, ufPtr: fractional start */
  _ull_t *utPtr, *ubPtr, *ufPtr;
  int d0;   /* Digit0 (first digit) */
#if __GNUC_PREREQ(4,5)
#pragma GCC diagnostic pop
#endif
  uint64_t a;
  unsigned strsz, pw2, packed_return;

                        /*** input defaults ***/
  char *decimalPtr = (char*)&decimal;

  if (arg_prec < 0)  arg_prec = 6;
    /* CASE 'g':  initial zero(or first digit) is significant digit */
  if ((arg_prec) && (IS_TYPE_G(type)))  --arg_prec;

                        /*** sign ***/
  if ((w160[4] & 0x8000) != 0)  dblt_sign = '-';
  else {
    dblt_sign = 0;
    if ((arg_flags & (SIGN_PREFIX | SPC_PREFIX)) != 0)
      dblt_sign = (arg_flags & SPC_PREFIX) ? ' ' : '+';
  }
                        /*** exponent ***/
    /* extra math used for limiting register updating in math units. Also
     * allows allocation size when precision exceeds static buffersize */
  if ((exponent = w160[4] & 0x7FFF) == 0x7FFF) {
      /* inf or nan, nan(n-char-sequence) */
    *(sPtr = *s) = dblt_sign, sPtr += (dblt_sign != 0);
    return __n_char_sequence(w160, sPtr, (type | ((arg_flags & ALT_PREFIX) << 1)));
  }
  if (exponent < 0x3FFF) {
    uint32_t reg_min = (IS_QUAD(type)) ? 7 : 4;
    math_func = __negative_exponent_fp;
    pw2 = reg_min;

      /* these 'need' pw10 pre-calculations for extra decisions */
      /* !E can result in '0', no caculation needed,
       * >> 4 == 0 results in use of default,
       * default pw2 == 4 covers precision < 64 */
/* XXX with !e80, does pw2 cover precision < (NEG_REG_BITS + 1)? */
    if ( (!IS_TYPE_E(type)) ||
        ( (((uint32_t)(0x3FFF - exponent) >> 4) != 0) && (arg_prec >= 63) ) ) {
      pw2 = 0x3FFF - exponent;
        /* number of prefixing zeroes */
      pw2 = (((uint32_t)(((uint64_t)((uint32_t)pw2)
                                * (uint32_t)2585827973U) >> 32)) >> 1);
        /* check for all zeroes on a 'f' request */
      if ( (!IS_TYPE_E(type)) && ((unsigned)arg_prec < pw2) ) {
        if (pw2 > reg_min) {
          pw2 = reg_min;
          exponent = 0;
          *(uint64_t*)&w160[2] = 0;
/* XXX do I need? */
          *(uint64_t*)&w160[0] = 0;
          if (IS_TYPE_G(type))  type |= TYPE_F;
          type |= TYPE_ZERO;
            /* must not see as denormal for doubles */
            /* remove 'double' label, keep or set 'long double' */
          type &= ~(UPPER_TYPE << 8);
        } else {
          pw2 = reg_min;
        }
      } else {
          /* subtract zeroes from significant digits */
        uint32_t bits = (IS_QUAD(type)) ? 112 : 63;
        pw2 = (0x3FFF - exponent) + bits - pw2;
        if (pw2 > (unsigned)arg_prec)  pw2 = arg_prec;
          /* divide by 18 for number ulls needed */
        if (pw2 <= 38)
          pw2 = reg_min;
        else
          pw2 = reg_min + (uint32_t)((((uint64_t)(uint32_t)(pw2 - 38))
                                                * (uint32_t)238609295U) >> 32);
      }
    }
  } else {
    math_func = __positive_exponent_fp;
    pw2 = 2;
      /* when 'f', need maximum value */
    if ( ((exponent >= 0x40ce) && (arg_prec >= 27)) || (IS_TYPE_F(type)) ) {
      pw2 = exponent - 0x3FFF;
      pw2 = (((uint32_t)(((uint64_t)((uint32_t)pw2)
                                                * 2585827973U) >> 32)) >> 1);
        /* 'e' doesn't need full precision */
      if ((IS_TYPE_E(type)) && ((unsigned)arg_prec < pw2))
        pw2 = arg_prec;
        /* divide by 18 for number ulls needed */
      pw2 = 2 + (uint32_t)(((uint64_t)((uint32_t)pw2) * 252645135U) >> 32);
    }
  }
                        /*** allocation ***/
    /* allocate reg here based on exponent/precision, see above notes */
    /* group allocation, fprintf() to free, returning we allocating 's' */
  strsz = arg_prec + 8;
  type |= pw2 << 16;
    /* type F has special requirements, if positive exponent */
  if ((type << 31) & ~(exponent - 0x3FFF)) {
      /* grouping assumption of thousands: 1,000,000 */
      /* add in exponent: [±][0](exponent)[.](precision)[e±000[0]] */
      /* assume all pw2 as 'whole' */
      /* more accuracy, for less allocations, is faster */
    uint32_t triplets = (exponent - 0x3FFF);
    strsz += 1
          + (((uint32_t)(((uint64_t)((uint32_t)triplets) * 2585827973U) >> 32)) >> 1);
    if ((arg_flags & GRP_PREFIX) != 0)
      strsz += ((uint16_t)21845 * (uint32_t)triplets) >> 16;
  }
  regs = reg;
  if ((pw2 > 44) || (strsz > BUFSTRSIZE)) {
      /* if either static exceeds limit, allocate both */
    strsz = (strsz + ((1 << 6) - 1)) & ~((1 << 6) - 1);
      /* test for non-static string buffer */
    if (arg_flags & ALLOC_BUF)  free(*s);
      /* assignment to sPtr, sPtr is one tested and free'd in fprintf() */
    if ((sPtr = malloc((size_t)(strsz + ((pw2 + 1) * sizeof(_ull_t))))) == NULL)
      return sPtr;
    *s = sPtr;
    regs = (_ull_t*)(sPtr + strsz);
  }
    /* zero regs, no longer part of math units */
  do  regs[pw2].uls.hi = 0, regs[pw2].uls.lo = 0;  while ((--pw2));

                        /*** fractional load ***/
    /* assign 'fraction', 'exponent' info for passage to math units */
    /* w160[0,1] used on requested quadruple, or long double 128bit (not e80) */
  regs[0].uls.lo = exponent;
  regs[1].ull = *(uint64_t*)&w160[2];
  regs[2].ull = *(uint64_t*)&w160[0];
  if ((regs[1].ull | regs[2].ull) == 0) {
    exponent = 0;
    if (IS_TYPE_G(type))  type |= TYPE_F;
    type |= TYPE_ZERO;
    goto skip_round;
  }

                        /*** math ***/
  packed_return = (*math_func)(regs, type);
    /* transfer info returned into addresses, exponent */
  ubPtr = (utPtr = regs) + ((packed_return >> 4) & 0x3FF);
  exponent  = (d0 = _first_digit(utPtr->ull));
  exponent += (int32_t)packed_return >> 16;
  packed_return &= 0x0F;
  ufPtr = ((packed_return == 0) ? ubPtr :
          ((packed_return == 0x08) ? regs : ubPtr - (packed_return - 1)));

                        /*** rounding ***/
    /* assign position 1 pass display precision */
  rpos = arg_prec + 1;
  if (IS_TYPE_F(type))  {
      /* this excludes positive exponents, they will not jump to skip */
    if (arg_prec < ~exponent)  goto skip_round;
      /* positive exponents must count pre-radix as precision */
    if (~exponent >> 31)
      arg_prec += exponent;
      /* when negative, remove pre-zero count, positive add in whole portion */
    rpos += exponent;
  }
    /* rounding position <= first digit position, has front end zeroes */
  if ((rpos -= d0) > 0) {
    _ull_t *aPtr = utPtr;
      /* test rounding for second ull or higher, assume not large arg_prec */
    while (((++aPtr) < ubPtr) && (rpos > 18))  rpos -= 18;
      /* verify position has digits to round */
    if (aPtr < ubPtr) {
        /* set last ull, if rpos == 1 then exclude current aPtr */
      ubPtr = aPtr + (rpos != 1);
      if (ufPtr > ubPtr)  ufPtr = ubPtr;
      const uint64_t *fc = _fcarry;
      if ((aPtr->ull += fc[18 - rpos]) >= _CARRY) {
        uint64_t t = aPtr[-1].ull + 1ULL;
          /* carry forced propagation, makes ull == 0 */
        (ubPtr = aPtr)->ull = 0;
        (--aPtr)->ull = t;
        if ((aPtr == utPtr) && (t >= fc[19 + d0])) {
          exponent++;
            /* type 'f' has increase in precision if positive exponent */
            /* condition of == 18 never occurs */
          arg_prec += (type & ((uint32_t)~exponent >> 31));
          ++d0;
        }
      }
    }
  } else {
      /* first ull rounding */
    const uint64_t *fc = _fcarry;
      /* limit reads of ulls to next register */
    ubPtr = utPtr + 1;
    if (ufPtr > ubPtr)  ufPtr = ubPtr;
      /* add '5' to rounding location */
    a = utPtr->ull + fc[-rpos];
    utPtr->ull = a;
      /* test for propagation to new first digit */
    if (a >= fc[19 + d0]) {
      exponent++;
      if ((++d0) == 18) {
          /* rare case. inserted here to avoid alter of ulltobuffer for odd case
           * and testing all cases for this condition. */
          /* occurs @ 1e18, 1e-18
           9.9999950000000026e+17 first precision 6
           9.9999999999999987e+17 last
           9.9999950000000012e-19 first precision 6
           9.9999999999999988e-19 last
           */
        *(sPtr = *s) = dblt_sign, sPtr += (dblt_sign != 0);
          /* type f and negative exponent */
        if (((uint32_t)exponent >> 31) & type) {
          unsigned j, k;
          *sPtr++ = '0';
          *(uint16_t*)sPtr = *(uint16_t*)decimalPtr;
          j = 1 + (decimalPtr[1] != 0);
          sPtr += j, decimalPtr += j;
          if ((j = -exponent - 1)) {
            if (j >= (unsigned)arg_prec) goto L24;
            arg_prec -= j;
            k = j & 3;
            if ((j = (int)((unsigned)j >> 2)))
              do  *(uint32_t*)sPtr = 0x30303030U, sPtr += 4;  while ((--j));
            *(uint32_t*)sPtr = 0x30303030U;
            sPtr += k;
          }
          *sPtr++ = '1';
          if ((arg_prec -= 1) > 0)  goto L245;
          return sPtr;
        }
        *sPtr++ = '1';
        if ( (IS_TYPE_G(type)) && (((unsigned)exponent) <= (unsigned)arg_prec) )
          type |= TYPE_F;
        if ( (arg_prec) && (!(IS_TYPE_G(type)) || (arg_flags & ALT_PREFIX)) ) {
          unsigned dflag = (decimalPtr[1] != 0);
          *(uint16_t*)sPtr = *(uint16_t*)decimalPtr;
          sPtr += dflag + 1, decimalPtr += dflag + 1;
          goto L245;
        }
        goto L25;
      }
    }
  }
                    /*** adjust for entered type... g ***/
    /* rounding can adjust exponent, so now can test if conditions met */
  if (IS_TYPE_G(type)) {
      /* this should exclude negatives */
    if (((unsigned)exponent) <= (unsigned)arg_prec)  goto L23;
    if (((unsigned)(exponent + 4)) <= 4) {
      arg_prec -= exponent;
L23:  type |= TYPE_F;
    }
  }
                        /*** output section ***/
skip_round:
  *(sPtr = *s) = dblt_sign, sPtr += (dblt_sign != 0);
    /* type f and negative exponent, <zero> */
  if ((((uint32_t)exponent >> 31) & type) | (IS_TYPE_ZERO(type))) {
    unsigned j, k;
    *sPtr++ = '0';
    *(uint16_t*)sPtr = *(uint16_t*)decimalPtr;
    j = 1 + (decimalPtr[1] != 0);
    if (!arg_prec) {
      if (IS_TYPE_E(type))  goto L11;
      if (arg_flags & ALT_PREFIX)  return (sPtr + j);
      return sPtr;
    }
    sPtr += j, decimalPtr += j;
    if ((j = -exponent - 1)) {
      if (j >= (unsigned)arg_prec) goto L24;
      arg_prec -= j;
      k = j & 3;
      if ((j = (int)((unsigned)j >> 2)))
        do  *(uint32_t*)sPtr = 0x30303030U, sPtr += 4;  while ((--j));
      *(uint32_t*)sPtr = 0x30303030U;
      sPtr += k;
    }
  }
  {
    /*      first ull
     if utPtr == ufPtr then all digits are part of precision. d0 does not
     count digits but returns log10... log10(1-9) = 0
     if 'f', data starts at sPtr. Otherwise starts sPtr + size of decimal.
     will write on last char of decimal, which then that char copies to sPtr
     then overwritten by decimal.
     (!IS_TYPE_F(type)) stringPtr = sPtr + decimalsz
     To determine if first digit is used as precision use
     (decimalPtr == (char*)&decimal)
     Two-byte decimal determined by (((char*)&decimal)[1] != 0)
     */

      /* deal with d0 */
      /* _fcarry multiplier based on d0, then can adjust d0 */
    const uint64_t *mConst = &_fcarry[35 - d0];
    if ((d0 += ((utPtr == ufPtr) & IS_TYPE_F(type))) > arg_prec)  d0 = arg_prec;

    unsigned char *stringPtr = sPtr;
    if (!IS_TYPE_F(type))
      stringPtr += 1 + (((char*)&decimal)[1] != 0);
      /* traps d0 = [0,9] or all ull multipliers */
    if (utPtr->ull < 1000000000) {
      _ull_t b;
        /* buf output use pre-incr (++) */
      unsigned char *buf = stringPtr - 1;
      uint32_t f = (uint32_t)((uint64_t)utPtr->uls.lo * (uint32_t)mConst[(-9)]);
      uint32_t n = d0 + (decimalPtr == (char*)&decimal);
      b.ull = (uint64_t)(f << 2) * 2882303762U;
      goto L2;
      do {
        b.uls.hi = (((b.uls.hi << 4) >> 4) * 10) + (b.uls.lo >> 28);
        b.uls.lo <<= 4;
    L2: *++buf = (b.uls.hi >> 28) + '0';
      } while ((--n));
    } else {
      _ulltobuffer_nlr((divlu2r(utPtr->ull * (uint32_t)*mConst)), stringPtr,
                                        (d0 + (decimalPtr == (char*)&decimal)));
    }
      /* type 'e' requires radix after first digit */
    if (!IS_TYPE_F(type)) {
        /* move first digit to first spot */
      *sPtr = *stringPtr;
        /* can not use short write here,
         * will overwrite any digits following if sz == 1 */
      *++sPtr = *decimalPtr;
      if (*++decimalPtr != 0)  *++sPtr = *decimalPtr, decimalPtr++;
        /* a must for if arg_prec */
      sPtr++;
      if (!arg_prec) {
        if (arg_flags & ALT_PREFIX)
              sPtr += (((char*)&decimal)[1] != 0);
        else  sPtr -= 1 + (((char*)&decimal)[1] != 0);
        goto L11;
      }
    }
    sPtr += d0 + (decimalPtr == (char*)&decimal);
    if ((arg_prec -= d0) == 0)  goto L25;
  }

  do {
      /* case where has whole/fraction */
    if ((++utPtr) >= ufPtr) {
      if (decimalPtr == (char*)&decimal) {
          /* this also used as delimiter for whole part, stop point for
           * trailing zero removal */
        unsigned dflag = (decimalPtr[1] != 0);
        *(uint16_t*)sPtr = *(uint16_t*)decimalPtr;
        sPtr += dflag + 1, decimalPtr += dflag + 1;
      }
        /* set to last ull, if not already at */
      if (ufPtr == ubPtr)  break;
      ufPtr = ubPtr;
    }
    if ((arg_prec - 18) < 0) {
      _ulltobuffer_nlr(divlu2r(utPtr->ull), sPtr, arg_prec);
      sPtr += arg_prec;
      goto L25;
    }
    arg_prec -= 18;
    _ulltobuffer_18lr(divlu2r(utPtr->ull), sPtr);
    sPtr += 18;
  } while (1);
L24:
  if ( (arg_prec) && (!(IS_TYPE_G(type)) || (arg_flags & ALT_PREFIX)) ) {
      /* safe because of added room for exponent in allocation */
    unsigned k;
  L245:
    k = arg_prec & 3;
    arg_prec = (int)((unsigned)arg_prec >> 2);
    *(uint32_t*)sPtr = 0x30303030U;
    if (arg_prec)
      do  sPtr += 4, *(uint32_t*)sPtr = 0x30303030U;  while ((--arg_prec));
    sPtr += k;
  }
L25:
    /* case 'g', remove any trailing <zero>s */
  if (IS_TYPE_E(type))  goto L11;
  if (!(arg_flags & ALT_PREFIX)) {
      /* test only 'last' byte of decimal point.
       * '== 0' only occurs if written. decimal is at max 2 bytes */
    if (*decimalPtr == 0) {
      unsigned char ch;
      ch = *--sPtr;
      if (IS_TYPE_G(type))
        while (ch == '0')  ch = *--sPtr;
        /* decimal point is at max 2 bytes */
      if (ch != (unsigned char)decimalPtr[-1])  ++sPtr;
      else if (&decimalPtr[-1] != (char*)&decimal) --sPtr;
    }
  } else if (*decimalPtr != 0) {
    unsigned dflag = (decimalPtr[1] != 0);
    *(uint16_t*)sPtr = *(uint16_t*)decimalPtr;
    sPtr += dflag + 1, decimalPtr += dflag + 1;
  }
                        /*** exponent output ***/
  if (IS_TYPE_F(type))  return sPtr;
L11:
    /* converts g,G to e,E and adds sign (portable character set) */
  *(uint16_t*)sPtr = cconst16_t(('e' ^ ((type & 0x20) ^ UPPER_TYPE)),
                                ('+' - ((exponent >> 31) << 1)));
    /* absolute value */
  exponent = (exponent ^ (exponent >> 31)) - (exponent >> 31);
  if (exponent < 10) {
      /* exponent output reqires 2 digit display */
    *(uint16_t*)(&sPtr[2]) = cconst16_t('0',('0' + exponent));
    return (sPtr + 4);
  }
    /* quadruple, long doubles range [10,4966] */
  uint32_t ecd = (uint32_t)(((uint64_t)((uint32_t)exponent << 19)
                                                      * 2199023989U) >> 32);
  ++sPtr;
  if (exponent >= 1000)  *++sPtr = (ecd >> 28) + '0';
  ecd = ((ecd << 4) >> 4) * 10;
  if (exponent >= 100)   *++sPtr = (ecd >> 28) + '0';
  sPtr[1] = ((ecd = (((ecd << 4) >> 4) * 10)) >> 28) + '0';
  sPtr[2] = ((((ecd << 4) >> 4) * 10) >> 28) + '0';
  return (sPtr + 3);
}

/* "[±]0xh.hhhhp±d"
 * normalized: h. force to high nibble for higher precision
 * denornalize: .h force highest nibble as above
 * output precision based on 'g' style
 */
unsigned char *
realtohex(uint32_t w160[5], unsigned char **s, int type,
         int arg_prec, int arg_flags, unsigned decimal) {

  unsigned char *sPtr = *s;
  int exponent;
  uint64_t frac;

                      /*** input defaults ***/
  char *decimalPtr = (char*)&decimal;
  if (arg_prec < 0) {
    arg_prec = (IS_DBL(type)) ? 13 : 15;
    type |= TYPE_G;
  }

    /* determine upper of lower case output */
  const char    *_outvalues = _const_outStr;
  if (IS_UPPER_TYPE(type)) _outvalues += 32;

                      /*** sign && exponent ***/
  exponent = w160[4] & 0x7FFF;
  if ((w160[4] & 0x8000) != 0)  *sPtr++ = '-';
  else {
    if ((arg_flags & (SIGN_PREFIX | SPC_PREFIX)) != 0)
      *sPtr++ =  (arg_flags & SPC_PREFIX) ? ' ' : '+';
  }
  if (exponent == 0x7FFF)
    return __n_char_sequence(w160, sPtr,
                                  (type | ((arg_flags & ALT_PREFIX) << 1)));
  exponent -= 16383;

                      /*** prefix  0x or 0X ***/
  *((uint16_t *)sPtr) = ((uint16_t *)_outvalues)[12];
  sPtr += 2;

                      /*** fractional load ***/
  if ((frac = *(uint64_t*)&w160[2]) == 0) {
    exponent = 0;
    *sPtr++ = '0';
    if (arg_flags & ALT_PREFIX) {
      unsigned dflag = (decimalPtr[1] != 0);
      *(uint16_t*)sPtr = *(uint16_t*)decimalPtr;
      sPtr += dflag + 1, decimalPtr += dflag + 1;
    }
    goto out_exp;
  }
    /* alter exponent by 'shift' amount used to normalize */
  if ((frac & 0x8000000000000000) != 0)
    exponent -= 3;
                        /*** rounding ***/
  if (arg_prec < 15) {
    uint64_t rmask = 0x0FFFFFFFFFFFFFFFULL >> (arg_prec << 2);
    uint64_t radd = (rmask + 1) >> 1;
      /* not sure need to mask result */
    if ((frac & rmask) > radd)  frac = (frac + radd) & ~rmask;
  }

    /* rotate high nibble to low nibble */
  uint32_t snfct = 0;
  if ((frac & 0x8000000000000000ULL) == 0) {
    frac <<= 1;
    snfct = 1;
    *sPtr++ = '0';
  } else {
    frac = (frac >> 60) | (frac << 4);
    *sPtr++ = _outvalues[((uint32_t)frac & 0x0000000F)];
  }
    /* decimal point */
  unsigned dflag = (decimalPtr[1] != 0);
  *(uint16_t*)sPtr = *(uint16_t*)decimalPtr;
  sPtr += dflag + 1, decimalPtr += dflag + 1;
    /* precision based on type, adjust if denormal */
  dflag = (arg_prec < ((IS_DBL(type)) ? 13 : 15))
         ? arg_prec : ((IS_DBL(type)) ? 13 : 15);
  arg_prec -= dflag;
  if (snfct == 1)
    dflag++;
  do {
      /* rotate into position */
    frac = (frac >> 60) | (frac << 4);
    *sPtr++ = _outvalues[((uint32_t)frac & 0x0000000F)];
  } while ((--dflag));

  if (IS_TYPE_G(type)) {
    unsigned char ch = *--sPtr;
    while (ch == '0')  ch = *--sPtr;
    if (ch != (unsigned char)decimalPtr[-1])  ++sPtr;
    else if (&decimalPtr[-1] != (char*)&decimal) --sPtr;
  } else if (arg_prec) {
    dflag = arg_prec & 3;
    arg_prec = (int)((unsigned)arg_prec >> 2);
    *(uint32_t*)sPtr = 0x30303030U;
    if (arg_prec)  do  sPtr += 4, *(uint32_t*)sPtr = 0x30303030U;  while ((--arg_prec));
    sPtr += dflag;
  }

  /*** exponent output (p±d p[-16494,+16383]) ***/
out_exp:
  *sPtr++ = _outvalues[19];
    /* based on portable character set */
  *sPtr++ = '+' - ((exponent >> 31) << 1);
    /* absolute value */
  exponent = (exponent ^ (exponent >> 31)) - (exponent >> 31);

  if (exponent < 10) {
    *sPtr = exponent + '0';
    return (sPtr + 1);
  }
    /* divide exponent by 10000   3518437209 */
  uint32_t ecd = (uint32_t)(((uint64_t)(uint32_t)exponent * 3518437209U) >> 17);
  ecd += 1 << 14;
  if (exponent >= 10000)  *sPtr++ = (ecd >> 28) + '0';
  ecd = ((ecd << 4) >> 4) * 10;
  if (exponent >= 1000)   *sPtr++ = (ecd >> 28) + '0';
  ecd = ((ecd << 4) >> 4) * 10;
  if (exponent >= 100)    *sPtr++ = (ecd >> 28) + '0';
  sPtr[0] = ((ecd = (((ecd << 4) >> 4) * 10)) >> 28) + '0';
  sPtr[1] = ((((ecd << 4) >> 4) * 10) >> 28) + '0';

  return (sPtr + 2);
}

#pragma mark *** Utility External Interfaces ***

/* need different long double definitions 106,64,53 */
long double
qtold(quadruple qdbl) {

  octle iqdbl;
  unsigned char *cdbl = (unsigned char*)&iqdbl;
  iqdbl = qdbl;

#if (__LDBL_MANT_DIG__ == 64)
  iqdbl.ulls.ull0 = (iqdbl.ulls.ull1 << 15) | (iqdbl.uss.us3 >> 1);
  iqdbl.ulls.ull1 >>= 48;
  iqdbl.uls.ul1 = (((iqdbl.uss.us4 & 0x7fff) != 0) << 31)
                 | (iqdbl.uls.ul1 & 0x7fffffff);
#else
  int16_t sign = (idbl.uss.us7 & 0x8000) >> 4;
  int16_t e16 = (idbl.uss.us7 & 0x7fff) - 16383 + 1023;
  iqdbl.uss.us7 = 0;
  iqdbl.ulls.ull1 <<= 4;
  iqdbl.uss.us4 |= iqdbl.uss.us3 >> 12;
    /* double-double and double */
 #if (__LDBL_MANT_DIG__ == 106)
    /* assumes low double exponent different by 2^-52, keeps sign
     * of course this doesnt make sense on denormals
     * thus no extra precision, low double has to be zero */
    /* mantissas into correct positions */
  iqdbl.ulls.ull0 >>= 8;
  iqdbl.uss.us3 |= ((e16 - 52) | sign) << 4;
  if (e16 <= 52) {
    iqdbl.ulls.ull0 = 0;
      /* unknown if benefits hardware when 0 */
    iqdbl.uss.us3 = sign << 4;
  }
 #endif
  if (e16 <= 0) {
    if (e16 <= -52)  iqdbl.ulls.ull1 = 0;
    else             iqdbl.ulls.ull1 >>= (12 - e16);
    e16 = 0;
  } else if (e16 > 2047) {
      /* set to inf */
    iqdbl.ulls.ull1 = 0;
    e16 = 2047;
  }
  iqdbl.uss.us7 |= (e16 | sign) << 4;
#endif
  return (*(long double*)cdbl);
}

double
ldtod(long double ldbl) {

  unsigned char *wc = (unsigned char *)&ldbl;
#if ((__LDBL_MANT_DIG__ == 53) || (__LDBL_MANT_DIG__ == 106))
    /* ppc double-double  XXX untested, just drop lower 64, big endian */
    /* wc will point to highest byte of 128 bit */
  return (*(double*)wc);
#else
    /* since padding occurs, non gpr registers tend towards 128bit use: */
  octle ildbl;
  int16_t e16;
  int16_t sign;
  ildbl = *(octle*)wc;
  wc = (unsigned char *)&ildbl;
    /* 3 known, use IEEE-754 16bit sign/exp, biased */
#if (__LDBL_MANT_DIG__ > 65)
    /* trap binary128 */
  e16 = ildbl.uss.us7;
  ildbl.ulls.ull0 = ildbl.ulls.ull1 >> 12;
#else
    /* trap binary64, e80 (64/63 mantissas) */
  e16 = ildbl.uss.us4;
 #if (__LDBL_MANT_DIG__ == 64)
  ildbl.uss.us3 &= 0x7fff;
  ildbl.ulls.ull0 >>= 11;
 #else
  ildbl.ulls.ull0 = (((ildbl.ulls.ull1 << 4)
                      | (uint64_t)(ildbl.uss.us3 >> 12))) >> 12;
 #endif
#endif
  sign = (e16 & 0x8000) >> 4;
  e16 = (e16 & 0x7FFF) - 16383 + 1023;
  if (e16 <= 0) {
    if (e16 <= -52)  ildbl.ulls.ull0 = 0;
    else             ildbl.ulls.ull0 >>= (12 - e16);
    e16 = 0;
  } else if (e16 > 2047) {
      /* set to inf */
    ildbl.ulls.ull0 = 0;
    e16 = 2047;
  }
  ildbl.uss.us3 |= (e16 | sign) << 4;
  return (*(double*)wc);
#endif
}



int
main(void) {
  phx_printf("%.43f\n", 0x1.52f8a8e32e982p-140);
  phx_printf("%g\n", 355.0/113.0);
  return 0;
}

next prev parent reply	other threads:[~2021-11-06 12:06 UTC|newest]

Thread overview: 25+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <CAL9Mx1uMOz2wfqbMpD_xfA=D9JkpFzVz6AR_DKHK34AvrGOP6w@mail.gmail.com>
2021-11-03 21:26 ` Fwd: " Pavel M
2021-11-04 12:24   ` Corinna Vinschen
2021-11-04 16:48     ` Keith Packard
2021-11-04 17:27       ` Dave Nadler
2021-11-04 17:58         ` Keith Packard
2021-11-04 21:28           ` Brian Inglis
2021-11-05  2:54             ` Keith Packard
2021-11-05 16:00               ` Steven Abner
2021-11-06 12:06                 ` Steven Abner [this message]
2021-11-06 23:25                   ` Steven Abner
2021-11-08  9:57               ` Corinna Vinschen
2021-11-04 20:57       ` Corinna Vinschen
2021-11-28  7:43     ` Takashi Yano
2021-11-28 12:38       ` Takashi Yano
2021-11-28 13:16         ` Takashi Yano
2021-11-29 10:56           ` Takashi Yano
2021-11-29 14:24             ` Takashi Yano
2021-11-29 15:55               ` Corinna Vinschen
2021-11-30 10:51                 ` Takashi Yano
2021-11-30 15:09                   ` Corinna Vinschen
2021-11-30 20:38                     ` Takashi Yano
2021-11-30 21:17                       ` Takashi Yano
2021-12-06  9:53                         ` Corinna Vinschen
2021-12-06 10:26                           ` Takashi Yano
2021-12-01 12:05                     ` Takashi Yano

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1636200365.25270.0@smtp.mail.att.net \
    --to=pheonix.sja@att.net \
    --cc=newlib@sourceware.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).