public inbox for libc-alpha@sourceware.org
 help / color / mirror / Atom feed
* [PATCH] printf_fp: Get rid of alloca.
@ 2023-07-05 17:19 Joe Simmons-Talbott
  2023-08-28 20:36 ` Joe Simmons-Talbott
  2023-08-31 19:02 ` Adhemerval Zanella Netto
  0 siblings, 2 replies; 8+ messages in thread
From: Joe Simmons-Talbott @ 2023-07-05 17:19 UTC (permalink / raw)
  To: libc-alpha; +Cc: Joe Simmons-Talbott

Replace unbounded alloca calls with scratch_buffers to avoid potential
stack overflow.
---
 stdio-common/printf_fp.c | 59 ++++++++++++++++++++++++++++------------
 1 file changed, 41 insertions(+), 18 deletions(-)

diff --git a/stdio-common/printf_fp.c b/stdio-common/printf_fp.c
index 6f22985ba1..9d6925a624 100644
--- a/stdio-common/printf_fp.c
+++ b/stdio-common/printf_fp.c
@@ -28,6 +28,7 @@
 #include <gmp-mparam.h>
 #include <gmp.h>
 #include <ieee754.h>
+#include <scratch_buffer.h>
 #include <stdlib/gmp-impl.h>
 #include <stdlib/longlong.h>
 #include <stdlib/fpioconst.h>
@@ -181,8 +182,15 @@ __printf_fp_buffer_1 (struct __printf_buffer *buf, locale_t loc,
 
   /* Buffer in which we produce the output.  */
   char *wbuffer = NULL;
-  /* Flag whether wbuffer and buffer are malloc'ed or not.  */
-  int buffer_malloced = 0;
+
+  struct scratch_buffer sbuf_frac;
+  scratch_buffer_init (&sbuf_frac);
+  struct scratch_buffer sbuf_tmp;
+  scratch_buffer_init (&sbuf_tmp);
+  struct scratch_buffer sbuf_scale;
+  scratch_buffer_init (&sbuf_scale);
+  struct scratch_buffer sbuf_wbuffer;
+  scratch_buffer_init (&sbuf_wbuffer);
 
   p.expsign = 0;
 
@@ -268,9 +276,27 @@ __printf_fp_buffer_1 (struct __printf_buffer *buf, locale_t loc,
 			     + (GREATER_MANT_DIG / BITS_PER_MP_LIMB > 2
 				? 8 : 4))
 			    * sizeof (mp_limb_t);
-    p.frac = (mp_limb_t *) alloca (bignum_size);
-    p.tmp = (mp_limb_t *) alloca (bignum_size);
-    p.scale = (mp_limb_t *) alloca (bignum_size);
+    
+    if (!scratch_buffer_set_array_size (&sbuf_frac, 1, bignum_size))
+      {
+        __printf_buffer_mark_failed (buf);
+        goto free_mem_out;
+      }
+    p.frac = sbuf_frac.data;
+
+    if (!scratch_buffer_set_array_size (&sbuf_tmp, 1, bignum_size))
+      {
+        __printf_buffer_mark_failed (buf);
+        goto free_mem_out;
+      }
+    p.tmp = sbuf_tmp.data;
+
+    if (!scratch_buffer_set_array_size (&sbuf_scale, 1, bignum_size))
+      {
+        __printf_buffer_mark_failed (buf);
+        goto free_mem_out;
+      }
+    p.scale = sbuf_scale.data;
   }
 
   /* We now have to distinguish between numbers with positive and negative
@@ -744,19 +770,13 @@ __printf_fp_buffer_1 (struct __printf_buffer *buf, locale_t loc,
 	return;
       }
     size_t wbuffer_to_alloc = 2 + chars_needed;
-    buffer_malloced = ! __libc_use_alloca (wbuffer_to_alloc);
-    if (__builtin_expect (buffer_malloced, 0))
+    if (!scratch_buffer_set_array_size (&sbuf_wbuffer, 1, wbuffer_to_alloc))
       {
-	wbuffer = malloc (wbuffer_to_alloc);
-	if (wbuffer == NULL)
-	  {
-	    /* Signal an error to the caller.  */
-	    __printf_buffer_mark_failed (buf);
-	    return;
-	  }
+	/* Signal an error to the caller.  */
+	__printf_buffer_mark_failed (buf);
+	goto free_mem_out;
       }
-    else
-      wbuffer = alloca (wbuffer_to_alloc);
+    wbuffer = sbuf_wbuffer.data;
     wcp = wstartp = wbuffer + 2;	/* Let room for rounding.  */
 
     /* Do the real work: put digits in allocated buffer.  */
@@ -1025,8 +1045,11 @@ __printf_fp_buffer_1 (struct __printf_buffer *buf, locale_t loc,
       __printf_buffer_pad (buf, info->pad, width);
   }
 
-  if (buffer_malloced)
-    free (wbuffer);
+free_mem_out:
+  scratch_buffer_free (&sbuf_frac);
+  scratch_buffer_free (&sbuf_tmp);
+  scratch_buffer_free (&sbuf_scale);
+  scratch_buffer_free (&sbuf_wbuffer);
 }
 
 /* ASCII to localization translation.  Multibyte version.  */
-- 
2.39.2


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] printf_fp: Get rid of alloca.
  2023-07-05 17:19 [PATCH] printf_fp: Get rid of alloca Joe Simmons-Talbott
@ 2023-08-28 20:36 ` Joe Simmons-Talbott
  2023-08-31 19:02 ` Adhemerval Zanella Netto
  1 sibling, 0 replies; 8+ messages in thread
From: Joe Simmons-Talbott @ 2023-08-28 20:36 UTC (permalink / raw)
  To: libc-alpha

Ping.

On Wed, Jul 05, 2023 at 01:19:38PM -0400, Joe Simmons-Talbott wrote:
> Replace unbounded alloca calls with scratch_buffers to avoid potential
> stack overflow.
> ---
>  stdio-common/printf_fp.c | 59 ++++++++++++++++++++++++++++------------
>  1 file changed, 41 insertions(+), 18 deletions(-)
> 
> diff --git a/stdio-common/printf_fp.c b/stdio-common/printf_fp.c
> index 6f22985ba1..9d6925a624 100644
> --- a/stdio-common/printf_fp.c
> +++ b/stdio-common/printf_fp.c
> @@ -28,6 +28,7 @@
>  #include <gmp-mparam.h>
>  #include <gmp.h>
>  #include <ieee754.h>
> +#include <scratch_buffer.h>
>  #include <stdlib/gmp-impl.h>
>  #include <stdlib/longlong.h>
>  #include <stdlib/fpioconst.h>
> @@ -181,8 +182,15 @@ __printf_fp_buffer_1 (struct __printf_buffer *buf, locale_t loc,
>  
>    /* Buffer in which we produce the output.  */
>    char *wbuffer = NULL;
> -  /* Flag whether wbuffer and buffer are malloc'ed or not.  */
> -  int buffer_malloced = 0;
> +
> +  struct scratch_buffer sbuf_frac;
> +  scratch_buffer_init (&sbuf_frac);
> +  struct scratch_buffer sbuf_tmp;
> +  scratch_buffer_init (&sbuf_tmp);
> +  struct scratch_buffer sbuf_scale;
> +  scratch_buffer_init (&sbuf_scale);
> +  struct scratch_buffer sbuf_wbuffer;
> +  scratch_buffer_init (&sbuf_wbuffer);
>  
>    p.expsign = 0;
>  
> @@ -268,9 +276,27 @@ __printf_fp_buffer_1 (struct __printf_buffer *buf, locale_t loc,
>  			     + (GREATER_MANT_DIG / BITS_PER_MP_LIMB > 2
>  				? 8 : 4))
>  			    * sizeof (mp_limb_t);
> -    p.frac = (mp_limb_t *) alloca (bignum_size);
> -    p.tmp = (mp_limb_t *) alloca (bignum_size);
> -    p.scale = (mp_limb_t *) alloca (bignum_size);
> +    
> +    if (!scratch_buffer_set_array_size (&sbuf_frac, 1, bignum_size))
> +      {
> +        __printf_buffer_mark_failed (buf);
> +        goto free_mem_out;
> +      }
> +    p.frac = sbuf_frac.data;
> +
> +    if (!scratch_buffer_set_array_size (&sbuf_tmp, 1, bignum_size))
> +      {
> +        __printf_buffer_mark_failed (buf);
> +        goto free_mem_out;
> +      }
> +    p.tmp = sbuf_tmp.data;
> +
> +    if (!scratch_buffer_set_array_size (&sbuf_scale, 1, bignum_size))
> +      {
> +        __printf_buffer_mark_failed (buf);
> +        goto free_mem_out;
> +      }
> +    p.scale = sbuf_scale.data;
>    }
>  
>    /* We now have to distinguish between numbers with positive and negative
> @@ -744,19 +770,13 @@ __printf_fp_buffer_1 (struct __printf_buffer *buf, locale_t loc,
>  	return;
>        }
>      size_t wbuffer_to_alloc = 2 + chars_needed;
> -    buffer_malloced = ! __libc_use_alloca (wbuffer_to_alloc);
> -    if (__builtin_expect (buffer_malloced, 0))
> +    if (!scratch_buffer_set_array_size (&sbuf_wbuffer, 1, wbuffer_to_alloc))
>        {
> -	wbuffer = malloc (wbuffer_to_alloc);
> -	if (wbuffer == NULL)
> -	  {
> -	    /* Signal an error to the caller.  */
> -	    __printf_buffer_mark_failed (buf);
> -	    return;
> -	  }
> +	/* Signal an error to the caller.  */
> +	__printf_buffer_mark_failed (buf);
> +	goto free_mem_out;
>        }
> -    else
> -      wbuffer = alloca (wbuffer_to_alloc);
> +    wbuffer = sbuf_wbuffer.data;
>      wcp = wstartp = wbuffer + 2;	/* Let room for rounding.  */
>  
>      /* Do the real work: put digits in allocated buffer.  */
> @@ -1025,8 +1045,11 @@ __printf_fp_buffer_1 (struct __printf_buffer *buf, locale_t loc,
>        __printf_buffer_pad (buf, info->pad, width);
>    }
>  
> -  if (buffer_malloced)
> -    free (wbuffer);
> +free_mem_out:
> +  scratch_buffer_free (&sbuf_frac);
> +  scratch_buffer_free (&sbuf_tmp);
> +  scratch_buffer_free (&sbuf_scale);
> +  scratch_buffer_free (&sbuf_wbuffer);
>  }
>  
>  /* ASCII to localization translation.  Multibyte version.  */
> -- 
> 2.39.2
> 


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] printf_fp: Get rid of alloca.
  2023-07-05 17:19 [PATCH] printf_fp: Get rid of alloca Joe Simmons-Talbott
  2023-08-28 20:36 ` Joe Simmons-Talbott
@ 2023-08-31 19:02 ` Adhemerval Zanella Netto
  2023-09-01 12:20   ` Florian Weimer
  1 sibling, 1 reply; 8+ messages in thread
From: Adhemerval Zanella Netto @ 2023-08-31 19:02 UTC (permalink / raw)
  To: Joe Simmons-Talbott, libc-alpha



On 05/07/23 14:19, Joe Simmons-Talbott via Libc-alpha wrote:
> Replace unbounded alloca calls with scratch_buffers to avoid potential
> stack overflow.
> ---
>  stdio-common/printf_fp.c | 59 ++++++++++++++++++++++++++++------------
>  1 file changed, 41 insertions(+), 18 deletions(-)
> 
> diff --git a/stdio-common/printf_fp.c b/stdio-common/printf_fp.c
> index 6f22985ba1..9d6925a624 100644
> --- a/stdio-common/printf_fp.c
> +++ b/stdio-common/printf_fp.c
> @@ -28,6 +28,7 @@
>  #include <gmp-mparam.h>
>  #include <gmp.h>
>  #include <ieee754.h>
> +#include <scratch_buffer.h>
>  #include <stdlib/gmp-impl.h>
>  #include <stdlib/longlong.h>
>  #include <stdlib/fpioconst.h>
> @@ -181,8 +182,15 @@ __printf_fp_buffer_1 (struct __printf_buffer *buf, locale_t loc,
>  
>    /* Buffer in which we produce the output.  */
>    char *wbuffer = NULL;
> -  /* Flag whether wbuffer and buffer are malloc'ed or not.  */
> -  int buffer_malloced = 0;
> +
> +  struct scratch_buffer sbuf_frac;
> +  scratch_buffer_init (&sbuf_frac);
> +  struct scratch_buffer sbuf_tmp;
> +  scratch_buffer_init (&sbuf_tmp);
> +  struct scratch_buffer sbuf_scale;
> +  scratch_buffer_init (&sbuf_scale);
> +  struct scratch_buffer sbuf_wbuffer;
> +  scratch_buffer_init (&sbuf_wbuffer);
>  
>    p.expsign = 0;
>  
> @@ -268,9 +276,27 @@ __printf_fp_buffer_1 (struct __printf_buffer *buf, locale_t loc,
>  			     + (GREATER_MANT_DIG / BITS_PER_MP_LIMB > 2
>  				? 8 : 4))
>  			    * sizeof (mp_limb_t);
> -    p.frac = (mp_limb_t *) alloca (bignum_size);
> -    p.tmp = (mp_limb_t *) alloca (bignum_size);
> -    p.scale = (mp_limb_t *) alloca (bignum_size);
> +    
> +    if (!scratch_buffer_set_array_size (&sbuf_frac, 1, bignum_size))
> +      {
> +        __printf_buffer_mark_failed (buf);
> +        goto free_mem_out;
> +      }
> +    p.frac = sbuf_frac.data;
> +
> +    if (!scratch_buffer_set_array_size (&sbuf_tmp, 1, bignum_size))
> +      {
> +        __printf_buffer_mark_failed (buf);
> +        goto free_mem_out;
> +      }
> +    p.tmp = sbuf_tmp.data;
> +
> +    if (!scratch_buffer_set_array_size (&sbuf_scale, 1, bignum_size))
> +      {
> +        __printf_buffer_mark_failed (buf);
> +        goto free_mem_out;
> +      }
> +    p.scale = sbuf_scale.data;
>    }

The maximum size of frac/tmp/scale is bound by the largest size of floating
supported by the ABI.  The p.exponent will at maximum LDBL_MAX_EXP, so the
required size will depend of the maximum support floating point support
(2080 for most architectures, otherwise 160).

Using scratch_buffer will already increase the minimum stack usage for ~3k,
so I wonder if would be better to just define the maximum mp_limb_t size
direct on hack_digit_param:

--
diff --git a/stdio-common/printf_fp.c b/stdio-common/printf_fp.c
index 6f22985ba1..d9fcd00b4b 100644
--- a/stdio-common/printf_fp.c
+++ b/stdio-common/printf_fp.c
@@ -52,7 +52,20 @@
    An MP variable occupies a varying number of entries in its array.  We keep
    track of this number for efficiency reasons.  Otherwise we would always
    have to process the whole array.  */
-#define MPN_VAR(name) mp_limb_t *name; mp_size_t name##size
+
+/* When _Float128 is enabled in the library and ABI-distinct from long
+   double, we need mp_limbs enough for any of them.  */
+#if __HAVE_DISTINCT_FLOAT128
+# define GREATER_MANT_DIG FLT128_MANT_DIG
+#else
+# define GREATER_MANT_DIG LDBL_MANT_DIG
+#endif
+
+enum { MAX_EXP_LIMB_SIZE =
+    ((LDBL_MAX_EXP + BITS_PER_MP_LIMB - 1) / BITS_PER_MP_LIMB
+     + (GREATER_MANT_DIG / BITS_PER_MP_LIMB > 2 ? 8 : 4)) };
+
+#define MPN_VAR(name) mp_limb_t name[MAX_EXP_LIMB_SIZE]; mp_size_t name##size

 #define MPN_ASSIGN(dst,src)                                                  \
   memcpy (dst, src, (dst##size = src##size) * sizeof (mp_limb_t))
@@ -158,13 +171,6 @@ __printf_fp_buffer_1 (struct __printf_buffer *buf, locale_t loc,
   /* Used to determine grouping rules.  */
   int lc_category = info->extra ? LC_MONETARY : LC_NUMERIC;

-  /* When _Float128 is enabled in the library and ABI-distinct from long
-     double, we need mp_limbs enough for any of them.  */
-#if __HAVE_DISTINCT_FLOAT128
-# define GREATER_MANT_DIG FLT128_MANT_DIG
-#else
-# define GREATER_MANT_DIG LDBL_MANT_DIG
-#endif
   /* We need just a few limbs for the input before shifting to the right
      position. */
   mp_limb_t fp_input[(GREATER_MANT_DIG + BITS_PER_MP_LIMB - 1)
@@ -257,22 +263,6 @@ __printf_fp_buffer_1 (struct __printf_buffer *buf, locale_t loc,
       return;
     }

-
-  /* We need three multiprecision variables.  Now that we have the p.exponent
-     of the number we can allocate the needed memory.  It would be more
-     efficient to use variables of the fixed maximum size but because this
-     would be really big it could lead to memory problems.  */
-  {
-    mp_size_t bignum_size = ((abs (p.exponent) + BITS_PER_MP_LIMB - 1)
-                            / BITS_PER_MP_LIMB
-                            + (GREATER_MANT_DIG / BITS_PER_MP_LIMB > 2
-                               ? 8 : 4))
-                           * sizeof (mp_limb_t);
-    p.frac = (mp_limb_t *) alloca (bignum_size);
-    p.tmp = (mp_limb_t *) alloca (bignum_size);
-    p.scale = (mp_limb_t *) alloca (bignum_size);
-  }
-
   /* We now have to distinguish between numbers with positive and negative
      exponents because the method used for the one is not applicable/efficient
      for the other.  */
--

It will increase the required stack to about 6k (if LDBL_MAX_EXP is 16385)
or 480 (if LDBL_MAX_EXP is 1024), which not that larger compared to the 
scratch_buffer, and it simplifies a lot the final code.  Thoughts?

>  
>    /* We now have to distinguish between numbers with positive and negative
> @@ -744,19 +770,13 @@ __printf_fp_buffer_1 (struct __printf_buffer *buf, locale_t loc,
>  	return;
>        }
>      size_t wbuffer_to_alloc = 2 + chars_needed;
> -    buffer_malloced = ! __libc_use_alloca (wbuffer_to_alloc);
> -    if (__builtin_expect (buffer_malloced, 0))
> +    if (!scratch_buffer_set_array_size (&sbuf_wbuffer, 1, wbuffer_to_alloc))
>        {
> -	wbuffer = malloc (wbuffer_to_alloc);
> -	if (wbuffer == NULL)
> -	  {
> -	    /* Signal an error to the caller.  */
> -	    __printf_buffer_mark_failed (buf);
> -	    return;
> -	  }
> +	/* Signal an error to the caller.  */
> +	__printf_buffer_mark_failed (buf);
> +	goto free_mem_out;
>        }
> -    else
> -      wbuffer = alloca (wbuffer_to_alloc);
> +    wbuffer = sbuf_wbuffer.data;
>      wcp = wstartp = wbuffer + 2;	/* Let room for rounding.  */
>  
>      /* Do the real work: put digits in allocated buffer.  */

I think this should a different change.

> @@ -1025,8 +1045,11 @@ __printf_fp_buffer_1 (struct __printf_buffer *buf, locale_t loc,
>        __printf_buffer_pad (buf, info->pad, width);
>    }
>  
> -  if (buffer_malloced)
> -    free (wbuffer);
> +free_mem_out:
> +  scratch_buffer_free (&sbuf_frac);
> +  scratch_buffer_free (&sbuf_tmp);
> +  scratch_buffer_free (&sbuf_scale);
> +  scratch_buffer_free (&sbuf_wbuffer);
>  }
>  
>  /* ASCII to localization translation.  Multibyte version.  */

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] printf_fp: Get rid of alloca.
  2023-08-31 19:02 ` Adhemerval Zanella Netto
@ 2023-09-01 12:20   ` Florian Weimer
  2023-09-01 12:35     ` Adhemerval Zanella Netto
  0 siblings, 1 reply; 8+ messages in thread
From: Florian Weimer @ 2023-09-01 12:20 UTC (permalink / raw)
  To: Adhemerval Zanella Netto via Libc-alpha
  Cc: Joe Simmons-Talbott, Adhemerval Zanella Netto

* Adhemerval Zanella Netto via Libc-alpha:

> The maximum size of frac/tmp/scale is bound by the largest size of floating
> supported by the ABI.  The p.exponent will at maximum LDBL_MAX_EXP, so the
> required size will depend of the maximum support floating point support
> (2080 for most architectures, otherwise 160).
>
> Using scratch_buffer will already increase the minimum stack usage for ~3k,
> so I wonder if would be better to just define the maximum mp_limb_t size
> direct on hack_digit_param:

The issue is that this increases the stack size for functions like
dprintf that might be used in signal handlers.  While it's conditional
and only for floating point formatting, the old approach restricts the
most excess to certain subcases, especially those involving long double.
That's why I left in the alloca when I refactored __printf_fp to avoid
stdio interfaces for internal use.

Thanks,
Florian


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] printf_fp: Get rid of alloca.
  2023-09-01 12:20   ` Florian Weimer
@ 2023-09-01 12:35     ` Adhemerval Zanella Netto
  2023-09-06 16:55       ` Adhemerval Zanella Netto
  0 siblings, 1 reply; 8+ messages in thread
From: Adhemerval Zanella Netto @ 2023-09-01 12:35 UTC (permalink / raw)
  To: Florian Weimer, Adhemerval Zanella Netto via Libc-alpha
  Cc: Joe Simmons-Talbott



On 01/09/23 09:20, Florian Weimer wrote:
> * Adhemerval Zanella Netto via Libc-alpha:
> 
>> The maximum size of frac/tmp/scale is bound by the largest size of floating
>> supported by the ABI.  The p.exponent will at maximum LDBL_MAX_EXP, so the
>> required size will depend of the maximum support floating point support
>> (2080 for most architectures, otherwise 160).
>>
>> Using scratch_buffer will already increase the minimum stack usage for ~3k,
>> so I wonder if would be better to just define the maximum mp_limb_t size
>> direct on hack_digit_param:
> 
> The issue is that this increases the stack size for functions like
> dprintf that might be used in signal handlers.  While it's conditional
> and only for floating point formatting, the old approach restricts the
> most excess to certain subcases, especially those involving long double.
> That's why I left in the alloca when I refactored __printf_fp to avoid
> stdio interfaces for internal use.

It already calls malloc in __printf_fp_buffer_1, so this is not fully 
AS-signal safe (it is unlikely, but possible).  Another option would to 
decompose __printf_fp_buffer_1 and if long double / float128 is use it 
routes to another function that allocates a large stack (the default for 
double should be ok, about ~680 bytes of extra stack).

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] printf_fp: Get rid of alloca.
  2023-09-01 12:35     ` Adhemerval Zanella Netto
@ 2023-09-06 16:55       ` Adhemerval Zanella Netto
  2023-09-06 20:28         ` Joe Simmons-Talbott
  0 siblings, 1 reply; 8+ messages in thread
From: Adhemerval Zanella Netto @ 2023-09-06 16:55 UTC (permalink / raw)
  To: Florian Weimer, Adhemerval Zanella Netto via Libc-alpha
  Cc: Joe Simmons-Talbott



On 01/09/23 09:35, Adhemerval Zanella Netto wrote:
> 
> 
> On 01/09/23 09:20, Florian Weimer wrote:
>> * Adhemerval Zanella Netto via Libc-alpha:
>>
>>> The maximum size of frac/tmp/scale is bound by the largest size of floating
>>> supported by the ABI.  The p.exponent will at maximum LDBL_MAX_EXP, so the
>>> required size will depend of the maximum support floating point support
>>> (2080 for most architectures, otherwise 160).
>>>
>>> Using scratch_buffer will already increase the minimum stack usage for ~3k,
>>> so I wonder if would be better to just define the maximum mp_limb_t size
>>> direct on hack_digit_param:
>>
>> The issue is that this increases the stack size for functions like
>> dprintf that might be used in signal handlers.  While it's conditional
>> and only for floating point formatting, the old approach restricts the
>> most excess to certain subcases, especially those involving long double.
>> That's why I left in the alloca when I refactored __printf_fp to avoid
>> stdio interfaces for internal use.
> 
> It already calls malloc in __printf_fp_buffer_1, so this is not fully 
> AS-signal safe (it is unlikely, but possible).  Another option would to 
> decompose __printf_fp_buffer_1 and if long double / float128 is use it 
> routes to another function that allocates a large stack (the default for 
> double should be ok, about ~680 bytes of extra stack).

I think we still avoid the pessimization of using one scratch_buffer for
each temporary variable (~3KB of stack requirement) with some code
refactoring.  For double we need at maximum ~160 bytes for each
temporary, so we can allocate all the required buffer on the stack.
The main issue is long double / _Float128, which may require up to
~6Kb of extra memory.  In this case, I think a single scratch_buffer
should be suffice.  

Joe, I have implemented these on a personal branch [1].  It does not
avoid the use o malloc (so printf double is still not fully AS-safe),
but it should use a feasible amount of stack.

[1] https://sourceware.org/git/?p=glibc.git;a=blobdiff;f=stdio-common/printf_fp.c;h=431602caed85ee35c341a3565346406ff0736d45;hp=6f22985ba136da2ccb2bb5b9d397ce386c78ee91;hb=3c0bc3ed09ff7ce13524bab5ddb760b363640a1e;hpb=742b35228f3efa25d41d14f27c8911f308514b28

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] printf_fp: Get rid of alloca.
  2023-09-06 16:55       ` Adhemerval Zanella Netto
@ 2023-09-06 20:28         ` Joe Simmons-Talbott
  2023-09-11 12:00           ` Adhemerval Zanella Netto
  0 siblings, 1 reply; 8+ messages in thread
From: Joe Simmons-Talbott @ 2023-09-06 20:28 UTC (permalink / raw)
  To: Adhemerval Zanella Netto
  Cc: Florian Weimer, Adhemerval Zanella Netto via Libc-alpha

On Wed, Sep 06, 2023 at 01:55:55PM -0300, Adhemerval Zanella Netto wrote:
> 
> 
> On 01/09/23 09:35, Adhemerval Zanella Netto wrote:
> > 
> > 
> > On 01/09/23 09:20, Florian Weimer wrote:
> >> * Adhemerval Zanella Netto via Libc-alpha:
> >>
> >>> The maximum size of frac/tmp/scale is bound by the largest size of floating
> >>> supported by the ABI.  The p.exponent will at maximum LDBL_MAX_EXP, so the
> >>> required size will depend of the maximum support floating point support
> >>> (2080 for most architectures, otherwise 160).
> >>>
> >>> Using scratch_buffer will already increase the minimum stack usage for ~3k,
> >>> so I wonder if would be better to just define the maximum mp_limb_t size
> >>> direct on hack_digit_param:
> >>
> >> The issue is that this increases the stack size for functions like
> >> dprintf that might be used in signal handlers.  While it's conditional
> >> and only for floating point formatting, the old approach restricts the
> >> most excess to certain subcases, especially those involving long double.
> >> That's why I left in the alloca when I refactored __printf_fp to avoid
> >> stdio interfaces for internal use.
> > 
> > It already calls malloc in __printf_fp_buffer_1, so this is not fully 
> > AS-signal safe (it is unlikely, but possible).  Another option would to 
> > decompose __printf_fp_buffer_1 and if long double / float128 is use it 
> > routes to another function that allocates a large stack (the default for 
> > double should be ok, about ~680 bytes of extra stack).
> 
> I think we still avoid the pessimization of using one scratch_buffer for
> each temporary variable (~3KB of stack requirement) with some code
> refactoring.  For double we need at maximum ~160 bytes for each
> temporary, so we can allocate all the required buffer on the stack.
> The main issue is long double / _Float128, which may require up to
> ~6Kb of extra memory.  In this case, I think a single scratch_buffer
> should be suffice.  
> 
> Joe, I have implemented these on a personal branch [1].  It does not
> avoid the use o malloc (so printf double is still not fully AS-safe),
> but it should use a feasible amount of stack.
> 
> [1] https://sourceware.org/git/?p=glibc.git;a=blobdiff;f=stdio-common/printf_fp.c;h=431602caed85ee35c341a3565346406ff0736d45;hp=6f22985ba136da2ccb2bb5b9d397ce386c78ee91;hb=3c0bc3ed09ff7ce13524bab5ddb760b363640a1e;hpb=742b35228f3efa25d41d14f27c8911f308514b28
> 

Your branch looks okay to me and passed 'make check' without any
regressions.  Could you post the patch so that others more knowledgable
than myself can comment and review please?

Thanks,
Joe


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] printf_fp: Get rid of alloca.
  2023-09-06 20:28         ` Joe Simmons-Talbott
@ 2023-09-11 12:00           ` Adhemerval Zanella Netto
  0 siblings, 0 replies; 8+ messages in thread
From: Adhemerval Zanella Netto @ 2023-09-11 12:00 UTC (permalink / raw)
  To: Joe Simmons-Talbott
  Cc: Florian Weimer, Adhemerval Zanella Netto via Libc-alpha



On 06/09/23 17:28, Joe Simmons-Talbott wrote:
> On Wed, Sep 06, 2023 at 01:55:55PM -0300, Adhemerval Zanella Netto wrote:
>>
>>
>> On 01/09/23 09:35, Adhemerval Zanella Netto wrote:
>>>
>>>
>>> On 01/09/23 09:20, Florian Weimer wrote:
>>>> * Adhemerval Zanella Netto via Libc-alpha:
>>>>
>>>>> The maximum size of frac/tmp/scale is bound by the largest size of floating
>>>>> supported by the ABI.  The p.exponent will at maximum LDBL_MAX_EXP, so the
>>>>> required size will depend of the maximum support floating point support
>>>>> (2080 for most architectures, otherwise 160).
>>>>>
>>>>> Using scratch_buffer will already increase the minimum stack usage for ~3k,
>>>>> so I wonder if would be better to just define the maximum mp_limb_t size
>>>>> direct on hack_digit_param:
>>>>
>>>> The issue is that this increases the stack size for functions like
>>>> dprintf that might be used in signal handlers.  While it's conditional
>>>> and only for floating point formatting, the old approach restricts the
>>>> most excess to certain subcases, especially those involving long double.
>>>> That's why I left in the alloca when I refactored __printf_fp to avoid
>>>> stdio interfaces for internal use.
>>>
>>> It already calls malloc in __printf_fp_buffer_1, so this is not fully 
>>> AS-signal safe (it is unlikely, but possible).  Another option would to 
>>> decompose __printf_fp_buffer_1 and if long double / float128 is use it 
>>> routes to another function that allocates a large stack (the default for 
>>> double should be ok, about ~680 bytes of extra stack).
>>
>> I think we still avoid the pessimization of using one scratch_buffer for
>> each temporary variable (~3KB of stack requirement) with some code
>> refactoring.  For double we need at maximum ~160 bytes for each
>> temporary, so we can allocate all the required buffer on the stack.
>> The main issue is long double / _Float128, which may require up to
>> ~6Kb of extra memory.  In this case, I think a single scratch_buffer
>> should be suffice.  
>>
>> Joe, I have implemented these on a personal branch [1].  It does not
>> avoid the use o malloc (so printf double is still not fully AS-safe),
>> but it should use a feasible amount of stack.
>>
>> [1] https://sourceware.org/git/?p=glibc.git;a=blobdiff;f=stdio-common/printf_fp.c;h=431602caed85ee35c341a3565346406ff0736d45;hp=6f22985ba136da2ccb2bb5b9d397ce386c78ee91;hb=3c0bc3ed09ff7ce13524bab5ddb760b363640a1e;hpb=742b35228f3efa25d41d14f27c8911f308514b28
>>
> 
> Your branch looks okay to me and passed 'make check' without any
> regressions.  Could you post the patch so that others more knowledgable
> than myself can comment and review please?

Ok, I will do it.

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2023-09-11 12:00 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-07-05 17:19 [PATCH] printf_fp: Get rid of alloca Joe Simmons-Talbott
2023-08-28 20:36 ` Joe Simmons-Talbott
2023-08-31 19:02 ` Adhemerval Zanella Netto
2023-09-01 12:20   ` Florian Weimer
2023-09-01 12:35     ` Adhemerval Zanella Netto
2023-09-06 16:55       ` Adhemerval Zanella Netto
2023-09-06 20:28         ` Joe Simmons-Talbott
2023-09-11 12:00           ` Adhemerval Zanella Netto

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).