public inbox for libc-alpha@sourceware.org
 help / color / mirror / Atom feed
From: Adhemerval Zanella Netto <adhemerval.zanella@linaro.org>
To: Joe Simmons-Talbott <josimmon@redhat.com>, libc-alpha@sourceware.org
Subject: Re: [PATCH] vfscanf-internal: Get rid of unbounded allocas
Date: Mon, 3 Jul 2023 10:40:28 -0300	[thread overview]
Message-ID: <8a8f0468-c7aa-61d4-6e7f-fb782dcc3059@linaro.org> (raw)
In-Reply-To: <148e9429-761f-ec58-3631-8fd26413b828@linaro.org>



On 30/06/23 15:14, Adhemerval Zanella Netto wrote:
> 
> 
> On 27/06/23 15:05, Joe Simmons-Talbott via Libc-alpha wrote:
>> Replace two unbounded allocas with scratch_buffers to avoid potential
>> stack overflow.
>> ---
>>  stdio-common/vfscanf-internal.c | 26 +++++++++++++++++++++++---
>>  1 file changed, 23 insertions(+), 3 deletions(-)
>>
>> diff --git a/stdio-common/vfscanf-internal.c b/stdio-common/vfscanf-internal.c
>> index bfb9baa21a..60376ccff9 100644
>> --- a/stdio-common/vfscanf-internal.c
>> +++ b/stdio-common/vfscanf-internal.c
>> @@ -335,6 +335,12 @@ __vfscanf_internal (FILE *s, const char *format, va_list argptr,
>>    struct char_buffer charbuf;
>>    scratch_buffer_init (&charbuf.scratch);
>>  
>> +  struct scratch_buffer wc_ext_sbuf;
>> +  scratch_buffer_init (&wc_ext_sbuf);
>> +
>> +  struct scratch_buffer mb_ext_sbuf;
>> +  scratch_buffer_init (&mb_ext_sbuf);
>> +
>>  #ifdef __va_copy
>>    __va_copy (arg, argptr);
>>  #else
>> @@ -1488,8 +1494,14 @@ __vfscanf_internal (FILE *s, const char *format, va_list argptr,
>>  		      wcdigits[n] = (const wchar_t *)
>>  			_NL_CURRENT (LC_CTYPE, _NL_CTYPE_INDIGITS0_WC + n);
>>  
>> -		      wchar_t *wc_extended = (wchar_t *)
>> -			alloca ((to_level + 2) * sizeof (wchar_t));
>> +		      wchar_t *wc_extended;
>> +		      if (!scratch_buffer_set_array_size
>> +		          (&wc_ext_sbuf, sizeof (wchar_t), to_level + 2))
>> +		        {
>> +		          done = EOF;
>> +		          goto errout;
>> +		        }
>> +		      wc_extended = wc_ext_sbuf.data;
>>  		      __wmemcpy (wc_extended, wcdigits[n], to_level);
>>  		      wc_extended[to_level] = __towctrans (L'0' + n, map);
>>  		      wc_extended[to_level + 1] = '\0';
>> @@ -1525,7 +1537,13 @@ __vfscanf_internal (FILE *s, const char *format, va_list argptr,
>>  
>>  		      /*  Allocate memory for extended multibyte digit.  */
>>  		      char *mb_extended;
>> -		      mb_extended = (char *) alloca (mbdigits_len + mblen + 1);
>> +		      if (!scratch_buffer_set_array_size
>> +		          (&mb_ext_sbuf, 1, mbdigits_len + mblen + 1))
>> +		        {
>> +		          done = EOF;
>> +		          goto errout;
>> +		        }
>> +		      mb_extended = mb_ext_sbuf.data;
> 
> Unfortunately this change is wrong, it will overwrite the wcdigits[n]/mbdigits[n]
> on each iteration since you set the value of the stratch_buffer without adjusting
> the offset for each entry (for the wide operation).
> 
> This code path will just be triggered by a handful of locale, but you can check
> with the example:
> 
> ---
> #include <assert.h>
> #include <stdio.h>
> #include <locale.h>
> 
> #define array_length(var)                                               \
>   (sizeof (var) / sizeof ((var)[0])                                     \
>    + 0 * sizeof (struct {                                               \
>        _Static_assert (!__builtin_types_compatible_p                    \
>                        (__typeof (var), __typeof (&(var)[0])),          \
>                        "argument must be an array");                    \
>    }))
> 
> static const struct
> {
>   int n;
>   const char *str;
> } inputs[] =
> {
>   { 1, "\xdb\xb1" },
>   { 2, "\xdb\xb2" },
>   { 3, "\xdb\xb3" },
>   { 4, "\xdb\xb4" },
>   { 5, "\xdb\xb5" },
>   { 6, "\xdb\xb6" },
>   { 7, "\xdb\xb7" },
>   { 8, "\xdb\xb8" },
>   { 9, "\xdb\xb9" },
>   { 10, "\xdb\xb1\xdb\xb0" },
>   { 11, "\xdb\xb1\xdb\xb1" },
>   { 12, "\xdb\xb1\xdb\xb2" },
>   { 13, "\xdb\xb1\xdb\xb3" },
>   { 14, "\xdb\xb1\xdb\xb4" },
>   { 15, "\xdb\xb1\xdb\xb5" },
>   { 16, "\xdb\xb1\xdb\xb6" },
>   { 17, "\xdb\xb1\xdb\xb7" },
>   { 18, "\xdb\xb1\xdb\xb8" },
>   { 19, "\xdb\xb1\xdb\xb9" },
>   { 20, "\xdb\xb2\xdb\xb0" },
>   { 30, "\xdb\xb3\xdb\xb0" },
>   { 40, "\xdb\xb4\xdb\xb0" },
>   { 50, "\xdb\xb5\xdb\xb0" },
>   { 60, "\xdb\xb6\xdb\xb0" },
>   { 70, "\xdb\xb7\xdb\xb0" },
>   { 80, "\xdb\xb8\xdb\xb0" },
>   { 90, "\xdb\xb9\xdb\xb0" },
>   { 100, "\xdb\xb1\xdb\xb0\xdb\xb0" },
>   { 1000, "\xdb\xb1\xdb\xb0\xdb\xb0\xdb\xb0" },
> };
> 
> int main (int argc, char *argv[])
> {
>   assert (setlocale (LC_ALL, "fa_IR.UTF-8") != NULL);
> 
>   for (int i = 0; i < array_length (inputs); i++)
>     {
>       int n;
>       sscanf (inputs[i].str, "%Id", &n);
>       assert (n == inputs[i].n);
>     }
> 
>   return 0;
> }
> ---
> 
> I generate the inputs by round-trip with printf using the same locale.  A similar test
> for wide char would be:
> 
> ---
> #include <assert.h>
> #include <stdio.h>
> #include <locale.h>
> #include <wchar.h>
> 
> #define array_length(var)                                               \
>   (sizeof (var) / sizeof ((var)[0])                                     \
>    + 0 * sizeof (struct {                                               \
>        _Static_assert (!__builtin_types_compatible_p                    \
>                        (__typeof (var), __typeof (&(var)[0])),          \
>                        "argument must be an array");                    \
>    }))
> 
> static const struct input_t
> {
>   int n;
>   const wchar_t str[5];
> } inputs[] =
> {
>   {    1, { 0x000006f1, L'\0' } },
>   {    2, { 0x000006f2, L'\0' } },
>   {    3, { 0x000006f3, L'\0' } },
>   {    4, { 0x000006f4, L'\0' } },
>   {    5, { 0x000006f5, L'\0' } },
>   {    6, { 0x000006f6, L'\0' } },
>   {    7, { 0x000006f7, L'\0' } },
>   {    8, { 0x000006f8, L'\0' } },
>   {    9, { 0x000006f9, L'\0' } },
>   {   10, { 0x000006f1, 0x000006f0, L'\0' } },
>   {   11, { 0x000006f1, 0x000006f1, L'\0' } },
>   {   12, { 0x000006f1, 0x000006f2, L'\0' } },
>   {   13, { 0x000006f1, 0x000006f3, L'\0' } },
>   {   14, { 0x000006f1, 0x000006f4, L'\0' } },
>   {   15, { 0x000006f1, 0x000006f5, L'\0' } },
>   {   16, { 0x000006f1, 0x000006f6, L'\0' } },
>   {   17, { 0x000006f1, 0x000006f7, L'\0' } },
>   {   18, { 0x000006f1, 0x000006f8, L'\0' } },
>   {   19, { 0x000006f1, 0x000006f9, L'\0' } },
>   {   20, { 0x000006f2, 0x000006f0, L'\0' } },
>   {   30, { 0x000006f3, 0x000006f0, L'\0' } },
>   {   40, { 0x000006f4, 0x000006f0, L'\0' } },
>   {   50, { 0x000006f5, 0x000006f0, L'\0' } },
>   {   60, { 0x000006f6, 0x000006f0, L'\0' } },
>   {   70, { 0x000006f7, 0x000006f0, L'\0' } },
>   {   80, { 0x000006f8, 0x000006f0, L'\0' } },
>   {   90, { 0x000006f9, 0x000006f0, L'\0' } },
>   {  100, { 0x000006f1, 0x000006f0, 0x000006f0, L'\0' } },
>   { 1000, { 0x000006f1, 0x000006f0, 0x000006f0, 0x000006f0, L'\0' } },
> };
> 
> int main (int argc, char *argv[])
> {
>   assert (setlocale (LC_ALL, "fa_IR.UTF-8") != NULL);
> 
>   for (int i = 0; i < array_length (inputs); i++)
>     {
>       int n = 0;
>       swscanf (inputs[i].str, L"%Id", &n);
>       wprintf (L"n=%d input[%d].n=%d (%ls)\n",
>                n,
>                i,
>                inputs[i].n,
>                inputs[i].str);
>       assert (n == inputs[i].n);
>     }
> 
>   return 0;  
> };
> ---
> 
> Also, you do not need two scratch buffer, since either one will be used for
> !COMPILE_WSCANF and COMPILE_WSCANF; and to avoid the extra stack requirement
> (since there are not used for the most locales) only define them if I18N
> is actually used.  The alternate digit handling should be something like:

Scratch that, I just realized that once we reallocate the scratch_buffer the
digits_extended pointers will be invalidated anyway.  I think a better strategy
would be just use malloc here, it will slow down the conversion a bit but since
the size will small it will be fulfilled by the thread cache anyway.

I will send a fix with some testcases.

      reply	other threads:[~2023-07-03 13:40 UTC|newest]

Thread overview: 3+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-06-27 18:05 Joe Simmons-Talbott
2023-06-30 18:14 ` Adhemerval Zanella Netto
2023-07-03 13:40   ` Adhemerval Zanella Netto [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=8a8f0468-c7aa-61d4-6e7f-fb782dcc3059@linaro.org \
    --to=adhemerval.zanella@linaro.org \
    --cc=josimmon@redhat.com \
    --cc=libc-alpha@sourceware.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).