From: Adhemerval Zanella Netto <adhemerval.zanella@linaro.org>
To: Joe Simmons-Talbott <josimmon@redhat.com>, libc-alpha@sourceware.org
Subject: Re: [PATCH] vfscanf-internal: Get rid of unbounded allocas
Date: Mon, 3 Jul 2023 10:40:28 -0300 [thread overview]
Message-ID: <8a8f0468-c7aa-61d4-6e7f-fb782dcc3059@linaro.org> (raw)
In-Reply-To: <148e9429-761f-ec58-3631-8fd26413b828@linaro.org>
On 30/06/23 15:14, Adhemerval Zanella Netto wrote:
>
>
> On 27/06/23 15:05, Joe Simmons-Talbott via Libc-alpha wrote:
>> Replace two unbounded allocas with scratch_buffers to avoid potential
>> stack overflow.
>> ---
>> stdio-common/vfscanf-internal.c | 26 +++++++++++++++++++++++---
>> 1 file changed, 23 insertions(+), 3 deletions(-)
>>
>> diff --git a/stdio-common/vfscanf-internal.c b/stdio-common/vfscanf-internal.c
>> index bfb9baa21a..60376ccff9 100644
>> --- a/stdio-common/vfscanf-internal.c
>> +++ b/stdio-common/vfscanf-internal.c
>> @@ -335,6 +335,12 @@ __vfscanf_internal (FILE *s, const char *format, va_list argptr,
>> struct char_buffer charbuf;
>> scratch_buffer_init (&charbuf.scratch);
>>
>> + struct scratch_buffer wc_ext_sbuf;
>> + scratch_buffer_init (&wc_ext_sbuf);
>> +
>> + struct scratch_buffer mb_ext_sbuf;
>> + scratch_buffer_init (&mb_ext_sbuf);
>> +
>> #ifdef __va_copy
>> __va_copy (arg, argptr);
>> #else
>> @@ -1488,8 +1494,14 @@ __vfscanf_internal (FILE *s, const char *format, va_list argptr,
>> wcdigits[n] = (const wchar_t *)
>> _NL_CURRENT (LC_CTYPE, _NL_CTYPE_INDIGITS0_WC + n);
>>
>> - wchar_t *wc_extended = (wchar_t *)
>> - alloca ((to_level + 2) * sizeof (wchar_t));
>> + wchar_t *wc_extended;
>> + if (!scratch_buffer_set_array_size
>> + (&wc_ext_sbuf, sizeof (wchar_t), to_level + 2))
>> + {
>> + done = EOF;
>> + goto errout;
>> + }
>> + wc_extended = wc_ext_sbuf.data;
>> __wmemcpy (wc_extended, wcdigits[n], to_level);
>> wc_extended[to_level] = __towctrans (L'0' + n, map);
>> wc_extended[to_level + 1] = '\0';
>> @@ -1525,7 +1537,13 @@ __vfscanf_internal (FILE *s, const char *format, va_list argptr,
>>
>> /* Allocate memory for extended multibyte digit. */
>> char *mb_extended;
>> - mb_extended = (char *) alloca (mbdigits_len + mblen + 1);
>> + if (!scratch_buffer_set_array_size
>> + (&mb_ext_sbuf, 1, mbdigits_len + mblen + 1))
>> + {
>> + done = EOF;
>> + goto errout;
>> + }
>> + mb_extended = mb_ext_sbuf.data;
>
> Unfortunately this change is wrong, it will overwrite the wcdigits[n]/mbdigits[n]
> on each iteration since you set the value of the stratch_buffer without adjusting
> the offset for each entry (for the wide operation).
>
> This code path will just be triggered by a handful of locale, but you can check
> with the example:
>
> ---
> #include <assert.h>
> #include <stdio.h>
> #include <locale.h>
>
> #define array_length(var) \
> (sizeof (var) / sizeof ((var)[0]) \
> + 0 * sizeof (struct { \
> _Static_assert (!__builtin_types_compatible_p \
> (__typeof (var), __typeof (&(var)[0])), \
> "argument must be an array"); \
> }))
>
> static const struct
> {
> int n;
> const char *str;
> } inputs[] =
> {
> { 1, "\xdb\xb1" },
> { 2, "\xdb\xb2" },
> { 3, "\xdb\xb3" },
> { 4, "\xdb\xb4" },
> { 5, "\xdb\xb5" },
> { 6, "\xdb\xb6" },
> { 7, "\xdb\xb7" },
> { 8, "\xdb\xb8" },
> { 9, "\xdb\xb9" },
> { 10, "\xdb\xb1\xdb\xb0" },
> { 11, "\xdb\xb1\xdb\xb1" },
> { 12, "\xdb\xb1\xdb\xb2" },
> { 13, "\xdb\xb1\xdb\xb3" },
> { 14, "\xdb\xb1\xdb\xb4" },
> { 15, "\xdb\xb1\xdb\xb5" },
> { 16, "\xdb\xb1\xdb\xb6" },
> { 17, "\xdb\xb1\xdb\xb7" },
> { 18, "\xdb\xb1\xdb\xb8" },
> { 19, "\xdb\xb1\xdb\xb9" },
> { 20, "\xdb\xb2\xdb\xb0" },
> { 30, "\xdb\xb3\xdb\xb0" },
> { 40, "\xdb\xb4\xdb\xb0" },
> { 50, "\xdb\xb5\xdb\xb0" },
> { 60, "\xdb\xb6\xdb\xb0" },
> { 70, "\xdb\xb7\xdb\xb0" },
> { 80, "\xdb\xb8\xdb\xb0" },
> { 90, "\xdb\xb9\xdb\xb0" },
> { 100, "\xdb\xb1\xdb\xb0\xdb\xb0" },
> { 1000, "\xdb\xb1\xdb\xb0\xdb\xb0\xdb\xb0" },
> };
>
> int main (int argc, char *argv[])
> {
> assert (setlocale (LC_ALL, "fa_IR.UTF-8") != NULL);
>
> for (int i = 0; i < array_length (inputs); i++)
> {
> int n;
> sscanf (inputs[i].str, "%Id", &n);
> assert (n == inputs[i].n);
> }
>
> return 0;
> }
> ---
>
> I generate the inputs by round-trip with printf using the same locale. A similar test
> for wide char would be:
>
> ---
> #include <assert.h>
> #include <stdio.h>
> #include <locale.h>
> #include <wchar.h>
>
> #define array_length(var) \
> (sizeof (var) / sizeof ((var)[0]) \
> + 0 * sizeof (struct { \
> _Static_assert (!__builtin_types_compatible_p \
> (__typeof (var), __typeof (&(var)[0])), \
> "argument must be an array"); \
> }))
>
> static const struct input_t
> {
> int n;
> const wchar_t str[5];
> } inputs[] =
> {
> { 1, { 0x000006f1, L'\0' } },
> { 2, { 0x000006f2, L'\0' } },
> { 3, { 0x000006f3, L'\0' } },
> { 4, { 0x000006f4, L'\0' } },
> { 5, { 0x000006f5, L'\0' } },
> { 6, { 0x000006f6, L'\0' } },
> { 7, { 0x000006f7, L'\0' } },
> { 8, { 0x000006f8, L'\0' } },
> { 9, { 0x000006f9, L'\0' } },
> { 10, { 0x000006f1, 0x000006f0, L'\0' } },
> { 11, { 0x000006f1, 0x000006f1, L'\0' } },
> { 12, { 0x000006f1, 0x000006f2, L'\0' } },
> { 13, { 0x000006f1, 0x000006f3, L'\0' } },
> { 14, { 0x000006f1, 0x000006f4, L'\0' } },
> { 15, { 0x000006f1, 0x000006f5, L'\0' } },
> { 16, { 0x000006f1, 0x000006f6, L'\0' } },
> { 17, { 0x000006f1, 0x000006f7, L'\0' } },
> { 18, { 0x000006f1, 0x000006f8, L'\0' } },
> { 19, { 0x000006f1, 0x000006f9, L'\0' } },
> { 20, { 0x000006f2, 0x000006f0, L'\0' } },
> { 30, { 0x000006f3, 0x000006f0, L'\0' } },
> { 40, { 0x000006f4, 0x000006f0, L'\0' } },
> { 50, { 0x000006f5, 0x000006f0, L'\0' } },
> { 60, { 0x000006f6, 0x000006f0, L'\0' } },
> { 70, { 0x000006f7, 0x000006f0, L'\0' } },
> { 80, { 0x000006f8, 0x000006f0, L'\0' } },
> { 90, { 0x000006f9, 0x000006f0, L'\0' } },
> { 100, { 0x000006f1, 0x000006f0, 0x000006f0, L'\0' } },
> { 1000, { 0x000006f1, 0x000006f0, 0x000006f0, 0x000006f0, L'\0' } },
> };
>
> int main (int argc, char *argv[])
> {
> assert (setlocale (LC_ALL, "fa_IR.UTF-8") != NULL);
>
> for (int i = 0; i < array_length (inputs); i++)
> {
> int n = 0;
> swscanf (inputs[i].str, L"%Id", &n);
> wprintf (L"n=%d input[%d].n=%d (%ls)\n",
> n,
> i,
> inputs[i].n,
> inputs[i].str);
> assert (n == inputs[i].n);
> }
>
> return 0;
> };
> ---
>
> Also, you do not need two scratch buffer, since either one will be used for
> !COMPILE_WSCANF and COMPILE_WSCANF; and to avoid the extra stack requirement
> (since there are not used for the most locales) only define them if I18N
> is actually used. The alternate digit handling should be something like:
Scratch that, I just realized that once we reallocate the scratch_buffer the
digits_extended pointers will be invalidated anyway. I think a better strategy
would be just use malloc here, it will slow down the conversion a bit but since
the size will small it will be fulfilled by the thread cache anyway.
I will send a fix with some testcases.
prev parent reply other threads:[~2023-07-03 13:40 UTC|newest]
Thread overview: 3+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-06-27 18:05 Joe Simmons-Talbott
2023-06-30 18:14 ` Adhemerval Zanella Netto
2023-07-03 13:40 ` Adhemerval Zanella Netto [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=8a8f0468-c7aa-61d4-6e7f-fb782dcc3059@linaro.org \
--to=adhemerval.zanella@linaro.org \
--cc=josimmon@redhat.com \
--cc=libc-alpha@sourceware.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).