From: Siddhesh Poyarekar <siddhesh@gotplt.org>
To: libc-alpha@sourceware.org, Carlos O'Donell <carlos@redhat.com>
Subject: [PING3][PATCH v2] setlocale: Fail if iconv module for charset is not present [BZ #27996]
Date: Mon, 18 Oct 2021 18:46:05 +0530 [thread overview]
Message-ID: <0e178208-6c8c-1896-918b-9185d4730b0c@gotplt.org> (raw)
In-Reply-To: <f2984ee2-06af-2f70-47c6-e2c7e88affdd@gotplt.org>
Ping!
On 8/17/21 08:28, Siddhesh Poyarekar wrote:
> Ping!
>
> On 8/11/21 1:12 PM, Siddhesh Poyarekar via Libc-alpha wrote:
>> Ping!
>>
>> On 7/20/21 8:06 AM, Siddhesh Poyarekar via Libc-alpha wrote:
>>> setlocale currently succeeds even if the requested locale uses a
>>> charset that does not have a converter module installed. Check for
>>> existence of the charset (either the one requested through the input
>>> name or the one needed by the selected locale file) and fail if it
>>> doesn't.
>>>
>>> The new test tst-invalid-charset verifes that loading test5 and test6
>>> locales fail because both locales have charsets without a converter,
>>> viz. test5 and test6 respectively. Also, test6.c has been removed as
>>> it was unused.
>>> ---
>>> Changes from v1:
>>> - Find full transformation paths both ways instead of merely looking for
>>> a FROM converter.
>>>
>>> locale/findlocale.c | 77 ++++++++++++-----
>>> localedata/Makefile | 12 ++-
>>> localedata/tests/test6.c | 137 -------------------------------
>>> localedata/tst-invalid-charset.c | 31 +++++++
>>> 4 files changed, 95 insertions(+), 162 deletions(-)
>>> delete mode 100644 localedata/tests/test6.c
>>> create mode 100644 localedata/tst-invalid-charset.c
>>>
>>> diff --git a/locale/findlocale.c b/locale/findlocale.c
>>> index ab09122b0c..7ccc98cd8b 100644
>>> --- a/locale/findlocale.c
>>> +++ b/locale/findlocale.c
>>> @@ -98,6 +98,30 @@ valid_locale_name (const char *name)
>>> return 1;
>>> }
>>> +/* Return true if we have gconv modules to transform between the
>>> INTERNAL
>>> + encoding and CODESET. */
>>> +static bool
>>> +codeset_has_module (const char *codeset)
>>> +{
>>> + struct __gconv_step *steps;
>>> + size_t nsteps;
>>> +
>>> + char *ccodeset = (char *) alloca (strlen (codeset) + 3);
>>> + strip (ccodeset, codeset);
>>> +
>>> + if (__gconv_find_transform ("INTERNAL", ccodeset, &steps, &nsteps, 0)
>>> + != __GCONV_OK)
>>> + return false;
>>> + __gconv_close_transform (steps, nsteps);
>>> +
>>> + if (__gconv_find_transform (ccodeset, "INTERNAL", &steps, &nsteps, 0)
>>> + != __GCONV_OK)
>>> + return false;
>>> + __gconv_close_transform (steps, nsteps);
>>> +
>>> + return true;
>>> +}
>>> +
>>> struct __locale_data *
>>> _nl_find_locale (const char *locale_path, size_t locale_path_len,
>>> int category, const char **name)
>>> @@ -200,6 +224,10 @@ _nl_find_locale (const char *locale_path, size_t
>>> locale_path_len,
>>> /* Memory allocate problem. */
>>> return NULL;
>>> + /* The requested codeset does not have a converter, don't use it. */
>>> + if (codeset != NULL && !codeset_has_module (codeset))
>>> + return NULL;
>>> +
>>> /* If exactly this locale was already asked for we have an entry
>>> with
>>> the complete name. */
>>> locale_file = _nl_make_l10nflist (&_nl_locale_file_list[category],
>>> @@ -248,6 +276,33 @@ _nl_find_locale (const char *locale_path, size_t
>>> locale_path_len,
>>> return NULL;
>>> }
>>> + /* Get the codeset information from the locale file. */
>>> + static const int codeset_idx[] =
>>> + {
>>> + [__LC_CTYPE] = _NL_ITEM_INDEX (CODESET),
>>> + [__LC_NUMERIC] = _NL_ITEM_INDEX (_NL_NUMERIC_CODESET),
>>> + [__LC_TIME] = _NL_ITEM_INDEX (_NL_TIME_CODESET),
>>> + [__LC_COLLATE] = _NL_ITEM_INDEX (_NL_COLLATE_CODESET),
>>> + [__LC_MONETARY] = _NL_ITEM_INDEX (_NL_MONETARY_CODESET),
>>> + [__LC_MESSAGES] = _NL_ITEM_INDEX (_NL_MESSAGES_CODESET),
>>> + [__LC_PAPER] = _NL_ITEM_INDEX (_NL_PAPER_CODESET),
>>> + [__LC_NAME] = _NL_ITEM_INDEX (_NL_NAME_CODESET),
>>> + [__LC_ADDRESS] = _NL_ITEM_INDEX (_NL_ADDRESS_CODESET),
>>> + [__LC_TELEPHONE] = _NL_ITEM_INDEX (_NL_TELEPHONE_CODESET),
>>> + [__LC_MEASUREMENT] = _NL_ITEM_INDEX (_NL_MEASUREMENT_CODESET),
>>> + [__LC_IDENTIFICATION] = _NL_ITEM_INDEX
>>> (_NL_IDENTIFICATION_CODESET)
>>> + };
>>> + const struct __locale_data *data;
>>> + const char *locale_codeset;
>>> +
>>> + data = (const struct __locale_data *) locale_file->data;
>>> + locale_codeset = (const char *)
>>> data->values[codeset_idx[category]].string;
>>> + assert (locale_codeset != NULL);
>>> +
>>> + /* The locale codeset does not have a converter, don't use it. */
>>> + if (locale_codeset[0] != '\0' && !codeset_has_module
>>> (locale_codeset))
>>> + return NULL;
>>> +
>>> /* The LC_CTYPE category allows to check whether a locale is really
>>> usable. If the locale name contains a charset name and the
>>> charset name used in the locale (present in the LC_CTYPE data) is
>>> @@ -256,31 +311,9 @@ _nl_find_locale (const char *locale_path, size_t
>>> locale_path_len,
>>> in the locale name. */
>>> if (codeset != NULL)
>>> {
>>> - /* Get the codeset information from the locale file. */
>>> - static const int codeset_idx[] =
>>> - {
>>> - [__LC_CTYPE] = _NL_ITEM_INDEX (CODESET),
>>> - [__LC_NUMERIC] = _NL_ITEM_INDEX (_NL_NUMERIC_CODESET),
>>> - [__LC_TIME] = _NL_ITEM_INDEX (_NL_TIME_CODESET),
>>> - [__LC_COLLATE] = _NL_ITEM_INDEX (_NL_COLLATE_CODESET),
>>> - [__LC_MONETARY] = _NL_ITEM_INDEX (_NL_MONETARY_CODESET),
>>> - [__LC_MESSAGES] = _NL_ITEM_INDEX (_NL_MESSAGES_CODESET),
>>> - [__LC_PAPER] = _NL_ITEM_INDEX (_NL_PAPER_CODESET),
>>> - [__LC_NAME] = _NL_ITEM_INDEX (_NL_NAME_CODESET),
>>> - [__LC_ADDRESS] = _NL_ITEM_INDEX (_NL_ADDRESS_CODESET),
>>> - [__LC_TELEPHONE] = _NL_ITEM_INDEX (_NL_TELEPHONE_CODESET),
>>> - [__LC_MEASUREMENT] = _NL_ITEM_INDEX (_NL_MEASUREMENT_CODESET),
>>> - [__LC_IDENTIFICATION] = _NL_ITEM_INDEX
>>> (_NL_IDENTIFICATION_CODESET)
>>> - };
>>> - const struct __locale_data *data;
>>> - const char *locale_codeset;
>>> char *clocale_codeset;
>>> char *ccodeset;
>>> - data = (const struct __locale_data *) locale_file->data;
>>> - locale_codeset =
>>> - (const char *) data->values[codeset_idx[category]].string;
>>> - assert (locale_codeset != NULL);
>>> /* Note the length of the allocated memory: +3 for up to two
>>> slashes
>>> and the NUL byte. */
>>> clocale_codeset = (char *) alloca (strlen (locale_codeset) + 3);
>>> diff --git a/localedata/Makefile b/localedata/Makefile
>>> index 14e04cd3c5..2af399ec51 100644
>>> --- a/localedata/Makefile
>>> +++ b/localedata/Makefile
>>> @@ -124,11 +124,13 @@ test-input := \
>>> test-input-data = $(addsuffix .in, $(test-input))
>>> test-output := $(foreach s, .out .xout, \
>>> $(addsuffix $s, $(basename $(test-input))))
>>> +# Note that tst-invalid-charset depends on test5 and test6 being
>>> locales that
>>> +# do not have valid charset converters.
>>> ld-test-names := test1 test2 test3 test4 test5 test6 test7
>>> ld-test-srcs := $(addprefix tests/,$(addsuffix .cm,$(ld-test-names)) \
>>> $(addsuffix .def,$(ld-test-names)) \
>>> $(addsuffix .ds,test5 test6) \
>>> - test6.c trans.def)
>>> + trans.def)
>>> fmon-tests = n01y12 n02n40 n10y31 n11y41 n12y11 n20n32 n30y20 n41n00 \
>>> y01y10 y02n22 y22n42 y30y21 y32n31 y40y00 y42n21
>>> @@ -158,7 +160,7 @@ tests = $(locale_test_suite) tst-digits
>>> tst-setlocale bug-iconv-trans \
>>> tst-leaks tst-mbswcs1 tst-mbswcs2 tst-mbswcs3 tst-mbswcs4
>>> tst-mbswcs5 \
>>> tst-mbswcs6 tst-xlocale1 tst-xlocale2 bug-usesetlocale \
>>> tst-strfmon1 tst-sscanf bug-setlocale1 tst-setlocale2
>>> tst-setlocale3 \
>>> - tst-wctype tst-iconv-math-trans
>>> + tst-wctype tst-iconv-math-trans tst-invalid-charset
>>> tests-static = bug-setlocale1-static
>>> tests += $(tests-static)
>>> ifeq (yes,$(build-shared))
>>> @@ -401,7 +403,10 @@ $(objpfx)tst-langinfo-setlocale-static.out:
>>> tst-langinfo.sh \
>>> '$(run-program-env)' '$(test-program-cmd-after-env)' > $@; \
>>> $(evaluate-test)
>>> +# These tests depend on tst-locale because they use the locales
>>> compiled by
>>> +# that test.
>>> $(objpfx)tst-digits.out: $(objpfx)tst-locale.out
>>> +$(objpfx)tst-invalid-charset.out: $(objpfx)tst-locale.out
>>> $(objpfx)tst-mbswcs6.out: $(addprefix $(objpfx),$(CTYPE_FILES))
>>> endif
>>> @@ -461,7 +466,8 @@ $(objpfx)mtrace-tst-leaks.out:
>>> $(objpfx)tst-leaks.out
>>> $(common-objpfx)malloc/mtrace $(objpfx)tst-leaks.mtrace > $@; \
>>> $(evaluate-test)
>>> -bug-setlocale1-ENV-only = LOCPATH=$(objpfx) LC_CTYPE=de_DE.UTF-8
>>> +bug-setlocale1-ENV-only = GCONV_PATH=$(common-objpfx)iconvdata \
>>> + LOCPATH=$(objpfx) LC_CTYPE=de_DE.UTF-8
>>> bug-setlocale1-static-ENV-only = $(bug-setlocale1-ENV-only)
>>> $(objdir)/iconvdata/gconv-modules:
>>> diff --git a/localedata/tests/test6.c b/localedata/tests/test6.c
>>> deleted file mode 100644
>>> index edb5fe4a5f..0000000000
>>> --- a/localedata/tests/test6.c
>>> +++ /dev/null
>>> @@ -1,137 +0,0 @@
>>> -/* Test program for character classes and mappings.
>>> - Copyright (C) 1999-2021 Free Software Foundation, Inc.
>>> - This file is part of the GNU C Library.
>>> - Contributed by Ulrich Drepper <drepper@cygnus.com>, 1999.
>>> -
>>> - The GNU C Library is free software; you can redistribute it and/or
>>> - modify it under the terms of the GNU Lesser General Public
>>> - License as published by the Free Software Foundation; either
>>> - version 2.1 of the License, or (at your option) any later version.
>>> -
>>> - The GNU C Library is distributed in the hope that it will be useful,
>>> - but WITHOUT ANY WARRANTY; without even the implied warranty of
>>> - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
>>> - Lesser General Public License for more details.
>>> -
>>> - You should have received a copy of the GNU Lesser General Public
>>> - License along with the GNU C Library; if not, see
>>> - <https://www.gnu.org/licenses/>. */
>>> -
>>> -#include <ctype.h>
>>> -#include <locale.h>
>>> -#include <wchar.h>
>>> -
>>> -
>>> -int
>>> -main (void)
>>> -{
>>> - const char lower[] = "abcdefghijklmnopqrstuvwxyz";
>>> - const char upper[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
>>> -#define LEN (sizeof (upper) - 1)
>>> - const wchar_t wlower[] = L"abcdefghijklmnopqrstuvwxyz";
>>> - const wchar_t wupper[] = L"ABCDEFGHIJKLMNOPQRSTUVWXYZ";
>>> - int i;
>>> - int result = 0;
>>> -
>>> - setlocale (LC_ALL, "test6");
>>> -
>>> - for (i = 0; i < LEN; ++i)
>>> - {
>>> - /* Test basic table handling (basic == not more than 256
>>> characters).
>>> - The charmaps swaps the normal lower-upper case meaning of the
>>> - ASCII characters used in the source code while the Unicode mapping
>>> - in the repertoire map has the normal correspondents. This test
>>> - shows the independence of the tables for `char' and `wchar_t'
>>> - characters. */
>>> -
>>> - if (islower (lower[i]))
>>> - {
>>> - printf ("islower ('%c') false\n", lower[i]);
>>> - result = 1;
>>> - }
>>> - if (! isupper (lower[i]))
>>> - {
>>> - printf ("isupper ('%c') false\n", lower[i]);
>>> - result = 1;
>>> - }
>>> -
>>> - if (! islower (upper[i]))
>>> - {
>>> - printf ("islower ('%c') false\n", upper[i]);
>>> - result = 1;
>>> - }
>>> - if (isupper (upper[i]))
>>> - {
>>> - printf ("isupper ('%c') false\n", upper[i]);
>>> - result = 1;
>>> - }
>>> -
>>> - if (toupper (lower[i]) != lower[i])
>>> - {
>>> - printf ("toupper ('%c') false\n", lower[i]);
>>> - result = 1;
>>> - }
>>> - if (tolower (lower[i]) != upper[i])
>>> - {
>>> - printf ("tolower ('%c') false\n", lower[i]);
>>> - result = 1;
>>> - }
>>> -
>>> - if (tolower (upper[i]) != upper[i])
>>> - {
>>> - printf ("tolower ('%c') false\n", upper[i]);
>>> - result = 1;
>>> - }
>>> - if (toupper (upper[i]) != lower[i])
>>> - {
>>> - printf ("toupper ('%c') false\n", upper[i]);
>>> - result = 1;
>>> - }
>>> -
>>> - if (iswlower (wupper[i]))
>>> - {
>>> - printf ("iswlower (L'%c') false\n", upper[i]);
>>> - result = 1;
>>> - }
>>> - if (! iswupper (wupper[i]))
>>> - {
>>> - printf ("iswupper (L'%c') false\n", upper[i]);
>>> - result = 1;
>>> - }
>>> -
>>> - if (iswupper (wlower[i]))
>>> - {
>>> - printf ("iswupper (L'%c') false\n", lower[i]);
>>> - result = 1;
>>> - }
>>> - if (! iswlower (wlower[i]))
>>> - {
>>> - printf ("iswlower (L'%c') false\n", lower[i]);
>>> - result = 1;
>>> - }
>>> -
>>> - if (towupper (wlower[i]) != wupper[i])
>>> - {
>>> - printf ("towupper ('%c') false\n", lower[i]);
>>> - result = 1;
>>> - }
>>> - if (towlower (wlower[i]) != wlower[i])
>>> - {
>>> - printf ("towlower ('%c') false\n", lower[i]);
>>> - result = 1;
>>> - }
>>> -
>>> - if (towlower (wupper[i]) != wlower[i])
>>> - {
>>> - printf ("towlower ('%c') false\n", upper[i]);
>>> - result = 1;
>>> - }
>>> - if (towupper (wupper[i]) != wupper[i])
>>> - {
>>> - printf ("towupper ('%c') false\n", upper[i]);
>>> - result = 1;
>>> - }
>>> - }
>>> -
>>> - return result;
>>> -}
>>> diff --git a/localedata/tst-invalid-charset.c
>>> b/localedata/tst-invalid-charset.c
>>> new file mode 100644
>>> index 0000000000..46a5198c66
>>> --- /dev/null
>>> +++ b/localedata/tst-invalid-charset.c
>>> @@ -0,0 +1,31 @@
>>> +/* Test program to verify that setlocale fails for charsets that do
>>> not have a
>>> + converter.
>>> + Copyright (C) 2021 Free Software Foundation, Inc.
>>> + This file is part of the GNU C Library.
>>> +
>>> + The GNU C Library is free software; you can redistribute it and/or
>>> + modify it under the terms of the GNU Lesser General Public
>>> + License as published by the Free Software Foundation; either
>>> + version 2.1 of the License, or (at your option) any later version.
>>> +
>>> + The GNU C Library is distributed in the hope that it will be useful,
>>> + but WITHOUT ANY WARRANTY; without even the implied warranty of
>>> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
>>> + Lesser General Public License for more details.
>>> +
>>> + You should have received a copy of the GNU Lesser General Public
>>> + License along with the GNU C Library; if not, see
>>> + <https://www.gnu.org/licenses/>. */
>>> +
>>> +#include <ctype.h>
>>> +#include <locale.h>
>>> +#include <wchar.h>
>>> +
>>> +
>>> +int
>>> +main (void)
>>> +{
>>> + /* Fail if setlocale succeeds for any of these locales. */
>>> + return (setlocale (LC_ALL, "test5") != NULL
>>> + || setlocale (LC_ALL, "test6") != NULL);
>>> +}
>>>
>>
>
next prev parent reply other threads:[~2021-10-18 13:16 UTC|newest]
Thread overview: 9+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-06-30 8:56 [PATCH] " Siddhesh Poyarekar
2021-07-16 20:28 ` Carlos O'Donell
2021-07-19 8:34 ` Siddhesh Poyarekar
2021-07-20 2:36 ` [PATCH v2] " Siddhesh Poyarekar
2021-08-11 7:42 ` [PING][PATCH " Siddhesh Poyarekar
2021-08-17 2:58 ` [PING2][PATCH " Siddhesh Poyarekar
2021-10-18 13:16 ` Siddhesh Poyarekar [this message]
2022-02-08 9:40 ` [PATCH v3] " Siddhesh Poyarekar
2022-02-17 8:08 ` Florian Weimer
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=0e178208-6c8c-1896-918b-9185d4730b0c@gotplt.org \
--to=siddhesh@gotplt.org \
--cc=carlos@redhat.com \
--cc=libc-alpha@sourceware.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).