public inbox for libc-alpha@sourceware.org
 help / color / mirror / Atom feed
* [PATCH] setlocale: Fail if iconv module for charset is not present [BZ #27996]
@ 2021-06-30  8:56 Siddhesh Poyarekar
  2021-07-16 20:28 ` Carlos O'Donell
  2022-02-08  9:40 ` [PATCH v3] " Siddhesh Poyarekar
  0 siblings, 2 replies; 9+ messages in thread
From: Siddhesh Poyarekar @ 2021-06-30  8:56 UTC (permalink / raw)
  To: libc-alpha; +Cc: fweimer

setlocale currently succeeds even if the requested locale uses a
charset that does not have a converter module installed.  Check for
existence of the charset (either the one requested through the input
name or the one needed by the selected locale file) and fail if it
doesn't.

For testing, test6.c has been recycled because it was unused.  The
test verifes that loading test5 and test6 locales fail because both
locales have charsets without a converter, viz. test5 and test6
respectively.
---
 iconv/gconv_db.c                 |  25 ++++++
 iconv/gconv_int.h                |   2 +
 locale/findlocale.c              |  62 +++++++++-----
 localedata/Makefile              |   8 +-
 localedata/tests/test6.c         | 137 -------------------------------
 localedata/tst-invalid-charset.c |  31 +++++++
 6 files changed, 103 insertions(+), 162 deletions(-)
 delete mode 100644 localedata/tests/test6.c
 create mode 100644 localedata/tst-invalid-charset.c

diff --git a/iconv/gconv_db.c b/iconv/gconv_db.c
index af868e8023..547a6d0a44 100644
--- a/iconv/gconv_db.c
+++ b/iconv/gconv_db.c
@@ -698,6 +698,31 @@ do_lookup_alias (const char *name)
   return found != NULL ? (*found)->toname : NULL;
 }
 
+bool
+__gconv_module_exists (const char *name)
+{
+  /* Ensure that the configuration data is read.  */
+  __gconv_load_conf ();
+
+  name = do_lookup_alias (name) ?: name;
+
+  struct gconv_module *root = __gconv_modules_db;
+
+  while (root != NULL)
+    {
+      int cmpres;
+
+      cmpres = strcmp (name, root->from_string);
+      if (cmpres == 0)
+	return true;
+      else if (cmpres < 0)
+	root = root->left;
+      else
+	root = root->right;
+    }
+
+  return false;
+}
 
 int
 __gconv_compare_alias (const char *name1, const char *name2)
diff --git a/iconv/gconv_int.h b/iconv/gconv_int.h
index 30a9286be2..6a52275700 100644
--- a/iconv/gconv_int.h
+++ b/iconv/gconv_int.h
@@ -269,6 +269,8 @@ libc_hidden_proto (__gconv_transliterate)
 extern int __gconv_compare_alias (const char *name1, const char *name2)
      attribute_hidden;
 
+/* Return true if NAME exists either as a module or an alias.  */
+extern bool __gconv_module_exists (const char *name) attribute_hidden;
 
 /* Builtin transformations.  */
 #ifdef _LIBC
diff --git a/locale/findlocale.c b/locale/findlocale.c
index ab09122b0c..10dfe2aef3 100644
--- a/locale/findlocale.c
+++ b/locale/findlocale.c
@@ -98,6 +98,15 @@ valid_locale_name (const char *name)
   return 1;
 }
 
+static bool
+codeset_has_module (const char *codeset)
+{
+  char *ccodeset = (char *) alloca (strlen (codeset) + 3);
+  strip (ccodeset, codeset);
+
+  return __gconv_module_exists (ccodeset);
+}
+
 struct __locale_data *
 _nl_find_locale (const char *locale_path, size_t locale_path_len,
 		 int category, const char **name)
@@ -200,6 +209,10 @@ _nl_find_locale (const char *locale_path, size_t locale_path_len,
     /* Memory allocate problem.  */
     return NULL;
 
+  /* The requested codeset does not have a converter, don't use it.  */
+  if (codeset != NULL && !codeset_has_module (codeset))
+    return NULL;
+
   /* If exactly this locale was already asked for we have an entry with
      the complete name.  */
   locale_file = _nl_make_l10nflist (&_nl_locale_file_list[category],
@@ -248,6 +261,33 @@ _nl_find_locale (const char *locale_path, size_t locale_path_len,
 	return NULL;
     }
 
+  /* Get the codeset information from the locale file.  */
+  static const int codeset_idx[] =
+    {
+      [__LC_CTYPE] = _NL_ITEM_INDEX (CODESET),
+      [__LC_NUMERIC] = _NL_ITEM_INDEX (_NL_NUMERIC_CODESET),
+      [__LC_TIME] = _NL_ITEM_INDEX (_NL_TIME_CODESET),
+      [__LC_COLLATE] = _NL_ITEM_INDEX (_NL_COLLATE_CODESET),
+      [__LC_MONETARY] = _NL_ITEM_INDEX (_NL_MONETARY_CODESET),
+      [__LC_MESSAGES] = _NL_ITEM_INDEX (_NL_MESSAGES_CODESET),
+      [__LC_PAPER] = _NL_ITEM_INDEX (_NL_PAPER_CODESET),
+      [__LC_NAME] = _NL_ITEM_INDEX (_NL_NAME_CODESET),
+      [__LC_ADDRESS] = _NL_ITEM_INDEX (_NL_ADDRESS_CODESET),
+      [__LC_TELEPHONE] = _NL_ITEM_INDEX (_NL_TELEPHONE_CODESET),
+      [__LC_MEASUREMENT] = _NL_ITEM_INDEX (_NL_MEASUREMENT_CODESET),
+      [__LC_IDENTIFICATION] = _NL_ITEM_INDEX (_NL_IDENTIFICATION_CODESET)
+    };
+  const struct __locale_data *data;
+  const char *locale_codeset;
+
+  data = (const struct __locale_data *) locale_file->data;
+  locale_codeset = (const char *) data->values[codeset_idx[category]].string;
+  assert (locale_codeset != NULL);
+
+  /* The locale codeset does not have a converter, don't use it.  */
+  if (locale_codeset[0] != '\0' && !codeset_has_module (locale_codeset))
+    return NULL;
+
   /* The LC_CTYPE category allows to check whether a locale is really
      usable.  If the locale name contains a charset name and the
      charset name used in the locale (present in the LC_CTYPE data) is
@@ -256,31 +296,9 @@ _nl_find_locale (const char *locale_path, size_t locale_path_len,
      in the locale name.  */
   if (codeset != NULL)
     {
-      /* Get the codeset information from the locale file.  */
-      static const int codeset_idx[] =
-	{
-	  [__LC_CTYPE] = _NL_ITEM_INDEX (CODESET),
-	  [__LC_NUMERIC] = _NL_ITEM_INDEX (_NL_NUMERIC_CODESET),
-	  [__LC_TIME] = _NL_ITEM_INDEX (_NL_TIME_CODESET),
-	  [__LC_COLLATE] = _NL_ITEM_INDEX (_NL_COLLATE_CODESET),
-	  [__LC_MONETARY] = _NL_ITEM_INDEX (_NL_MONETARY_CODESET),
-	  [__LC_MESSAGES] = _NL_ITEM_INDEX (_NL_MESSAGES_CODESET),
-	  [__LC_PAPER] = _NL_ITEM_INDEX (_NL_PAPER_CODESET),
-	  [__LC_NAME] = _NL_ITEM_INDEX (_NL_NAME_CODESET),
-	  [__LC_ADDRESS] = _NL_ITEM_INDEX (_NL_ADDRESS_CODESET),
-	  [__LC_TELEPHONE] = _NL_ITEM_INDEX (_NL_TELEPHONE_CODESET),
-	  [__LC_MEASUREMENT] = _NL_ITEM_INDEX (_NL_MEASUREMENT_CODESET),
-	  [__LC_IDENTIFICATION] = _NL_ITEM_INDEX (_NL_IDENTIFICATION_CODESET)
-	};
-      const struct __locale_data *data;
-      const char *locale_codeset;
       char *clocale_codeset;
       char *ccodeset;
 
-      data = (const struct __locale_data *) locale_file->data;
-      locale_codeset =
-	(const char *) data->values[codeset_idx[category]].string;
-      assert (locale_codeset != NULL);
       /* Note the length of the allocated memory: +3 for up to two slashes
 	 and the NUL byte.  */
       clocale_codeset = (char *) alloca (strlen (locale_codeset) + 3);
diff --git a/localedata/Makefile b/localedata/Makefile
index 14e04cd3c5..797523b250 100644
--- a/localedata/Makefile
+++ b/localedata/Makefile
@@ -128,7 +128,7 @@ ld-test-names := test1 test2 test3 test4 test5 test6 test7
 ld-test-srcs := $(addprefix tests/,$(addsuffix .cm,$(ld-test-names)) \
 				   $(addsuffix .def,$(ld-test-names)) \
 				   $(addsuffix .ds,test5 test6) \
-				   test6.c trans.def)
+				   trans.def)
 
 fmon-tests = n01y12 n02n40 n10y31 n11y41 n12y11 n20n32 n30y20 n41n00 \
 	     y01y10 y02n22 y22n42 y30y21 y32n31 y40y00 y42n21
@@ -158,7 +158,7 @@ tests = $(locale_test_suite) tst-digits tst-setlocale bug-iconv-trans \
 	tst-leaks tst-mbswcs1 tst-mbswcs2 tst-mbswcs3 tst-mbswcs4 tst-mbswcs5 \
 	tst-mbswcs6 tst-xlocale1 tst-xlocale2 bug-usesetlocale \
 	tst-strfmon1 tst-sscanf bug-setlocale1 tst-setlocale2 tst-setlocale3 \
-	tst-wctype tst-iconv-math-trans
+	tst-wctype tst-iconv-math-trans tst-invalid-charset
 tests-static = bug-setlocale1-static
 tests += $(tests-static)
 ifeq (yes,$(build-shared))
@@ -402,6 +402,7 @@ $(objpfx)tst-langinfo-setlocale-static.out: tst-langinfo.sh \
 	$(evaluate-test)
 
 $(objpfx)tst-digits.out: $(objpfx)tst-locale.out
+$(objpfx)tst-invalid-charset.out: $(objpfx)tst-locale.out
 $(objpfx)tst-mbswcs6.out: $(addprefix $(objpfx),$(CTYPE_FILES))
 endif
 
@@ -461,7 +462,8 @@ $(objpfx)mtrace-tst-leaks.out: $(objpfx)tst-leaks.out
 	$(common-objpfx)malloc/mtrace $(objpfx)tst-leaks.mtrace > $@; \
 	$(evaluate-test)
 
-bug-setlocale1-ENV-only = LOCPATH=$(objpfx) LC_CTYPE=de_DE.UTF-8
+bug-setlocale1-ENV-only = GCONV_PATH=$(common-objpfx)iconvdata \
+			  LOCPATH=$(objpfx) LC_CTYPE=de_DE.UTF-8
 bug-setlocale1-static-ENV-only = $(bug-setlocale1-ENV-only)
 
 $(objdir)/iconvdata/gconv-modules:
diff --git a/localedata/tests/test6.c b/localedata/tests/test6.c
deleted file mode 100644
index edb5fe4a5f..0000000000
--- a/localedata/tests/test6.c
+++ /dev/null
@@ -1,137 +0,0 @@
-/* Test program for character classes and mappings.
-   Copyright (C) 1999-2021 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-   Contributed by Ulrich Drepper <drepper@cygnus.com>, 1999.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <https://www.gnu.org/licenses/>.  */
-
-#include <ctype.h>
-#include <locale.h>
-#include <wchar.h>
-
-
-int
-main (void)
-{
-  const char lower[] = "abcdefghijklmnopqrstuvwxyz";
-  const char upper[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
-#define LEN (sizeof (upper) - 1)
-  const wchar_t wlower[] = L"abcdefghijklmnopqrstuvwxyz";
-  const wchar_t wupper[] = L"ABCDEFGHIJKLMNOPQRSTUVWXYZ";
-  int i;
-  int result = 0;
-
-  setlocale (LC_ALL, "test6");
-
-  for (i = 0; i < LEN; ++i)
-    {
-      /* Test basic table handling (basic == not more than 256 characters).
-	 The charmaps swaps the normal lower-upper case meaning of the
-	 ASCII characters used in the source code while the Unicode mapping
-	 in the repertoire map has the normal correspondents.  This test
-	 shows the independence of the tables for `char' and `wchar_t'
-	 characters.  */
-
-      if (islower (lower[i]))
-	{
-	  printf ("islower ('%c') false\n", lower[i]);
-	  result = 1;
-	}
-      if (! isupper (lower[i]))
-	{
-	  printf ("isupper ('%c') false\n", lower[i]);
-	  result = 1;
-	}
-
-      if (! islower (upper[i]))
-	{
-	  printf ("islower ('%c') false\n", upper[i]);
-	  result = 1;
-	}
-      if (isupper (upper[i]))
-	{
-	  printf ("isupper ('%c') false\n", upper[i]);
-	  result = 1;
-	}
-
-      if (toupper (lower[i]) != lower[i])
-	{
-	  printf ("toupper ('%c') false\n", lower[i]);
-	  result = 1;
-	}
-      if (tolower (lower[i]) != upper[i])
-	{
-	  printf ("tolower ('%c') false\n", lower[i]);
-	  result = 1;
-	}
-
-      if (tolower (upper[i]) != upper[i])
-	{
-	  printf ("tolower ('%c') false\n", upper[i]);
-	  result = 1;
-	}
-      if (toupper (upper[i]) != lower[i])
-	{
-	  printf ("toupper ('%c') false\n", upper[i]);
-	  result = 1;
-	}
-
-      if (iswlower (wupper[i]))
-	{
-	  printf ("iswlower (L'%c') false\n", upper[i]);
-	  result = 1;
-	}
-      if (! iswupper (wupper[i]))
-	{
-	  printf ("iswupper (L'%c') false\n", upper[i]);
-	  result = 1;
-	}
-
-      if (iswupper (wlower[i]))
-	{
-	  printf ("iswupper (L'%c') false\n", lower[i]);
-	  result = 1;
-	}
-      if (! iswlower (wlower[i]))
-	{
-	  printf ("iswlower (L'%c') false\n", lower[i]);
-	  result = 1;
-	}
-
-      if (towupper (wlower[i]) != wupper[i])
-	{
-	  printf ("towupper ('%c') false\n", lower[i]);
-	  result = 1;
-	}
-      if (towlower (wlower[i]) != wlower[i])
-	{
-	  printf ("towlower ('%c') false\n", lower[i]);
-	  result = 1;
-	}
-
-      if (towlower (wupper[i]) != wlower[i])
-	{
-	  printf ("towlower ('%c') false\n", upper[i]);
-	  result = 1;
-	}
-      if (towupper (wupper[i]) != wupper[i])
-	{
-	  printf ("towupper ('%c') false\n", upper[i]);
-	  result = 1;
-	}
-    }
-
-  return result;
-}
diff --git a/localedata/tst-invalid-charset.c b/localedata/tst-invalid-charset.c
new file mode 100644
index 0000000000..46a5198c66
--- /dev/null
+++ b/localedata/tst-invalid-charset.c
@@ -0,0 +1,31 @@
+/* Test program to verify that setlocale fails for charsets that do not have a
+   converter.
+   Copyright (C) 2021 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <ctype.h>
+#include <locale.h>
+#include <wchar.h>
+
+
+int
+main (void)
+{
+  /* Fail if setlocale succeeds for any of these locales.  */
+  return (setlocale (LC_ALL, "test5") != NULL
+	  || setlocale (LC_ALL, "test6") != NULL);
+}
-- 
2.31.1


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] setlocale: Fail if iconv module for charset is not present [BZ #27996]
  2021-06-30  8:56 [PATCH] setlocale: Fail if iconv module for charset is not present [BZ #27996] Siddhesh Poyarekar
@ 2021-07-16 20:28 ` Carlos O'Donell
  2021-07-19  8:34   ` Siddhesh Poyarekar
  2022-02-08  9:40 ` [PATCH v3] " Siddhesh Poyarekar
  1 sibling, 1 reply; 9+ messages in thread
From: Carlos O'Donell @ 2021-07-16 20:28 UTC (permalink / raw)
  To: Siddhesh Poyarekar, libc-alpha; +Cc: fweimer

On 6/30/21 4:56 AM, Siddhesh Poyarekar via Libc-alpha wrote:
> setlocale currently succeeds even if the requested locale uses a
> charset that does not have a converter module installed.  Check for
> existence of the charset (either the one requested through the input
> name or the one needed by the selected locale file) and fail if it
> doesn't.

I think this is an important part of being able to reliably reduce the
number of installed iconv converters as a method for hardening an install.

I think we need stricter testing in __gconv_module_exists before this is
complete. Given that you are testing both directions the name of the functions
probably needs changing. You are no longer testing for a module but for the
modules required to transit the charset encoding into and out of glibc.
 
> For testing, test6.c has been recycled because it was unused.  The
> test verifes that loading test5 and test6 locales fail because both
> locales have charsets without a converter, viz. test5 and test6
> respectively.

Please indicate that test6.c has been *removed* not recycled (commit
message update).

> ---
>  iconv/gconv_db.c                 |  25 ++++++
>  iconv/gconv_int.h                |   2 +
>  locale/findlocale.c              |  62 +++++++++-----
>  localedata/Makefile              |   8 +-
>  localedata/tests/test6.c         | 137 -------------------------------
>  localedata/tst-invalid-charset.c |  31 +++++++
>  6 files changed, 103 insertions(+), 162 deletions(-)
>  delete mode 100644 localedata/tests/test6.c
>  create mode 100644 localedata/tst-invalid-charset.c
> 
> diff --git a/iconv/gconv_db.c b/iconv/gconv_db.c
> index af868e8023..547a6d0a44 100644
> --- a/iconv/gconv_db.c
> +++ b/iconv/gconv_db.c
> @@ -698,6 +698,31 @@ do_lookup_alias (const char *name)
>    return found != NULL ? (*found)->toname : NULL;
>  }
>  
> +bool
> +__gconv_module_exists (const char *name)
> +{
> +  /* Ensure that the configuration data is read.  */
> +  __gconv_load_conf ();

OK. Load modules available on the system.

> +
> +  name = do_lookup_alias (name) ?: name;

OK.

> +
> +  struct gconv_module *root = __gconv_modules_db;
> +
> +  while (root != NULL)
> +    {
> +      int cmpres;
> +
> +      cmpres = strcmp (name, root->from_string);

I think this is incomplete.

(a) We must have a converter that goes from name->INTERNAL.
(b) We must have a converter that goes from INTERNAL->name.

You are only checking for (a) here, which means we can read
input of the charset, but we can't write it out via printf
and I don't think that would match user expectations.

> +      if (cmpres == 0)
> +	return true;

OK. I guess we don't care to match the ->same entries.

> +      else if (cmpres < 0)
> +	root = root->left;
> +      else
> +	root = root->right;

OK.

> +    }
> +
> +  return false;
> +}

OK.

>  
>  int
>  __gconv_compare_alias (const char *name1, const char *name2)
> diff --git a/iconv/gconv_int.h b/iconv/gconv_int.h
> index 30a9286be2..6a52275700 100644
> --- a/iconv/gconv_int.h
> +++ b/iconv/gconv_int.h
> @@ -269,6 +269,8 @@ libc_hidden_proto (__gconv_transliterate)
>  extern int __gconv_compare_alias (const char *name1, const char *name2)
>       attribute_hidden;
>  
> +/* Return true if NAME exists either as a module or an alias.  */
> +extern bool __gconv_module_exists (const char *name) attribute_hidden;

OK. New function.

>  
>  /* Builtin transformations.  */
>  #ifdef _LIBC
> diff --git a/locale/findlocale.c b/locale/findlocale.c
> index ab09122b0c..10dfe2aef3 100644
> --- a/locale/findlocale.c
> +++ b/locale/findlocale.c
> @@ -98,6 +98,15 @@ valid_locale_name (const char *name)
>    return 1;
>  }
>  
> +static bool
> +codeset_has_module (const char *codeset)
> +{
> +  char *ccodeset = (char *) alloca (strlen (codeset) + 3);

OK. NULL + up to 2 chars from strip.

> +  strip (ccodeset, codeset);

OK. Use strip to normalize.

> +
> +  return __gconv_module_exists (ccodeset);
> +}
> +
>  struct __locale_data *
>  _nl_find_locale (const char *locale_path, size_t locale_path_len,
>  		 int category, const char **name)
> @@ -200,6 +209,10 @@ _nl_find_locale (const char *locale_path, size_t locale_path_len,
>      /* Memory allocate problem.  */
>      return NULL;
>  
> +  /* The requested codeset does not have a converter, don't use it.  */
> +  if (codeset != NULL && !codeset_has_module (codeset))
> +    return NULL;

OK.

> +
>    /* If exactly this locale was already asked for we have an entry with
>       the complete name.  */
>    locale_file = _nl_make_l10nflist (&_nl_locale_file_list[category],
> @@ -248,6 +261,33 @@ _nl_find_locale (const char *locale_path, size_t locale_path_len,
>  	return NULL;
>      }
>  
> +  /* Get the codeset information from the locale file.  */
> +  static const int codeset_idx[] =
> +    {
> +      [__LC_CTYPE] = _NL_ITEM_INDEX (CODESET),
> +      [__LC_NUMERIC] = _NL_ITEM_INDEX (_NL_NUMERIC_CODESET),
> +      [__LC_TIME] = _NL_ITEM_INDEX (_NL_TIME_CODESET),
> +      [__LC_COLLATE] = _NL_ITEM_INDEX (_NL_COLLATE_CODESET),
> +      [__LC_MONETARY] = _NL_ITEM_INDEX (_NL_MONETARY_CODESET),
> +      [__LC_MESSAGES] = _NL_ITEM_INDEX (_NL_MESSAGES_CODESET),
> +      [__LC_PAPER] = _NL_ITEM_INDEX (_NL_PAPER_CODESET),
> +      [__LC_NAME] = _NL_ITEM_INDEX (_NL_NAME_CODESET),
> +      [__LC_ADDRESS] = _NL_ITEM_INDEX (_NL_ADDRESS_CODESET),
> +      [__LC_TELEPHONE] = _NL_ITEM_INDEX (_NL_TELEPHONE_CODESET),
> +      [__LC_MEASUREMENT] = _NL_ITEM_INDEX (_NL_MEASUREMENT_CODESET),
> +      [__LC_IDENTIFICATION] = _NL_ITEM_INDEX (_NL_IDENTIFICATION_CODESET)
> +    };
> +  const struct __locale_data *data;
> +  const char *locale_codeset;
> +
> +  data = (const struct __locale_data *) locale_file->data;
> +  locale_codeset = (const char *) data->values[codeset_idx[category]].string;
> +  assert (locale_codeset != NULL);
> +
> +  /* The locale codeset does not have a converter, don't use it.  */
> +  if (locale_codeset[0] != '\0' && !codeset_has_module (locale_codeset))
> +    return NULL;

OK. Move this code earlier and look for a converter for the codeset.

> +
>    /* The LC_CTYPE category allows to check whether a locale is really
>       usable.  If the locale name contains a charset name and the
>       charset name used in the locale (present in the LC_CTYPE data) is
> @@ -256,31 +296,9 @@ _nl_find_locale (const char *locale_path, size_t locale_path_len,
>       in the locale name.  */
>    if (codeset != NULL)
>      {
> -      /* Get the codeset information from the locale file.  */
> -      static const int codeset_idx[] =
> -	{
> -	  [__LC_CTYPE] = _NL_ITEM_INDEX (CODESET),
> -	  [__LC_NUMERIC] = _NL_ITEM_INDEX (_NL_NUMERIC_CODESET),
> -	  [__LC_TIME] = _NL_ITEM_INDEX (_NL_TIME_CODESET),
> -	  [__LC_COLLATE] = _NL_ITEM_INDEX (_NL_COLLATE_CODESET),
> -	  [__LC_MONETARY] = _NL_ITEM_INDEX (_NL_MONETARY_CODESET),
> -	  [__LC_MESSAGES] = _NL_ITEM_INDEX (_NL_MESSAGES_CODESET),
> -	  [__LC_PAPER] = _NL_ITEM_INDEX (_NL_PAPER_CODESET),
> -	  [__LC_NAME] = _NL_ITEM_INDEX (_NL_NAME_CODESET),
> -	  [__LC_ADDRESS] = _NL_ITEM_INDEX (_NL_ADDRESS_CODESET),
> -	  [__LC_TELEPHONE] = _NL_ITEM_INDEX (_NL_TELEPHONE_CODESET),
> -	  [__LC_MEASUREMENT] = _NL_ITEM_INDEX (_NL_MEASUREMENT_CODESET),
> -	  [__LC_IDENTIFICATION] = _NL_ITEM_INDEX (_NL_IDENTIFICATION_CODESET)
> -	};
> -      const struct __locale_data *data;
> -      const char *locale_codeset;
>        char *clocale_codeset;
>        char *ccodeset;
>  
> -      data = (const struct __locale_data *) locale_file->data;
> -      locale_codeset =
> -	(const char *) data->values[codeset_idx[category]].string;
> -      assert (locale_codeset != NULL);
>        /* Note the length of the allocated memory: +3 for up to two slashes
>  	 and the NUL byte.  */
>        clocale_codeset = (char *) alloca (strlen (locale_codeset) + 3);
> diff --git a/localedata/Makefile b/localedata/Makefile
> index 14e04cd3c5..797523b250 100644
> --- a/localedata/Makefile
> +++ b/localedata/Makefile
> @@ -128,7 +128,7 @@ ld-test-names := test1 test2 test3 test4 test5 test6 test7
>  ld-test-srcs := $(addprefix tests/,$(addsuffix .cm,$(ld-test-names)) \
>  				   $(addsuffix .def,$(ld-test-names)) \
>  				   $(addsuffix .ds,test5 test6) \
> -				   test6.c trans.def)
> +				   trans.def)

OK. Remove unused test6.c

>  
>  fmon-tests = n01y12 n02n40 n10y31 n11y41 n12y11 n20n32 n30y20 n41n00 \
>  	     y01y10 y02n22 y22n42 y30y21 y32n31 y40y00 y42n21
> @@ -158,7 +158,7 @@ tests = $(locale_test_suite) tst-digits tst-setlocale bug-iconv-trans \
>  	tst-leaks tst-mbswcs1 tst-mbswcs2 tst-mbswcs3 tst-mbswcs4 tst-mbswcs5 \
>  	tst-mbswcs6 tst-xlocale1 tst-xlocale2 bug-usesetlocale \
>  	tst-strfmon1 tst-sscanf bug-setlocale1 tst-setlocale2 tst-setlocale3 \
> -	tst-wctype tst-iconv-math-trans
> +	tst-wctype tst-iconv-math-trans tst-invalid-charset

OK.

>  tests-static = bug-setlocale1-static
>  tests += $(tests-static)
>  ifeq (yes,$(build-shared))
> @@ -402,6 +402,7 @@ $(objpfx)tst-langinfo-setlocale-static.out: tst-langinfo.sh \
>  	$(evaluate-test)
>  
>  $(objpfx)tst-digits.out: $(objpfx)tst-locale.out
> +$(objpfx)tst-invalid-charset.out: $(objpfx)tst-locale.out

This needs a comment saying why it depends on tst-locale.out
e.g. because it wants to use the compiled test locales which have
no charset converters.

>  $(objpfx)tst-mbswcs6.out: $(addprefix $(objpfx),$(CTYPE_FILES))
>  endif
>  
> @@ -461,7 +462,8 @@ $(objpfx)mtrace-tst-leaks.out: $(objpfx)tst-leaks.out
>  	$(common-objpfx)malloc/mtrace $(objpfx)tst-leaks.mtrace > $@; \
>  	$(evaluate-test)
>  
> -bug-setlocale1-ENV-only = LOCPATH=$(objpfx) LC_CTYPE=de_DE.UTF-8
> +bug-setlocale1-ENV-only = GCONV_PATH=$(common-objpfx)iconvdata \
> +			  LOCPATH=$(objpfx) LC_CTYPE=de_DE.UTF-8

OK. Fix test.

>  bug-setlocale1-static-ENV-only = $(bug-setlocale1-ENV-only)
>  
>  $(objdir)/iconvdata/gconv-modules:
> diff --git a/localedata/tests/test6.c b/localedata/tests/test6.c
> deleted file mode 100644
> index edb5fe4a5f..0000000000
> --- a/localedata/tests/test6.c
> +++ /dev/null
> @@ -1,137 +0,0 @@
> -/* Test program for character classes and mappings.
> -   Copyright (C) 1999-2021 Free Software Foundation, Inc.
> -   This file is part of the GNU C Library.
> -   Contributed by Ulrich Drepper <drepper@cygnus.com>, 1999.
> -
> -   The GNU C Library is free software; you can redistribute it and/or
> -   modify it under the terms of the GNU Lesser General Public
> -   License as published by the Free Software Foundation; either
> -   version 2.1 of the License, or (at your option) any later version.
> -
> -   The GNU C Library is distributed in the hope that it will be useful,
> -   but WITHOUT ANY WARRANTY; without even the implied warranty of
> -   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> -   Lesser General Public License for more details.
> -
> -   You should have received a copy of the GNU Lesser General Public
> -   License along with the GNU C Library; if not, see
> -   <https://www.gnu.org/licenses/>.  */
> -
> -#include <ctype.h>
> -#include <locale.h>
> -#include <wchar.h>
> -
> -
> -int
> -main (void)
> -{
> -  const char lower[] = "abcdefghijklmnopqrstuvwxyz";
> -  const char upper[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
> -#define LEN (sizeof (upper) - 1)
> -  const wchar_t wlower[] = L"abcdefghijklmnopqrstuvwxyz";
> -  const wchar_t wupper[] = L"ABCDEFGHIJKLMNOPQRSTUVWXYZ";
> -  int i;
> -  int result = 0;
> -
> -  setlocale (LC_ALL, "test6");
> -
> -  for (i = 0; i < LEN; ++i)
> -    {
> -      /* Test basic table handling (basic == not more than 256 characters).
> -	 The charmaps swaps the normal lower-upper case meaning of the
> -	 ASCII characters used in the source code while the Unicode mapping
> -	 in the repertoire map has the normal correspondents.  This test
> -	 shows the independence of the tables for `char' and `wchar_t'
> -	 characters.  */
> -
> -      if (islower (lower[i]))
> -	{
> -	  printf ("islower ('%c') false\n", lower[i]);
> -	  result = 1;
> -	}
> -      if (! isupper (lower[i]))
> -	{
> -	  printf ("isupper ('%c') false\n", lower[i]);
> -	  result = 1;
> -	}
> -
> -      if (! islower (upper[i]))
> -	{
> -	  printf ("islower ('%c') false\n", upper[i]);
> -	  result = 1;
> -	}
> -      if (isupper (upper[i]))
> -	{
> -	  printf ("isupper ('%c') false\n", upper[i]);
> -	  result = 1;
> -	}
> -
> -      if (toupper (lower[i]) != lower[i])
> -	{
> -	  printf ("toupper ('%c') false\n", lower[i]);
> -	  result = 1;
> -	}
> -      if (tolower (lower[i]) != upper[i])
> -	{
> -	  printf ("tolower ('%c') false\n", lower[i]);
> -	  result = 1;
> -	}
> -
> -      if (tolower (upper[i]) != upper[i])
> -	{
> -	  printf ("tolower ('%c') false\n", upper[i]);
> -	  result = 1;
> -	}
> -      if (toupper (upper[i]) != lower[i])
> -	{
> -	  printf ("toupper ('%c') false\n", upper[i]);
> -	  result = 1;
> -	}
> -
> -      if (iswlower (wupper[i]))
> -	{
> -	  printf ("iswlower (L'%c') false\n", upper[i]);
> -	  result = 1;
> -	}
> -      if (! iswupper (wupper[i]))
> -	{
> -	  printf ("iswupper (L'%c') false\n", upper[i]);
> -	  result = 1;
> -	}
> -
> -      if (iswupper (wlower[i]))
> -	{
> -	  printf ("iswupper (L'%c') false\n", lower[i]);
> -	  result = 1;
> -	}
> -      if (! iswlower (wlower[i]))
> -	{
> -	  printf ("iswlower (L'%c') false\n", lower[i]);
> -	  result = 1;
> -	}
> -
> -      if (towupper (wlower[i]) != wupper[i])
> -	{
> -	  printf ("towupper ('%c') false\n", lower[i]);
> -	  result = 1;
> -	}
> -      if (towlower (wlower[i]) != wlower[i])
> -	{
> -	  printf ("towlower ('%c') false\n", lower[i]);
> -	  result = 1;
> -	}
> -
> -      if (towlower (wupper[i]) != wlower[i])
> -	{
> -	  printf ("towlower ('%c') false\n", upper[i]);
> -	  result = 1;
> -	}
> -      if (towupper (wupper[i]) != wupper[i])
> -	{
> -	  printf ("towupper ('%c') false\n", upper[i]);
> -	  result = 1;
> -	}
> -    }
> -
> -  return result;
> -}
> diff --git a/localedata/tst-invalid-charset.c b/localedata/tst-invalid-charset.c
> new file mode 100644
> index 0000000000..46a5198c66
> --- /dev/null
> +++ b/localedata/tst-invalid-charset.c
> @@ -0,0 +1,31 @@
> +/* Test program to verify that setlocale fails for charsets that do not have a
> +   converter.
> +   Copyright (C) 2021 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +#include <ctype.h>
> +#include <locale.h>
> +#include <wchar.h>
> +
> +
> +int
> +main (void)
> +{
> +  /* Fail if setlocale succeeds for any of these locales.  */

Please add a comment in localedata/Makefile where ld-test-names
is set and mention that this test depends on test5 and test6
having no charset converters.

> +  return (setlocale (LC_ALL, "test5") != NULL
> +	  || setlocale (LC_ALL, "test6") != NULL);
> +}
> 


-- 
Cheers,
Carlos.


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] setlocale: Fail if iconv module for charset is not present [BZ #27996]
  2021-07-16 20:28 ` Carlos O'Donell
@ 2021-07-19  8:34   ` Siddhesh Poyarekar
  2021-07-20  2:36     ` [PATCH v2] " Siddhesh Poyarekar
  0 siblings, 1 reply; 9+ messages in thread
From: Siddhesh Poyarekar @ 2021-07-19  8:34 UTC (permalink / raw)
  To: Carlos O'Donell, libc-alpha; +Cc: fweimer

On 7/17/21 1:58 AM, Carlos O'Donell wrote:
>> +
>> +  struct gconv_module *root = __gconv_modules_db;
>> +
>> +  while (root != NULL)
>> +    {
>> +      int cmpres;
>> +
>> +      cmpres = strcmp (name, root->from_string);
> 
> I think this is incomplete.
> 
> (a) We must have a converter that goes from name->INTERNAL.
> (b) We must have a converter that goes from INTERNAL->name.
> 
> You are only checking for (a) here, which means we can read
> input of the charset, but we can't write it out via printf
> and I don't think that would match user expectations.

Hmm, I had assumed that the presence of name->INTERNAL implies the 
presence of INTERNAL->name.  If we had to be properly general purpose 
then the check ought to confirm that we can actually find a 
transformation from name->INTERNAL and back:

===========
if (__gconv_find_transform ("INTERNAL", name, &steps, &nsteps,
                             0) != __GCONV_OK)
   return false;

__gconv_close_transform (steps, nsteps);

if (__gconv_find_transform (name, "INTERNAL", &steps, &nsteps,
                             0) != __GCONV_OK)
   return false;

__gconv_close_transform (steps, nsteps);

return true;
===========

Does that sound right?

Siddhesh

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [PATCH v2] setlocale: Fail if iconv module for charset is not present [BZ #27996]
  2021-07-19  8:34   ` Siddhesh Poyarekar
@ 2021-07-20  2:36     ` Siddhesh Poyarekar
  2021-08-11  7:42       ` [PING][PATCH " Siddhesh Poyarekar
  0 siblings, 1 reply; 9+ messages in thread
From: Siddhesh Poyarekar @ 2021-07-20  2:36 UTC (permalink / raw)
  To: libc-alpha

setlocale currently succeeds even if the requested locale uses a
charset that does not have a converter module installed.  Check for
existence of the charset (either the one requested through the input
name or the one needed by the selected locale file) and fail if it
doesn't.

The new test tst-invalid-charset verifes that loading test5 and test6
locales fail because both locales have charsets without a converter,
viz. test5 and test6 respectively.  Also, test6.c has been removed as
it was unused.
---
Changes from v1:
- Find full transformation paths both ways instead of merely looking for
  a FROM converter.

 locale/findlocale.c              |  77 ++++++++++++-----
 localedata/Makefile              |  12 ++-
 localedata/tests/test6.c         | 137 -------------------------------
 localedata/tst-invalid-charset.c |  31 +++++++
 4 files changed, 95 insertions(+), 162 deletions(-)
 delete mode 100644 localedata/tests/test6.c
 create mode 100644 localedata/tst-invalid-charset.c

diff --git a/locale/findlocale.c b/locale/findlocale.c
index ab09122b0c..7ccc98cd8b 100644
--- a/locale/findlocale.c
+++ b/locale/findlocale.c
@@ -98,6 +98,30 @@ valid_locale_name (const char *name)
   return 1;
 }
 
+/* Return true if we have gconv modules to transform between the INTERNAL
+   encoding and CODESET.  */
+static bool
+codeset_has_module (const char *codeset)
+{
+  struct __gconv_step *steps;
+  size_t nsteps;
+
+  char *ccodeset = (char *) alloca (strlen (codeset) + 3);
+  strip (ccodeset, codeset);
+
+  if (__gconv_find_transform ("INTERNAL", ccodeset, &steps, &nsteps, 0)
+      != __GCONV_OK)
+    return false;
+  __gconv_close_transform (steps, nsteps);
+
+  if (__gconv_find_transform (ccodeset, "INTERNAL", &steps, &nsteps, 0)
+      != __GCONV_OK)
+    return false;
+  __gconv_close_transform (steps, nsteps);
+
+  return true;
+}
+
 struct __locale_data *
 _nl_find_locale (const char *locale_path, size_t locale_path_len,
 		 int category, const char **name)
@@ -200,6 +224,10 @@ _nl_find_locale (const char *locale_path, size_t locale_path_len,
     /* Memory allocate problem.  */
     return NULL;
 
+  /* The requested codeset does not have a converter, don't use it.  */
+  if (codeset != NULL && !codeset_has_module (codeset))
+    return NULL;
+
   /* If exactly this locale was already asked for we have an entry with
      the complete name.  */
   locale_file = _nl_make_l10nflist (&_nl_locale_file_list[category],
@@ -248,6 +276,33 @@ _nl_find_locale (const char *locale_path, size_t locale_path_len,
 	return NULL;
     }
 
+  /* Get the codeset information from the locale file.  */
+  static const int codeset_idx[] =
+    {
+      [__LC_CTYPE] = _NL_ITEM_INDEX (CODESET),
+      [__LC_NUMERIC] = _NL_ITEM_INDEX (_NL_NUMERIC_CODESET),
+      [__LC_TIME] = _NL_ITEM_INDEX (_NL_TIME_CODESET),
+      [__LC_COLLATE] = _NL_ITEM_INDEX (_NL_COLLATE_CODESET),
+      [__LC_MONETARY] = _NL_ITEM_INDEX (_NL_MONETARY_CODESET),
+      [__LC_MESSAGES] = _NL_ITEM_INDEX (_NL_MESSAGES_CODESET),
+      [__LC_PAPER] = _NL_ITEM_INDEX (_NL_PAPER_CODESET),
+      [__LC_NAME] = _NL_ITEM_INDEX (_NL_NAME_CODESET),
+      [__LC_ADDRESS] = _NL_ITEM_INDEX (_NL_ADDRESS_CODESET),
+      [__LC_TELEPHONE] = _NL_ITEM_INDEX (_NL_TELEPHONE_CODESET),
+      [__LC_MEASUREMENT] = _NL_ITEM_INDEX (_NL_MEASUREMENT_CODESET),
+      [__LC_IDENTIFICATION] = _NL_ITEM_INDEX (_NL_IDENTIFICATION_CODESET)
+    };
+  const struct __locale_data *data;
+  const char *locale_codeset;
+
+  data = (const struct __locale_data *) locale_file->data;
+  locale_codeset = (const char *) data->values[codeset_idx[category]].string;
+  assert (locale_codeset != NULL);
+
+  /* The locale codeset does not have a converter, don't use it.  */
+  if (locale_codeset[0] != '\0' && !codeset_has_module (locale_codeset))
+    return NULL;
+
   /* The LC_CTYPE category allows to check whether a locale is really
      usable.  If the locale name contains a charset name and the
      charset name used in the locale (present in the LC_CTYPE data) is
@@ -256,31 +311,9 @@ _nl_find_locale (const char *locale_path, size_t locale_path_len,
      in the locale name.  */
   if (codeset != NULL)
     {
-      /* Get the codeset information from the locale file.  */
-      static const int codeset_idx[] =
-	{
-	  [__LC_CTYPE] = _NL_ITEM_INDEX (CODESET),
-	  [__LC_NUMERIC] = _NL_ITEM_INDEX (_NL_NUMERIC_CODESET),
-	  [__LC_TIME] = _NL_ITEM_INDEX (_NL_TIME_CODESET),
-	  [__LC_COLLATE] = _NL_ITEM_INDEX (_NL_COLLATE_CODESET),
-	  [__LC_MONETARY] = _NL_ITEM_INDEX (_NL_MONETARY_CODESET),
-	  [__LC_MESSAGES] = _NL_ITEM_INDEX (_NL_MESSAGES_CODESET),
-	  [__LC_PAPER] = _NL_ITEM_INDEX (_NL_PAPER_CODESET),
-	  [__LC_NAME] = _NL_ITEM_INDEX (_NL_NAME_CODESET),
-	  [__LC_ADDRESS] = _NL_ITEM_INDEX (_NL_ADDRESS_CODESET),
-	  [__LC_TELEPHONE] = _NL_ITEM_INDEX (_NL_TELEPHONE_CODESET),
-	  [__LC_MEASUREMENT] = _NL_ITEM_INDEX (_NL_MEASUREMENT_CODESET),
-	  [__LC_IDENTIFICATION] = _NL_ITEM_INDEX (_NL_IDENTIFICATION_CODESET)
-	};
-      const struct __locale_data *data;
-      const char *locale_codeset;
       char *clocale_codeset;
       char *ccodeset;
 
-      data = (const struct __locale_data *) locale_file->data;
-      locale_codeset =
-	(const char *) data->values[codeset_idx[category]].string;
-      assert (locale_codeset != NULL);
       /* Note the length of the allocated memory: +3 for up to two slashes
 	 and the NUL byte.  */
       clocale_codeset = (char *) alloca (strlen (locale_codeset) + 3);
diff --git a/localedata/Makefile b/localedata/Makefile
index 14e04cd3c5..2af399ec51 100644
--- a/localedata/Makefile
+++ b/localedata/Makefile
@@ -124,11 +124,13 @@ test-input := \
 test-input-data = $(addsuffix .in, $(test-input))
 test-output := $(foreach s, .out .xout, \
 			 $(addsuffix $s, $(basename $(test-input))))
+# Note that tst-invalid-charset depends on test5 and test6 being locales that
+# do not have valid charset converters.
 ld-test-names := test1 test2 test3 test4 test5 test6 test7
 ld-test-srcs := $(addprefix tests/,$(addsuffix .cm,$(ld-test-names)) \
 				   $(addsuffix .def,$(ld-test-names)) \
 				   $(addsuffix .ds,test5 test6) \
-				   test6.c trans.def)
+				   trans.def)
 
 fmon-tests = n01y12 n02n40 n10y31 n11y41 n12y11 n20n32 n30y20 n41n00 \
 	     y01y10 y02n22 y22n42 y30y21 y32n31 y40y00 y42n21
@@ -158,7 +160,7 @@ tests = $(locale_test_suite) tst-digits tst-setlocale bug-iconv-trans \
 	tst-leaks tst-mbswcs1 tst-mbswcs2 tst-mbswcs3 tst-mbswcs4 tst-mbswcs5 \
 	tst-mbswcs6 tst-xlocale1 tst-xlocale2 bug-usesetlocale \
 	tst-strfmon1 tst-sscanf bug-setlocale1 tst-setlocale2 tst-setlocale3 \
-	tst-wctype tst-iconv-math-trans
+	tst-wctype tst-iconv-math-trans tst-invalid-charset
 tests-static = bug-setlocale1-static
 tests += $(tests-static)
 ifeq (yes,$(build-shared))
@@ -401,7 +403,10 @@ $(objpfx)tst-langinfo-setlocale-static.out: tst-langinfo.sh \
 		 '$(run-program-env)' '$(test-program-cmd-after-env)' > $@; \
 	$(evaluate-test)
 
+# These tests depend on tst-locale because they use the locales compiled by
+# that test.
 $(objpfx)tst-digits.out: $(objpfx)tst-locale.out
+$(objpfx)tst-invalid-charset.out: $(objpfx)tst-locale.out
 $(objpfx)tst-mbswcs6.out: $(addprefix $(objpfx),$(CTYPE_FILES))
 endif
 
@@ -461,7 +466,8 @@ $(objpfx)mtrace-tst-leaks.out: $(objpfx)tst-leaks.out
 	$(common-objpfx)malloc/mtrace $(objpfx)tst-leaks.mtrace > $@; \
 	$(evaluate-test)
 
-bug-setlocale1-ENV-only = LOCPATH=$(objpfx) LC_CTYPE=de_DE.UTF-8
+bug-setlocale1-ENV-only = GCONV_PATH=$(common-objpfx)iconvdata \
+			  LOCPATH=$(objpfx) LC_CTYPE=de_DE.UTF-8
 bug-setlocale1-static-ENV-only = $(bug-setlocale1-ENV-only)
 
 $(objdir)/iconvdata/gconv-modules:
diff --git a/localedata/tests/test6.c b/localedata/tests/test6.c
deleted file mode 100644
index edb5fe4a5f..0000000000
--- a/localedata/tests/test6.c
+++ /dev/null
@@ -1,137 +0,0 @@
-/* Test program for character classes and mappings.
-   Copyright (C) 1999-2021 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-   Contributed by Ulrich Drepper <drepper@cygnus.com>, 1999.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <https://www.gnu.org/licenses/>.  */
-
-#include <ctype.h>
-#include <locale.h>
-#include <wchar.h>
-
-
-int
-main (void)
-{
-  const char lower[] = "abcdefghijklmnopqrstuvwxyz";
-  const char upper[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
-#define LEN (sizeof (upper) - 1)
-  const wchar_t wlower[] = L"abcdefghijklmnopqrstuvwxyz";
-  const wchar_t wupper[] = L"ABCDEFGHIJKLMNOPQRSTUVWXYZ";
-  int i;
-  int result = 0;
-
-  setlocale (LC_ALL, "test6");
-
-  for (i = 0; i < LEN; ++i)
-    {
-      /* Test basic table handling (basic == not more than 256 characters).
-	 The charmaps swaps the normal lower-upper case meaning of the
-	 ASCII characters used in the source code while the Unicode mapping
-	 in the repertoire map has the normal correspondents.  This test
-	 shows the independence of the tables for `char' and `wchar_t'
-	 characters.  */
-
-      if (islower (lower[i]))
-	{
-	  printf ("islower ('%c') false\n", lower[i]);
-	  result = 1;
-	}
-      if (! isupper (lower[i]))
-	{
-	  printf ("isupper ('%c') false\n", lower[i]);
-	  result = 1;
-	}
-
-      if (! islower (upper[i]))
-	{
-	  printf ("islower ('%c') false\n", upper[i]);
-	  result = 1;
-	}
-      if (isupper (upper[i]))
-	{
-	  printf ("isupper ('%c') false\n", upper[i]);
-	  result = 1;
-	}
-
-      if (toupper (lower[i]) != lower[i])
-	{
-	  printf ("toupper ('%c') false\n", lower[i]);
-	  result = 1;
-	}
-      if (tolower (lower[i]) != upper[i])
-	{
-	  printf ("tolower ('%c') false\n", lower[i]);
-	  result = 1;
-	}
-
-      if (tolower (upper[i]) != upper[i])
-	{
-	  printf ("tolower ('%c') false\n", upper[i]);
-	  result = 1;
-	}
-      if (toupper (upper[i]) != lower[i])
-	{
-	  printf ("toupper ('%c') false\n", upper[i]);
-	  result = 1;
-	}
-
-      if (iswlower (wupper[i]))
-	{
-	  printf ("iswlower (L'%c') false\n", upper[i]);
-	  result = 1;
-	}
-      if (! iswupper (wupper[i]))
-	{
-	  printf ("iswupper (L'%c') false\n", upper[i]);
-	  result = 1;
-	}
-
-      if (iswupper (wlower[i]))
-	{
-	  printf ("iswupper (L'%c') false\n", lower[i]);
-	  result = 1;
-	}
-      if (! iswlower (wlower[i]))
-	{
-	  printf ("iswlower (L'%c') false\n", lower[i]);
-	  result = 1;
-	}
-
-      if (towupper (wlower[i]) != wupper[i])
-	{
-	  printf ("towupper ('%c') false\n", lower[i]);
-	  result = 1;
-	}
-      if (towlower (wlower[i]) != wlower[i])
-	{
-	  printf ("towlower ('%c') false\n", lower[i]);
-	  result = 1;
-	}
-
-      if (towlower (wupper[i]) != wlower[i])
-	{
-	  printf ("towlower ('%c') false\n", upper[i]);
-	  result = 1;
-	}
-      if (towupper (wupper[i]) != wupper[i])
-	{
-	  printf ("towupper ('%c') false\n", upper[i]);
-	  result = 1;
-	}
-    }
-
-  return result;
-}
diff --git a/localedata/tst-invalid-charset.c b/localedata/tst-invalid-charset.c
new file mode 100644
index 0000000000..46a5198c66
--- /dev/null
+++ b/localedata/tst-invalid-charset.c
@@ -0,0 +1,31 @@
+/* Test program to verify that setlocale fails for charsets that do not have a
+   converter.
+   Copyright (C) 2021 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <ctype.h>
+#include <locale.h>
+#include <wchar.h>
+
+
+int
+main (void)
+{
+  /* Fail if setlocale succeeds for any of these locales.  */
+  return (setlocale (LC_ALL, "test5") != NULL
+	  || setlocale (LC_ALL, "test6") != NULL);
+}
-- 
2.31.1


^ permalink raw reply	[flat|nested] 9+ messages in thread

* [PING][PATCH v2] setlocale: Fail if iconv module for charset is not present [BZ #27996]
  2021-07-20  2:36     ` [PATCH v2] " Siddhesh Poyarekar
@ 2021-08-11  7:42       ` Siddhesh Poyarekar
  2021-08-17  2:58         ` [PING2][PATCH " Siddhesh Poyarekar
  0 siblings, 1 reply; 9+ messages in thread
From: Siddhesh Poyarekar @ 2021-08-11  7:42 UTC (permalink / raw)
  To: libc-alpha

Ping!

On 7/20/21 8:06 AM, Siddhesh Poyarekar via Libc-alpha wrote:
> setlocale currently succeeds even if the requested locale uses a
> charset that does not have a converter module installed.  Check for
> existence of the charset (either the one requested through the input
> name or the one needed by the selected locale file) and fail if it
> doesn't.
> 
> The new test tst-invalid-charset verifes that loading test5 and test6
> locales fail because both locales have charsets without a converter,
> viz. test5 and test6 respectively.  Also, test6.c has been removed as
> it was unused.
> ---
> Changes from v1:
> - Find full transformation paths both ways instead of merely looking for
>    a FROM converter.
> 
>   locale/findlocale.c              |  77 ++++++++++++-----
>   localedata/Makefile              |  12 ++-
>   localedata/tests/test6.c         | 137 -------------------------------
>   localedata/tst-invalid-charset.c |  31 +++++++
>   4 files changed, 95 insertions(+), 162 deletions(-)
>   delete mode 100644 localedata/tests/test6.c
>   create mode 100644 localedata/tst-invalid-charset.c
> 
> diff --git a/locale/findlocale.c b/locale/findlocale.c
> index ab09122b0c..7ccc98cd8b 100644
> --- a/locale/findlocale.c
> +++ b/locale/findlocale.c
> @@ -98,6 +98,30 @@ valid_locale_name (const char *name)
>     return 1;
>   }
>   
> +/* Return true if we have gconv modules to transform between the INTERNAL
> +   encoding and CODESET.  */
> +static bool
> +codeset_has_module (const char *codeset)
> +{
> +  struct __gconv_step *steps;
> +  size_t nsteps;
> +
> +  char *ccodeset = (char *) alloca (strlen (codeset) + 3);
> +  strip (ccodeset, codeset);
> +
> +  if (__gconv_find_transform ("INTERNAL", ccodeset, &steps, &nsteps, 0)
> +      != __GCONV_OK)
> +    return false;
> +  __gconv_close_transform (steps, nsteps);
> +
> +  if (__gconv_find_transform (ccodeset, "INTERNAL", &steps, &nsteps, 0)
> +      != __GCONV_OK)
> +    return false;
> +  __gconv_close_transform (steps, nsteps);
> +
> +  return true;
> +}
> +
>   struct __locale_data *
>   _nl_find_locale (const char *locale_path, size_t locale_path_len,
>   		 int category, const char **name)
> @@ -200,6 +224,10 @@ _nl_find_locale (const char *locale_path, size_t locale_path_len,
>       /* Memory allocate problem.  */
>       return NULL;
>   
> +  /* The requested codeset does not have a converter, don't use it.  */
> +  if (codeset != NULL && !codeset_has_module (codeset))
> +    return NULL;
> +
>     /* If exactly this locale was already asked for we have an entry with
>        the complete name.  */
>     locale_file = _nl_make_l10nflist (&_nl_locale_file_list[category],
> @@ -248,6 +276,33 @@ _nl_find_locale (const char *locale_path, size_t locale_path_len,
>   	return NULL;
>       }
>   
> +  /* Get the codeset information from the locale file.  */
> +  static const int codeset_idx[] =
> +    {
> +      [__LC_CTYPE] = _NL_ITEM_INDEX (CODESET),
> +      [__LC_NUMERIC] = _NL_ITEM_INDEX (_NL_NUMERIC_CODESET),
> +      [__LC_TIME] = _NL_ITEM_INDEX (_NL_TIME_CODESET),
> +      [__LC_COLLATE] = _NL_ITEM_INDEX (_NL_COLLATE_CODESET),
> +      [__LC_MONETARY] = _NL_ITEM_INDEX (_NL_MONETARY_CODESET),
> +      [__LC_MESSAGES] = _NL_ITEM_INDEX (_NL_MESSAGES_CODESET),
> +      [__LC_PAPER] = _NL_ITEM_INDEX (_NL_PAPER_CODESET),
> +      [__LC_NAME] = _NL_ITEM_INDEX (_NL_NAME_CODESET),
> +      [__LC_ADDRESS] = _NL_ITEM_INDEX (_NL_ADDRESS_CODESET),
> +      [__LC_TELEPHONE] = _NL_ITEM_INDEX (_NL_TELEPHONE_CODESET),
> +      [__LC_MEASUREMENT] = _NL_ITEM_INDEX (_NL_MEASUREMENT_CODESET),
> +      [__LC_IDENTIFICATION] = _NL_ITEM_INDEX (_NL_IDENTIFICATION_CODESET)
> +    };
> +  const struct __locale_data *data;
> +  const char *locale_codeset;
> +
> +  data = (const struct __locale_data *) locale_file->data;
> +  locale_codeset = (const char *) data->values[codeset_idx[category]].string;
> +  assert (locale_codeset != NULL);
> +
> +  /* The locale codeset does not have a converter, don't use it.  */
> +  if (locale_codeset[0] != '\0' && !codeset_has_module (locale_codeset))
> +    return NULL;
> +
>     /* The LC_CTYPE category allows to check whether a locale is really
>        usable.  If the locale name contains a charset name and the
>        charset name used in the locale (present in the LC_CTYPE data) is
> @@ -256,31 +311,9 @@ _nl_find_locale (const char *locale_path, size_t locale_path_len,
>        in the locale name.  */
>     if (codeset != NULL)
>       {
> -      /* Get the codeset information from the locale file.  */
> -      static const int codeset_idx[] =
> -	{
> -	  [__LC_CTYPE] = _NL_ITEM_INDEX (CODESET),
> -	  [__LC_NUMERIC] = _NL_ITEM_INDEX (_NL_NUMERIC_CODESET),
> -	  [__LC_TIME] = _NL_ITEM_INDEX (_NL_TIME_CODESET),
> -	  [__LC_COLLATE] = _NL_ITEM_INDEX (_NL_COLLATE_CODESET),
> -	  [__LC_MONETARY] = _NL_ITEM_INDEX (_NL_MONETARY_CODESET),
> -	  [__LC_MESSAGES] = _NL_ITEM_INDEX (_NL_MESSAGES_CODESET),
> -	  [__LC_PAPER] = _NL_ITEM_INDEX (_NL_PAPER_CODESET),
> -	  [__LC_NAME] = _NL_ITEM_INDEX (_NL_NAME_CODESET),
> -	  [__LC_ADDRESS] = _NL_ITEM_INDEX (_NL_ADDRESS_CODESET),
> -	  [__LC_TELEPHONE] = _NL_ITEM_INDEX (_NL_TELEPHONE_CODESET),
> -	  [__LC_MEASUREMENT] = _NL_ITEM_INDEX (_NL_MEASUREMENT_CODESET),
> -	  [__LC_IDENTIFICATION] = _NL_ITEM_INDEX (_NL_IDENTIFICATION_CODESET)
> -	};
> -      const struct __locale_data *data;
> -      const char *locale_codeset;
>         char *clocale_codeset;
>         char *ccodeset;
>   
> -      data = (const struct __locale_data *) locale_file->data;
> -      locale_codeset =
> -	(const char *) data->values[codeset_idx[category]].string;
> -      assert (locale_codeset != NULL);
>         /* Note the length of the allocated memory: +3 for up to two slashes
>   	 and the NUL byte.  */
>         clocale_codeset = (char *) alloca (strlen (locale_codeset) + 3);
> diff --git a/localedata/Makefile b/localedata/Makefile
> index 14e04cd3c5..2af399ec51 100644
> --- a/localedata/Makefile
> +++ b/localedata/Makefile
> @@ -124,11 +124,13 @@ test-input := \
>   test-input-data = $(addsuffix .in, $(test-input))
>   test-output := $(foreach s, .out .xout, \
>   			 $(addsuffix $s, $(basename $(test-input))))
> +# Note that tst-invalid-charset depends on test5 and test6 being locales that
> +# do not have valid charset converters.
>   ld-test-names := test1 test2 test3 test4 test5 test6 test7
>   ld-test-srcs := $(addprefix tests/,$(addsuffix .cm,$(ld-test-names)) \
>   				   $(addsuffix .def,$(ld-test-names)) \
>   				   $(addsuffix .ds,test5 test6) \
> -				   test6.c trans.def)
> +				   trans.def)
>   
>   fmon-tests = n01y12 n02n40 n10y31 n11y41 n12y11 n20n32 n30y20 n41n00 \
>   	     y01y10 y02n22 y22n42 y30y21 y32n31 y40y00 y42n21
> @@ -158,7 +160,7 @@ tests = $(locale_test_suite) tst-digits tst-setlocale bug-iconv-trans \
>   	tst-leaks tst-mbswcs1 tst-mbswcs2 tst-mbswcs3 tst-mbswcs4 tst-mbswcs5 \
>   	tst-mbswcs6 tst-xlocale1 tst-xlocale2 bug-usesetlocale \
>   	tst-strfmon1 tst-sscanf bug-setlocale1 tst-setlocale2 tst-setlocale3 \
> -	tst-wctype tst-iconv-math-trans
> +	tst-wctype tst-iconv-math-trans tst-invalid-charset
>   tests-static = bug-setlocale1-static
>   tests += $(tests-static)
>   ifeq (yes,$(build-shared))
> @@ -401,7 +403,10 @@ $(objpfx)tst-langinfo-setlocale-static.out: tst-langinfo.sh \
>   		 '$(run-program-env)' '$(test-program-cmd-after-env)' > $@; \
>   	$(evaluate-test)
>   
> +# These tests depend on tst-locale because they use the locales compiled by
> +# that test.
>   $(objpfx)tst-digits.out: $(objpfx)tst-locale.out
> +$(objpfx)tst-invalid-charset.out: $(objpfx)tst-locale.out
>   $(objpfx)tst-mbswcs6.out: $(addprefix $(objpfx),$(CTYPE_FILES))
>   endif
>   
> @@ -461,7 +466,8 @@ $(objpfx)mtrace-tst-leaks.out: $(objpfx)tst-leaks.out
>   	$(common-objpfx)malloc/mtrace $(objpfx)tst-leaks.mtrace > $@; \
>   	$(evaluate-test)
>   
> -bug-setlocale1-ENV-only = LOCPATH=$(objpfx) LC_CTYPE=de_DE.UTF-8
> +bug-setlocale1-ENV-only = GCONV_PATH=$(common-objpfx)iconvdata \
> +			  LOCPATH=$(objpfx) LC_CTYPE=de_DE.UTF-8
>   bug-setlocale1-static-ENV-only = $(bug-setlocale1-ENV-only)
>   
>   $(objdir)/iconvdata/gconv-modules:
> diff --git a/localedata/tests/test6.c b/localedata/tests/test6.c
> deleted file mode 100644
> index edb5fe4a5f..0000000000
> --- a/localedata/tests/test6.c
> +++ /dev/null
> @@ -1,137 +0,0 @@
> -/* Test program for character classes and mappings.
> -   Copyright (C) 1999-2021 Free Software Foundation, Inc.
> -   This file is part of the GNU C Library.
> -   Contributed by Ulrich Drepper <drepper@cygnus.com>, 1999.
> -
> -   The GNU C Library is free software; you can redistribute it and/or
> -   modify it under the terms of the GNU Lesser General Public
> -   License as published by the Free Software Foundation; either
> -   version 2.1 of the License, or (at your option) any later version.
> -
> -   The GNU C Library is distributed in the hope that it will be useful,
> -   but WITHOUT ANY WARRANTY; without even the implied warranty of
> -   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> -   Lesser General Public License for more details.
> -
> -   You should have received a copy of the GNU Lesser General Public
> -   License along with the GNU C Library; if not, see
> -   <https://www.gnu.org/licenses/>.  */
> -
> -#include <ctype.h>
> -#include <locale.h>
> -#include <wchar.h>
> -
> -
> -int
> -main (void)
> -{
> -  const char lower[] = "abcdefghijklmnopqrstuvwxyz";
> -  const char upper[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
> -#define LEN (sizeof (upper) - 1)
> -  const wchar_t wlower[] = L"abcdefghijklmnopqrstuvwxyz";
> -  const wchar_t wupper[] = L"ABCDEFGHIJKLMNOPQRSTUVWXYZ";
> -  int i;
> -  int result = 0;
> -
> -  setlocale (LC_ALL, "test6");
> -
> -  for (i = 0; i < LEN; ++i)
> -    {
> -      /* Test basic table handling (basic == not more than 256 characters).
> -	 The charmaps swaps the normal lower-upper case meaning of the
> -	 ASCII characters used in the source code while the Unicode mapping
> -	 in the repertoire map has the normal correspondents.  This test
> -	 shows the independence of the tables for `char' and `wchar_t'
> -	 characters.  */
> -
> -      if (islower (lower[i]))
> -	{
> -	  printf ("islower ('%c') false\n", lower[i]);
> -	  result = 1;
> -	}
> -      if (! isupper (lower[i]))
> -	{
> -	  printf ("isupper ('%c') false\n", lower[i]);
> -	  result = 1;
> -	}
> -
> -      if (! islower (upper[i]))
> -	{
> -	  printf ("islower ('%c') false\n", upper[i]);
> -	  result = 1;
> -	}
> -      if (isupper (upper[i]))
> -	{
> -	  printf ("isupper ('%c') false\n", upper[i]);
> -	  result = 1;
> -	}
> -
> -      if (toupper (lower[i]) != lower[i])
> -	{
> -	  printf ("toupper ('%c') false\n", lower[i]);
> -	  result = 1;
> -	}
> -      if (tolower (lower[i]) != upper[i])
> -	{
> -	  printf ("tolower ('%c') false\n", lower[i]);
> -	  result = 1;
> -	}
> -
> -      if (tolower (upper[i]) != upper[i])
> -	{
> -	  printf ("tolower ('%c') false\n", upper[i]);
> -	  result = 1;
> -	}
> -      if (toupper (upper[i]) != lower[i])
> -	{
> -	  printf ("toupper ('%c') false\n", upper[i]);
> -	  result = 1;
> -	}
> -
> -      if (iswlower (wupper[i]))
> -	{
> -	  printf ("iswlower (L'%c') false\n", upper[i]);
> -	  result = 1;
> -	}
> -      if (! iswupper (wupper[i]))
> -	{
> -	  printf ("iswupper (L'%c') false\n", upper[i]);
> -	  result = 1;
> -	}
> -
> -      if (iswupper (wlower[i]))
> -	{
> -	  printf ("iswupper (L'%c') false\n", lower[i]);
> -	  result = 1;
> -	}
> -      if (! iswlower (wlower[i]))
> -	{
> -	  printf ("iswlower (L'%c') false\n", lower[i]);
> -	  result = 1;
> -	}
> -
> -      if (towupper (wlower[i]) != wupper[i])
> -	{
> -	  printf ("towupper ('%c') false\n", lower[i]);
> -	  result = 1;
> -	}
> -      if (towlower (wlower[i]) != wlower[i])
> -	{
> -	  printf ("towlower ('%c') false\n", lower[i]);
> -	  result = 1;
> -	}
> -
> -      if (towlower (wupper[i]) != wlower[i])
> -	{
> -	  printf ("towlower ('%c') false\n", upper[i]);
> -	  result = 1;
> -	}
> -      if (towupper (wupper[i]) != wupper[i])
> -	{
> -	  printf ("towupper ('%c') false\n", upper[i]);
> -	  result = 1;
> -	}
> -    }
> -
> -  return result;
> -}
> diff --git a/localedata/tst-invalid-charset.c b/localedata/tst-invalid-charset.c
> new file mode 100644
> index 0000000000..46a5198c66
> --- /dev/null
> +++ b/localedata/tst-invalid-charset.c
> @@ -0,0 +1,31 @@
> +/* Test program to verify that setlocale fails for charsets that do not have a
> +   converter.
> +   Copyright (C) 2021 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +#include <ctype.h>
> +#include <locale.h>
> +#include <wchar.h>
> +
> +
> +int
> +main (void)
> +{
> +  /* Fail if setlocale succeeds for any of these locales.  */
> +  return (setlocale (LC_ALL, "test5") != NULL
> +	  || setlocale (LC_ALL, "test6") != NULL);
> +}
> 


^ permalink raw reply	[flat|nested] 9+ messages in thread

* [PING2][PATCH v2] setlocale: Fail if iconv module for charset is not present [BZ #27996]
  2021-08-11  7:42       ` [PING][PATCH " Siddhesh Poyarekar
@ 2021-08-17  2:58         ` Siddhesh Poyarekar
  2021-10-18 13:16           ` [PING3][PATCH " Siddhesh Poyarekar
  0 siblings, 1 reply; 9+ messages in thread
From: Siddhesh Poyarekar @ 2021-08-17  2:58 UTC (permalink / raw)
  To: libc-alpha

Ping!

On 8/11/21 1:12 PM, Siddhesh Poyarekar via Libc-alpha wrote:
> Ping!
> 
> On 7/20/21 8:06 AM, Siddhesh Poyarekar via Libc-alpha wrote:
>> setlocale currently succeeds even if the requested locale uses a
>> charset that does not have a converter module installed.  Check for
>> existence of the charset (either the one requested through the input
>> name or the one needed by the selected locale file) and fail if it
>> doesn't.
>>
>> The new test tst-invalid-charset verifes that loading test5 and test6
>> locales fail because both locales have charsets without a converter,
>> viz. test5 and test6 respectively.  Also, test6.c has been removed as
>> it was unused.
>> ---
>> Changes from v1:
>> - Find full transformation paths both ways instead of merely looking for
>>    a FROM converter.
>>
>>   locale/findlocale.c              |  77 ++++++++++++-----
>>   localedata/Makefile              |  12 ++-
>>   localedata/tests/test6.c         | 137 -------------------------------
>>   localedata/tst-invalid-charset.c |  31 +++++++
>>   4 files changed, 95 insertions(+), 162 deletions(-)
>>   delete mode 100644 localedata/tests/test6.c
>>   create mode 100644 localedata/tst-invalid-charset.c
>>
>> diff --git a/locale/findlocale.c b/locale/findlocale.c
>> index ab09122b0c..7ccc98cd8b 100644
>> --- a/locale/findlocale.c
>> +++ b/locale/findlocale.c
>> @@ -98,6 +98,30 @@ valid_locale_name (const char *name)
>>     return 1;
>>   }
>> +/* Return true if we have gconv modules to transform between the 
>> INTERNAL
>> +   encoding and CODESET.  */
>> +static bool
>> +codeset_has_module (const char *codeset)
>> +{
>> +  struct __gconv_step *steps;
>> +  size_t nsteps;
>> +
>> +  char *ccodeset = (char *) alloca (strlen (codeset) + 3);
>> +  strip (ccodeset, codeset);
>> +
>> +  if (__gconv_find_transform ("INTERNAL", ccodeset, &steps, &nsteps, 0)
>> +      != __GCONV_OK)
>> +    return false;
>> +  __gconv_close_transform (steps, nsteps);
>> +
>> +  if (__gconv_find_transform (ccodeset, "INTERNAL", &steps, &nsteps, 0)
>> +      != __GCONV_OK)
>> +    return false;
>> +  __gconv_close_transform (steps, nsteps);
>> +
>> +  return true;
>> +}
>> +
>>   struct __locale_data *
>>   _nl_find_locale (const char *locale_path, size_t locale_path_len,
>>            int category, const char **name)
>> @@ -200,6 +224,10 @@ _nl_find_locale (const char *locale_path, size_t 
>> locale_path_len,
>>       /* Memory allocate problem.  */
>>       return NULL;
>> +  /* The requested codeset does not have a converter, don't use it.  */
>> +  if (codeset != NULL && !codeset_has_module (codeset))
>> +    return NULL;
>> +
>>     /* If exactly this locale was already asked for we have an entry with
>>        the complete name.  */
>>     locale_file = _nl_make_l10nflist (&_nl_locale_file_list[category],
>> @@ -248,6 +276,33 @@ _nl_find_locale (const char *locale_path, size_t 
>> locale_path_len,
>>       return NULL;
>>       }
>> +  /* Get the codeset information from the locale file.  */
>> +  static const int codeset_idx[] =
>> +    {
>> +      [__LC_CTYPE] = _NL_ITEM_INDEX (CODESET),
>> +      [__LC_NUMERIC] = _NL_ITEM_INDEX (_NL_NUMERIC_CODESET),
>> +      [__LC_TIME] = _NL_ITEM_INDEX (_NL_TIME_CODESET),
>> +      [__LC_COLLATE] = _NL_ITEM_INDEX (_NL_COLLATE_CODESET),
>> +      [__LC_MONETARY] = _NL_ITEM_INDEX (_NL_MONETARY_CODESET),
>> +      [__LC_MESSAGES] = _NL_ITEM_INDEX (_NL_MESSAGES_CODESET),
>> +      [__LC_PAPER] = _NL_ITEM_INDEX (_NL_PAPER_CODESET),
>> +      [__LC_NAME] = _NL_ITEM_INDEX (_NL_NAME_CODESET),
>> +      [__LC_ADDRESS] = _NL_ITEM_INDEX (_NL_ADDRESS_CODESET),
>> +      [__LC_TELEPHONE] = _NL_ITEM_INDEX (_NL_TELEPHONE_CODESET),
>> +      [__LC_MEASUREMENT] = _NL_ITEM_INDEX (_NL_MEASUREMENT_CODESET),
>> +      [__LC_IDENTIFICATION] = _NL_ITEM_INDEX 
>> (_NL_IDENTIFICATION_CODESET)
>> +    };
>> +  const struct __locale_data *data;
>> +  const char *locale_codeset;
>> +
>> +  data = (const struct __locale_data *) locale_file->data;
>> +  locale_codeset = (const char *) 
>> data->values[codeset_idx[category]].string;
>> +  assert (locale_codeset != NULL);
>> +
>> +  /* The locale codeset does not have a converter, don't use it.  */
>> +  if (locale_codeset[0] != '\0' && !codeset_has_module (locale_codeset))
>> +    return NULL;
>> +
>>     /* The LC_CTYPE category allows to check whether a locale is really
>>        usable.  If the locale name contains a charset name and the
>>        charset name used in the locale (present in the LC_CTYPE data) is
>> @@ -256,31 +311,9 @@ _nl_find_locale (const char *locale_path, size_t 
>> locale_path_len,
>>        in the locale name.  */
>>     if (codeset != NULL)
>>       {
>> -      /* Get the codeset information from the locale file.  */
>> -      static const int codeset_idx[] =
>> -    {
>> -      [__LC_CTYPE] = _NL_ITEM_INDEX (CODESET),
>> -      [__LC_NUMERIC] = _NL_ITEM_INDEX (_NL_NUMERIC_CODESET),
>> -      [__LC_TIME] = _NL_ITEM_INDEX (_NL_TIME_CODESET),
>> -      [__LC_COLLATE] = _NL_ITEM_INDEX (_NL_COLLATE_CODESET),
>> -      [__LC_MONETARY] = _NL_ITEM_INDEX (_NL_MONETARY_CODESET),
>> -      [__LC_MESSAGES] = _NL_ITEM_INDEX (_NL_MESSAGES_CODESET),
>> -      [__LC_PAPER] = _NL_ITEM_INDEX (_NL_PAPER_CODESET),
>> -      [__LC_NAME] = _NL_ITEM_INDEX (_NL_NAME_CODESET),
>> -      [__LC_ADDRESS] = _NL_ITEM_INDEX (_NL_ADDRESS_CODESET),
>> -      [__LC_TELEPHONE] = _NL_ITEM_INDEX (_NL_TELEPHONE_CODESET),
>> -      [__LC_MEASUREMENT] = _NL_ITEM_INDEX (_NL_MEASUREMENT_CODESET),
>> -      [__LC_IDENTIFICATION] = _NL_ITEM_INDEX 
>> (_NL_IDENTIFICATION_CODESET)
>> -    };
>> -      const struct __locale_data *data;
>> -      const char *locale_codeset;
>>         char *clocale_codeset;
>>         char *ccodeset;
>> -      data = (const struct __locale_data *) locale_file->data;
>> -      locale_codeset =
>> -    (const char *) data->values[codeset_idx[category]].string;
>> -      assert (locale_codeset != NULL);
>>         /* Note the length of the allocated memory: +3 for up to two 
>> slashes
>>        and the NUL byte.  */
>>         clocale_codeset = (char *) alloca (strlen (locale_codeset) + 3);
>> diff --git a/localedata/Makefile b/localedata/Makefile
>> index 14e04cd3c5..2af399ec51 100644
>> --- a/localedata/Makefile
>> +++ b/localedata/Makefile
>> @@ -124,11 +124,13 @@ test-input := \
>>   test-input-data = $(addsuffix .in, $(test-input))
>>   test-output := $(foreach s, .out .xout, \
>>                $(addsuffix $s, $(basename $(test-input))))
>> +# Note that tst-invalid-charset depends on test5 and test6 being 
>> locales that
>> +# do not have valid charset converters.
>>   ld-test-names := test1 test2 test3 test4 test5 test6 test7
>>   ld-test-srcs := $(addprefix tests/,$(addsuffix .cm,$(ld-test-names)) \
>>                      $(addsuffix .def,$(ld-test-names)) \
>>                      $(addsuffix .ds,test5 test6) \
>> -                   test6.c trans.def)
>> +                   trans.def)
>>   fmon-tests = n01y12 n02n40 n10y31 n11y41 n12y11 n20n32 n30y20 n41n00 \
>>            y01y10 y02n22 y22n42 y30y21 y32n31 y40y00 y42n21
>> @@ -158,7 +160,7 @@ tests = $(locale_test_suite) tst-digits 
>> tst-setlocale bug-iconv-trans \
>>       tst-leaks tst-mbswcs1 tst-mbswcs2 tst-mbswcs3 tst-mbswcs4 
>> tst-mbswcs5 \
>>       tst-mbswcs6 tst-xlocale1 tst-xlocale2 bug-usesetlocale \
>>       tst-strfmon1 tst-sscanf bug-setlocale1 tst-setlocale2 
>> tst-setlocale3 \
>> -    tst-wctype tst-iconv-math-trans
>> +    tst-wctype tst-iconv-math-trans tst-invalid-charset
>>   tests-static = bug-setlocale1-static
>>   tests += $(tests-static)
>>   ifeq (yes,$(build-shared))
>> @@ -401,7 +403,10 @@ $(objpfx)tst-langinfo-setlocale-static.out: 
>> tst-langinfo.sh \
>>            '$(run-program-env)' '$(test-program-cmd-after-env)' > $@; \
>>       $(evaluate-test)
>> +# These tests depend on tst-locale because they use the locales 
>> compiled by
>> +# that test.
>>   $(objpfx)tst-digits.out: $(objpfx)tst-locale.out
>> +$(objpfx)tst-invalid-charset.out: $(objpfx)tst-locale.out
>>   $(objpfx)tst-mbswcs6.out: $(addprefix $(objpfx),$(CTYPE_FILES))
>>   endif
>> @@ -461,7 +466,8 @@ $(objpfx)mtrace-tst-leaks.out: $(objpfx)tst-leaks.out
>>       $(common-objpfx)malloc/mtrace $(objpfx)tst-leaks.mtrace > $@; \
>>       $(evaluate-test)
>> -bug-setlocale1-ENV-only = LOCPATH=$(objpfx) LC_CTYPE=de_DE.UTF-8
>> +bug-setlocale1-ENV-only = GCONV_PATH=$(common-objpfx)iconvdata \
>> +              LOCPATH=$(objpfx) LC_CTYPE=de_DE.UTF-8
>>   bug-setlocale1-static-ENV-only = $(bug-setlocale1-ENV-only)
>>   $(objdir)/iconvdata/gconv-modules:
>> diff --git a/localedata/tests/test6.c b/localedata/tests/test6.c
>> deleted file mode 100644
>> index edb5fe4a5f..0000000000
>> --- a/localedata/tests/test6.c
>> +++ /dev/null
>> @@ -1,137 +0,0 @@
>> -/* Test program for character classes and mappings.
>> -   Copyright (C) 1999-2021 Free Software Foundation, Inc.
>> -   This file is part of the GNU C Library.
>> -   Contributed by Ulrich Drepper <drepper@cygnus.com>, 1999.
>> -
>> -   The GNU C Library is free software; you can redistribute it and/or
>> -   modify it under the terms of the GNU Lesser General Public
>> -   License as published by the Free Software Foundation; either
>> -   version 2.1 of the License, or (at your option) any later version.
>> -
>> -   The GNU C Library is distributed in the hope that it will be useful,
>> -   but WITHOUT ANY WARRANTY; without even the implied warranty of
>> -   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>> -   Lesser General Public License for more details.
>> -
>> -   You should have received a copy of the GNU Lesser General Public
>> -   License along with the GNU C Library; if not, see
>> -   <https://www.gnu.org/licenses/>.  */
>> -
>> -#include <ctype.h>
>> -#include <locale.h>
>> -#include <wchar.h>
>> -
>> -
>> -int
>> -main (void)
>> -{
>> -  const char lower[] = "abcdefghijklmnopqrstuvwxyz";
>> -  const char upper[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
>> -#define LEN (sizeof (upper) - 1)
>> -  const wchar_t wlower[] = L"abcdefghijklmnopqrstuvwxyz";
>> -  const wchar_t wupper[] = L"ABCDEFGHIJKLMNOPQRSTUVWXYZ";
>> -  int i;
>> -  int result = 0;
>> -
>> -  setlocale (LC_ALL, "test6");
>> -
>> -  for (i = 0; i < LEN; ++i)
>> -    {
>> -      /* Test basic table handling (basic == not more than 256 
>> characters).
>> -     The charmaps swaps the normal lower-upper case meaning of the
>> -     ASCII characters used in the source code while the Unicode mapping
>> -     in the repertoire map has the normal correspondents.  This test
>> -     shows the independence of the tables for `char' and `wchar_t'
>> -     characters.  */
>> -
>> -      if (islower (lower[i]))
>> -    {
>> -      printf ("islower ('%c') false\n", lower[i]);
>> -      result = 1;
>> -    }
>> -      if (! isupper (lower[i]))
>> -    {
>> -      printf ("isupper ('%c') false\n", lower[i]);
>> -      result = 1;
>> -    }
>> -
>> -      if (! islower (upper[i]))
>> -    {
>> -      printf ("islower ('%c') false\n", upper[i]);
>> -      result = 1;
>> -    }
>> -      if (isupper (upper[i]))
>> -    {
>> -      printf ("isupper ('%c') false\n", upper[i]);
>> -      result = 1;
>> -    }
>> -
>> -      if (toupper (lower[i]) != lower[i])
>> -    {
>> -      printf ("toupper ('%c') false\n", lower[i]);
>> -      result = 1;
>> -    }
>> -      if (tolower (lower[i]) != upper[i])
>> -    {
>> -      printf ("tolower ('%c') false\n", lower[i]);
>> -      result = 1;
>> -    }
>> -
>> -      if (tolower (upper[i]) != upper[i])
>> -    {
>> -      printf ("tolower ('%c') false\n", upper[i]);
>> -      result = 1;
>> -    }
>> -      if (toupper (upper[i]) != lower[i])
>> -    {
>> -      printf ("toupper ('%c') false\n", upper[i]);
>> -      result = 1;
>> -    }
>> -
>> -      if (iswlower (wupper[i]))
>> -    {
>> -      printf ("iswlower (L'%c') false\n", upper[i]);
>> -      result = 1;
>> -    }
>> -      if (! iswupper (wupper[i]))
>> -    {
>> -      printf ("iswupper (L'%c') false\n", upper[i]);
>> -      result = 1;
>> -    }
>> -
>> -      if (iswupper (wlower[i]))
>> -    {
>> -      printf ("iswupper (L'%c') false\n", lower[i]);
>> -      result = 1;
>> -    }
>> -      if (! iswlower (wlower[i]))
>> -    {
>> -      printf ("iswlower (L'%c') false\n", lower[i]);
>> -      result = 1;
>> -    }
>> -
>> -      if (towupper (wlower[i]) != wupper[i])
>> -    {
>> -      printf ("towupper ('%c') false\n", lower[i]);
>> -      result = 1;
>> -    }
>> -      if (towlower (wlower[i]) != wlower[i])
>> -    {
>> -      printf ("towlower ('%c') false\n", lower[i]);
>> -      result = 1;
>> -    }
>> -
>> -      if (towlower (wupper[i]) != wlower[i])
>> -    {
>> -      printf ("towlower ('%c') false\n", upper[i]);
>> -      result = 1;
>> -    }
>> -      if (towupper (wupper[i]) != wupper[i])
>> -    {
>> -      printf ("towupper ('%c') false\n", upper[i]);
>> -      result = 1;
>> -    }
>> -    }
>> -
>> -  return result;
>> -}
>> diff --git a/localedata/tst-invalid-charset.c 
>> b/localedata/tst-invalid-charset.c
>> new file mode 100644
>> index 0000000000..46a5198c66
>> --- /dev/null
>> +++ b/localedata/tst-invalid-charset.c
>> @@ -0,0 +1,31 @@
>> +/* Test program to verify that setlocale fails for charsets that do 
>> not have a
>> +   converter.
>> +   Copyright (C) 2021 Free Software Foundation, Inc.
>> +   This file is part of the GNU C Library.
>> +
>> +   The GNU C Library is free software; you can redistribute it and/or
>> +   modify it under the terms of the GNU Lesser General Public
>> +   License as published by the Free Software Foundation; either
>> +   version 2.1 of the License, or (at your option) any later version.
>> +
>> +   The GNU C Library is distributed in the hope that it will be useful,
>> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
>> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>> +   Lesser General Public License for more details.
>> +
>> +   You should have received a copy of the GNU Lesser General Public
>> +   License along with the GNU C Library; if not, see
>> +   <https://www.gnu.org/licenses/>.  */
>> +
>> +#include <ctype.h>
>> +#include <locale.h>
>> +#include <wchar.h>
>> +
>> +
>> +int
>> +main (void)
>> +{
>> +  /* Fail if setlocale succeeds for any of these locales.  */
>> +  return (setlocale (LC_ALL, "test5") != NULL
>> +      || setlocale (LC_ALL, "test6") != NULL);
>> +}
>>
> 


^ permalink raw reply	[flat|nested] 9+ messages in thread

* [PING3][PATCH v2] setlocale: Fail if iconv module for charset is not present [BZ #27996]
  2021-08-17  2:58         ` [PING2][PATCH " Siddhesh Poyarekar
@ 2021-10-18 13:16           ` Siddhesh Poyarekar
  0 siblings, 0 replies; 9+ messages in thread
From: Siddhesh Poyarekar @ 2021-10-18 13:16 UTC (permalink / raw)
  To: libc-alpha, Carlos O'Donell

Ping!

On 8/17/21 08:28, Siddhesh Poyarekar wrote:
> Ping!
> 
> On 8/11/21 1:12 PM, Siddhesh Poyarekar via Libc-alpha wrote:
>> Ping!
>>
>> On 7/20/21 8:06 AM, Siddhesh Poyarekar via Libc-alpha wrote:
>>> setlocale currently succeeds even if the requested locale uses a
>>> charset that does not have a converter module installed.  Check for
>>> existence of the charset (either the one requested through the input
>>> name or the one needed by the selected locale file) and fail if it
>>> doesn't.
>>>
>>> The new test tst-invalid-charset verifes that loading test5 and test6
>>> locales fail because both locales have charsets without a converter,
>>> viz. test5 and test6 respectively.  Also, test6.c has been removed as
>>> it was unused.
>>> ---
>>> Changes from v1:
>>> - Find full transformation paths both ways instead of merely looking for
>>>    a FROM converter.
>>>
>>>   locale/findlocale.c              |  77 ++++++++++++-----
>>>   localedata/Makefile              |  12 ++-
>>>   localedata/tests/test6.c         | 137 -------------------------------
>>>   localedata/tst-invalid-charset.c |  31 +++++++
>>>   4 files changed, 95 insertions(+), 162 deletions(-)
>>>   delete mode 100644 localedata/tests/test6.c
>>>   create mode 100644 localedata/tst-invalid-charset.c
>>>
>>> diff --git a/locale/findlocale.c b/locale/findlocale.c
>>> index ab09122b0c..7ccc98cd8b 100644
>>> --- a/locale/findlocale.c
>>> +++ b/locale/findlocale.c
>>> @@ -98,6 +98,30 @@ valid_locale_name (const char *name)
>>>     return 1;
>>>   }
>>> +/* Return true if we have gconv modules to transform between the 
>>> INTERNAL
>>> +   encoding and CODESET.  */
>>> +static bool
>>> +codeset_has_module (const char *codeset)
>>> +{
>>> +  struct __gconv_step *steps;
>>> +  size_t nsteps;
>>> +
>>> +  char *ccodeset = (char *) alloca (strlen (codeset) + 3);
>>> +  strip (ccodeset, codeset);
>>> +
>>> +  if (__gconv_find_transform ("INTERNAL", ccodeset, &steps, &nsteps, 0)
>>> +      != __GCONV_OK)
>>> +    return false;
>>> +  __gconv_close_transform (steps, nsteps);
>>> +
>>> +  if (__gconv_find_transform (ccodeset, "INTERNAL", &steps, &nsteps, 0)
>>> +      != __GCONV_OK)
>>> +    return false;
>>> +  __gconv_close_transform (steps, nsteps);
>>> +
>>> +  return true;
>>> +}
>>> +
>>>   struct __locale_data *
>>>   _nl_find_locale (const char *locale_path, size_t locale_path_len,
>>>            int category, const char **name)
>>> @@ -200,6 +224,10 @@ _nl_find_locale (const char *locale_path, size_t 
>>> locale_path_len,
>>>       /* Memory allocate problem.  */
>>>       return NULL;
>>> +  /* The requested codeset does not have a converter, don't use it.  */
>>> +  if (codeset != NULL && !codeset_has_module (codeset))
>>> +    return NULL;
>>> +
>>>     /* If exactly this locale was already asked for we have an entry 
>>> with
>>>        the complete name.  */
>>>     locale_file = _nl_make_l10nflist (&_nl_locale_file_list[category],
>>> @@ -248,6 +276,33 @@ _nl_find_locale (const char *locale_path, size_t 
>>> locale_path_len,
>>>       return NULL;
>>>       }
>>> +  /* Get the codeset information from the locale file.  */
>>> +  static const int codeset_idx[] =
>>> +    {
>>> +      [__LC_CTYPE] = _NL_ITEM_INDEX (CODESET),
>>> +      [__LC_NUMERIC] = _NL_ITEM_INDEX (_NL_NUMERIC_CODESET),
>>> +      [__LC_TIME] = _NL_ITEM_INDEX (_NL_TIME_CODESET),
>>> +      [__LC_COLLATE] = _NL_ITEM_INDEX (_NL_COLLATE_CODESET),
>>> +      [__LC_MONETARY] = _NL_ITEM_INDEX (_NL_MONETARY_CODESET),
>>> +      [__LC_MESSAGES] = _NL_ITEM_INDEX (_NL_MESSAGES_CODESET),
>>> +      [__LC_PAPER] = _NL_ITEM_INDEX (_NL_PAPER_CODESET),
>>> +      [__LC_NAME] = _NL_ITEM_INDEX (_NL_NAME_CODESET),
>>> +      [__LC_ADDRESS] = _NL_ITEM_INDEX (_NL_ADDRESS_CODESET),
>>> +      [__LC_TELEPHONE] = _NL_ITEM_INDEX (_NL_TELEPHONE_CODESET),
>>> +      [__LC_MEASUREMENT] = _NL_ITEM_INDEX (_NL_MEASUREMENT_CODESET),
>>> +      [__LC_IDENTIFICATION] = _NL_ITEM_INDEX 
>>> (_NL_IDENTIFICATION_CODESET)
>>> +    };
>>> +  const struct __locale_data *data;
>>> +  const char *locale_codeset;
>>> +
>>> +  data = (const struct __locale_data *) locale_file->data;
>>> +  locale_codeset = (const char *) 
>>> data->values[codeset_idx[category]].string;
>>> +  assert (locale_codeset != NULL);
>>> +
>>> +  /* The locale codeset does not have a converter, don't use it.  */
>>> +  if (locale_codeset[0] != '\0' && !codeset_has_module 
>>> (locale_codeset))
>>> +    return NULL;
>>> +
>>>     /* The LC_CTYPE category allows to check whether a locale is really
>>>        usable.  If the locale name contains a charset name and the
>>>        charset name used in the locale (present in the LC_CTYPE data) is
>>> @@ -256,31 +311,9 @@ _nl_find_locale (const char *locale_path, size_t 
>>> locale_path_len,
>>>        in the locale name.  */
>>>     if (codeset != NULL)
>>>       {
>>> -      /* Get the codeset information from the locale file.  */
>>> -      static const int codeset_idx[] =
>>> -    {
>>> -      [__LC_CTYPE] = _NL_ITEM_INDEX (CODESET),
>>> -      [__LC_NUMERIC] = _NL_ITEM_INDEX (_NL_NUMERIC_CODESET),
>>> -      [__LC_TIME] = _NL_ITEM_INDEX (_NL_TIME_CODESET),
>>> -      [__LC_COLLATE] = _NL_ITEM_INDEX (_NL_COLLATE_CODESET),
>>> -      [__LC_MONETARY] = _NL_ITEM_INDEX (_NL_MONETARY_CODESET),
>>> -      [__LC_MESSAGES] = _NL_ITEM_INDEX (_NL_MESSAGES_CODESET),
>>> -      [__LC_PAPER] = _NL_ITEM_INDEX (_NL_PAPER_CODESET),
>>> -      [__LC_NAME] = _NL_ITEM_INDEX (_NL_NAME_CODESET),
>>> -      [__LC_ADDRESS] = _NL_ITEM_INDEX (_NL_ADDRESS_CODESET),
>>> -      [__LC_TELEPHONE] = _NL_ITEM_INDEX (_NL_TELEPHONE_CODESET),
>>> -      [__LC_MEASUREMENT] = _NL_ITEM_INDEX (_NL_MEASUREMENT_CODESET),
>>> -      [__LC_IDENTIFICATION] = _NL_ITEM_INDEX 
>>> (_NL_IDENTIFICATION_CODESET)
>>> -    };
>>> -      const struct __locale_data *data;
>>> -      const char *locale_codeset;
>>>         char *clocale_codeset;
>>>         char *ccodeset;
>>> -      data = (const struct __locale_data *) locale_file->data;
>>> -      locale_codeset =
>>> -    (const char *) data->values[codeset_idx[category]].string;
>>> -      assert (locale_codeset != NULL);
>>>         /* Note the length of the allocated memory: +3 for up to two 
>>> slashes
>>>        and the NUL byte.  */
>>>         clocale_codeset = (char *) alloca (strlen (locale_codeset) + 3);
>>> diff --git a/localedata/Makefile b/localedata/Makefile
>>> index 14e04cd3c5..2af399ec51 100644
>>> --- a/localedata/Makefile
>>> +++ b/localedata/Makefile
>>> @@ -124,11 +124,13 @@ test-input := \
>>>   test-input-data = $(addsuffix .in, $(test-input))
>>>   test-output := $(foreach s, .out .xout, \
>>>                $(addsuffix $s, $(basename $(test-input))))
>>> +# Note that tst-invalid-charset depends on test5 and test6 being 
>>> locales that
>>> +# do not have valid charset converters.
>>>   ld-test-names := test1 test2 test3 test4 test5 test6 test7
>>>   ld-test-srcs := $(addprefix tests/,$(addsuffix .cm,$(ld-test-names)) \
>>>                      $(addsuffix .def,$(ld-test-names)) \
>>>                      $(addsuffix .ds,test5 test6) \
>>> -                   test6.c trans.def)
>>> +                   trans.def)
>>>   fmon-tests = n01y12 n02n40 n10y31 n11y41 n12y11 n20n32 n30y20 n41n00 \
>>>            y01y10 y02n22 y22n42 y30y21 y32n31 y40y00 y42n21
>>> @@ -158,7 +160,7 @@ tests = $(locale_test_suite) tst-digits 
>>> tst-setlocale bug-iconv-trans \
>>>       tst-leaks tst-mbswcs1 tst-mbswcs2 tst-mbswcs3 tst-mbswcs4 
>>> tst-mbswcs5 \
>>>       tst-mbswcs6 tst-xlocale1 tst-xlocale2 bug-usesetlocale \
>>>       tst-strfmon1 tst-sscanf bug-setlocale1 tst-setlocale2 
>>> tst-setlocale3 \
>>> -    tst-wctype tst-iconv-math-trans
>>> +    tst-wctype tst-iconv-math-trans tst-invalid-charset
>>>   tests-static = bug-setlocale1-static
>>>   tests += $(tests-static)
>>>   ifeq (yes,$(build-shared))
>>> @@ -401,7 +403,10 @@ $(objpfx)tst-langinfo-setlocale-static.out: 
>>> tst-langinfo.sh \
>>>            '$(run-program-env)' '$(test-program-cmd-after-env)' > $@; \
>>>       $(evaluate-test)
>>> +# These tests depend on tst-locale because they use the locales 
>>> compiled by
>>> +# that test.
>>>   $(objpfx)tst-digits.out: $(objpfx)tst-locale.out
>>> +$(objpfx)tst-invalid-charset.out: $(objpfx)tst-locale.out
>>>   $(objpfx)tst-mbswcs6.out: $(addprefix $(objpfx),$(CTYPE_FILES))
>>>   endif
>>> @@ -461,7 +466,8 @@ $(objpfx)mtrace-tst-leaks.out: 
>>> $(objpfx)tst-leaks.out
>>>       $(common-objpfx)malloc/mtrace $(objpfx)tst-leaks.mtrace > $@; \
>>>       $(evaluate-test)
>>> -bug-setlocale1-ENV-only = LOCPATH=$(objpfx) LC_CTYPE=de_DE.UTF-8
>>> +bug-setlocale1-ENV-only = GCONV_PATH=$(common-objpfx)iconvdata \
>>> +              LOCPATH=$(objpfx) LC_CTYPE=de_DE.UTF-8
>>>   bug-setlocale1-static-ENV-only = $(bug-setlocale1-ENV-only)
>>>   $(objdir)/iconvdata/gconv-modules:
>>> diff --git a/localedata/tests/test6.c b/localedata/tests/test6.c
>>> deleted file mode 100644
>>> index edb5fe4a5f..0000000000
>>> --- a/localedata/tests/test6.c
>>> +++ /dev/null
>>> @@ -1,137 +0,0 @@
>>> -/* Test program for character classes and mappings.
>>> -   Copyright (C) 1999-2021 Free Software Foundation, Inc.
>>> -   This file is part of the GNU C Library.
>>> -   Contributed by Ulrich Drepper <drepper@cygnus.com>, 1999.
>>> -
>>> -   The GNU C Library is free software; you can redistribute it and/or
>>> -   modify it under the terms of the GNU Lesser General Public
>>> -   License as published by the Free Software Foundation; either
>>> -   version 2.1 of the License, or (at your option) any later version.
>>> -
>>> -   The GNU C Library is distributed in the hope that it will be useful,
>>> -   but WITHOUT ANY WARRANTY; without even the implied warranty of
>>> -   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>>> -   Lesser General Public License for more details.
>>> -
>>> -   You should have received a copy of the GNU Lesser General Public
>>> -   License along with the GNU C Library; if not, see
>>> -   <https://www.gnu.org/licenses/>.  */
>>> -
>>> -#include <ctype.h>
>>> -#include <locale.h>
>>> -#include <wchar.h>
>>> -
>>> -
>>> -int
>>> -main (void)
>>> -{
>>> -  const char lower[] = "abcdefghijklmnopqrstuvwxyz";
>>> -  const char upper[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
>>> -#define LEN (sizeof (upper) - 1)
>>> -  const wchar_t wlower[] = L"abcdefghijklmnopqrstuvwxyz";
>>> -  const wchar_t wupper[] = L"ABCDEFGHIJKLMNOPQRSTUVWXYZ";
>>> -  int i;
>>> -  int result = 0;
>>> -
>>> -  setlocale (LC_ALL, "test6");
>>> -
>>> -  for (i = 0; i < LEN; ++i)
>>> -    {
>>> -      /* Test basic table handling (basic == not more than 256 
>>> characters).
>>> -     The charmaps swaps the normal lower-upper case meaning of the
>>> -     ASCII characters used in the source code while the Unicode mapping
>>> -     in the repertoire map has the normal correspondents.  This test
>>> -     shows the independence of the tables for `char' and `wchar_t'
>>> -     characters.  */
>>> -
>>> -      if (islower (lower[i]))
>>> -    {
>>> -      printf ("islower ('%c') false\n", lower[i]);
>>> -      result = 1;
>>> -    }
>>> -      if (! isupper (lower[i]))
>>> -    {
>>> -      printf ("isupper ('%c') false\n", lower[i]);
>>> -      result = 1;
>>> -    }
>>> -
>>> -      if (! islower (upper[i]))
>>> -    {
>>> -      printf ("islower ('%c') false\n", upper[i]);
>>> -      result = 1;
>>> -    }
>>> -      if (isupper (upper[i]))
>>> -    {
>>> -      printf ("isupper ('%c') false\n", upper[i]);
>>> -      result = 1;
>>> -    }
>>> -
>>> -      if (toupper (lower[i]) != lower[i])
>>> -    {
>>> -      printf ("toupper ('%c') false\n", lower[i]);
>>> -      result = 1;
>>> -    }
>>> -      if (tolower (lower[i]) != upper[i])
>>> -    {
>>> -      printf ("tolower ('%c') false\n", lower[i]);
>>> -      result = 1;
>>> -    }
>>> -
>>> -      if (tolower (upper[i]) != upper[i])
>>> -    {
>>> -      printf ("tolower ('%c') false\n", upper[i]);
>>> -      result = 1;
>>> -    }
>>> -      if (toupper (upper[i]) != lower[i])
>>> -    {
>>> -      printf ("toupper ('%c') false\n", upper[i]);
>>> -      result = 1;
>>> -    }
>>> -
>>> -      if (iswlower (wupper[i]))
>>> -    {
>>> -      printf ("iswlower (L'%c') false\n", upper[i]);
>>> -      result = 1;
>>> -    }
>>> -      if (! iswupper (wupper[i]))
>>> -    {
>>> -      printf ("iswupper (L'%c') false\n", upper[i]);
>>> -      result = 1;
>>> -    }
>>> -
>>> -      if (iswupper (wlower[i]))
>>> -    {
>>> -      printf ("iswupper (L'%c') false\n", lower[i]);
>>> -      result = 1;
>>> -    }
>>> -      if (! iswlower (wlower[i]))
>>> -    {
>>> -      printf ("iswlower (L'%c') false\n", lower[i]);
>>> -      result = 1;
>>> -    }
>>> -
>>> -      if (towupper (wlower[i]) != wupper[i])
>>> -    {
>>> -      printf ("towupper ('%c') false\n", lower[i]);
>>> -      result = 1;
>>> -    }
>>> -      if (towlower (wlower[i]) != wlower[i])
>>> -    {
>>> -      printf ("towlower ('%c') false\n", lower[i]);
>>> -      result = 1;
>>> -    }
>>> -
>>> -      if (towlower (wupper[i]) != wlower[i])
>>> -    {
>>> -      printf ("towlower ('%c') false\n", upper[i]);
>>> -      result = 1;
>>> -    }
>>> -      if (towupper (wupper[i]) != wupper[i])
>>> -    {
>>> -      printf ("towupper ('%c') false\n", upper[i]);
>>> -      result = 1;
>>> -    }
>>> -    }
>>> -
>>> -  return result;
>>> -}
>>> diff --git a/localedata/tst-invalid-charset.c 
>>> b/localedata/tst-invalid-charset.c
>>> new file mode 100644
>>> index 0000000000..46a5198c66
>>> --- /dev/null
>>> +++ b/localedata/tst-invalid-charset.c
>>> @@ -0,0 +1,31 @@
>>> +/* Test program to verify that setlocale fails for charsets that do 
>>> not have a
>>> +   converter.
>>> +   Copyright (C) 2021 Free Software Foundation, Inc.
>>> +   This file is part of the GNU C Library.
>>> +
>>> +   The GNU C Library is free software; you can redistribute it and/or
>>> +   modify it under the terms of the GNU Lesser General Public
>>> +   License as published by the Free Software Foundation; either
>>> +   version 2.1 of the License, or (at your option) any later version.
>>> +
>>> +   The GNU C Library is distributed in the hope that it will be useful,
>>> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
>>> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>>> +   Lesser General Public License for more details.
>>> +
>>> +   You should have received a copy of the GNU Lesser General Public
>>> +   License along with the GNU C Library; if not, see
>>> +   <https://www.gnu.org/licenses/>.  */
>>> +
>>> +#include <ctype.h>
>>> +#include <locale.h>
>>> +#include <wchar.h>
>>> +
>>> +
>>> +int
>>> +main (void)
>>> +{
>>> +  /* Fail if setlocale succeeds for any of these locales.  */
>>> +  return (setlocale (LC_ALL, "test5") != NULL
>>> +      || setlocale (LC_ALL, "test6") != NULL);
>>> +}
>>>
>>
> 


^ permalink raw reply	[flat|nested] 9+ messages in thread

* [PATCH v3] setlocale: Fail if iconv module for charset is not present [BZ #27996]
  2021-06-30  8:56 [PATCH] setlocale: Fail if iconv module for charset is not present [BZ #27996] Siddhesh Poyarekar
  2021-07-16 20:28 ` Carlos O'Donell
@ 2022-02-08  9:40 ` Siddhesh Poyarekar
  2022-02-17  8:08   ` Florian Weimer
  1 sibling, 1 reply; 9+ messages in thread
From: Siddhesh Poyarekar @ 2022-02-08  9:40 UTC (permalink / raw)
  To: libc-alpha

setlocale currently succeeds even if the requested locale uses a
charset that does not have a converter module installed.  Check for
existence of the charset (either the one requested through the input
name or the one needed by the selected locale file) and fail if it
doesn't.

The new test tst-invalid-charset verifes that loading test5 and test6
locales fail because both locales have charsets without a converter,
viz. test5 and test6 respectively.  Also, test6.c has been removed as
it was unused.

Signed-off-by: Siddhesh Poyarekar <siddhesh@sourceware.org>
---
Tested on x86_64

Changes from v2:
- Added GNU Toolchain Authors copyright
- Rebased on latest master

 locale/findlocale.c              |  78 +++++++++++++-----
 localedata/Makefile              |  11 ++-
 localedata/tests/test6.c         | 136 -------------------------------
 localedata/tst-invalid-charset.c |  31 +++++++
 4 files changed, 96 insertions(+), 160 deletions(-)
 delete mode 100644 localedata/tests/test6.c
 create mode 100644 localedata/tst-invalid-charset.c

diff --git a/locale/findlocale.c b/locale/findlocale.c
index 64f687ea9d..e78664b0bf 100644
--- a/locale/findlocale.c
+++ b/locale/findlocale.c
@@ -1,4 +1,5 @@
 /* Copyright (C) 1996-2022 Free Software Foundation, Inc.
+   Copyright The GNU Toolchain Authors.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -97,6 +98,30 @@ valid_locale_name (const char *name)
   return 1;
 }
 
+/* Return true if we have gconv modules to transform between the INTERNAL
+   encoding and CODESET.  */
+static bool
+codeset_has_module (const char *codeset)
+{
+  struct __gconv_step *steps;
+  size_t nsteps;
+
+  char *ccodeset = (char *) alloca (strlen (codeset) + 3);
+  strip (ccodeset, codeset);
+
+  if (__gconv_find_transform ("INTERNAL", ccodeset, &steps, &nsteps, 0)
+      != __GCONV_OK)
+    return false;
+  __gconv_close_transform (steps, nsteps);
+
+  if (__gconv_find_transform (ccodeset, "INTERNAL", &steps, &nsteps, 0)
+      != __GCONV_OK)
+    return false;
+  __gconv_close_transform (steps, nsteps);
+
+  return true;
+}
+
 struct __locale_data *
 _nl_find_locale (const char *locale_path, size_t locale_path_len,
 		 int category, const char **name)
@@ -199,6 +224,10 @@ _nl_find_locale (const char *locale_path, size_t locale_path_len,
     /* Memory allocate problem.  */
     return NULL;
 
+  /* The requested codeset does not have a converter, don't use it.  */
+  if (codeset != NULL && !codeset_has_module (codeset))
+    return NULL;
+
   /* If exactly this locale was already asked for we have an entry with
      the complete name.  */
   locale_file = _nl_make_l10nflist (&_nl_locale_file_list[category],
@@ -247,6 +276,33 @@ _nl_find_locale (const char *locale_path, size_t locale_path_len,
 	return NULL;
     }
 
+  /* Get the codeset information from the locale file.  */
+  static const int codeset_idx[] =
+    {
+      [__LC_CTYPE] = _NL_ITEM_INDEX (CODESET),
+      [__LC_NUMERIC] = _NL_ITEM_INDEX (_NL_NUMERIC_CODESET),
+      [__LC_TIME] = _NL_ITEM_INDEX (_NL_TIME_CODESET),
+      [__LC_COLLATE] = _NL_ITEM_INDEX (_NL_COLLATE_CODESET),
+      [__LC_MONETARY] = _NL_ITEM_INDEX (_NL_MONETARY_CODESET),
+      [__LC_MESSAGES] = _NL_ITEM_INDEX (_NL_MESSAGES_CODESET),
+      [__LC_PAPER] = _NL_ITEM_INDEX (_NL_PAPER_CODESET),
+      [__LC_NAME] = _NL_ITEM_INDEX (_NL_NAME_CODESET),
+      [__LC_ADDRESS] = _NL_ITEM_INDEX (_NL_ADDRESS_CODESET),
+      [__LC_TELEPHONE] = _NL_ITEM_INDEX (_NL_TELEPHONE_CODESET),
+      [__LC_MEASUREMENT] = _NL_ITEM_INDEX (_NL_MEASUREMENT_CODESET),
+      [__LC_IDENTIFICATION] = _NL_ITEM_INDEX (_NL_IDENTIFICATION_CODESET)
+    };
+  const struct __locale_data *data;
+  const char *locale_codeset;
+
+  data = (const struct __locale_data *) locale_file->data;
+  locale_codeset = (const char *) data->values[codeset_idx[category]].string;
+  assert (locale_codeset != NULL);
+
+  /* The locale codeset does not have a converter, don't use it.  */
+  if (locale_codeset[0] != '\0' && !codeset_has_module (locale_codeset))
+    return NULL;
+
   /* The LC_CTYPE category allows to check whether a locale is really
      usable.  If the locale name contains a charset name and the
      charset name used in the locale (present in the LC_CTYPE data) is
@@ -255,31 +311,9 @@ _nl_find_locale (const char *locale_path, size_t locale_path_len,
      in the locale name.  */
   if (codeset != NULL)
     {
-      /* Get the codeset information from the locale file.  */
-      static const int codeset_idx[] =
-	{
-	  [__LC_CTYPE] = _NL_ITEM_INDEX (CODESET),
-	  [__LC_NUMERIC] = _NL_ITEM_INDEX (_NL_NUMERIC_CODESET),
-	  [__LC_TIME] = _NL_ITEM_INDEX (_NL_TIME_CODESET),
-	  [__LC_COLLATE] = _NL_ITEM_INDEX (_NL_COLLATE_CODESET),
-	  [__LC_MONETARY] = _NL_ITEM_INDEX (_NL_MONETARY_CODESET),
-	  [__LC_MESSAGES] = _NL_ITEM_INDEX (_NL_MESSAGES_CODESET),
-	  [__LC_PAPER] = _NL_ITEM_INDEX (_NL_PAPER_CODESET),
-	  [__LC_NAME] = _NL_ITEM_INDEX (_NL_NAME_CODESET),
-	  [__LC_ADDRESS] = _NL_ITEM_INDEX (_NL_ADDRESS_CODESET),
-	  [__LC_TELEPHONE] = _NL_ITEM_INDEX (_NL_TELEPHONE_CODESET),
-	  [__LC_MEASUREMENT] = _NL_ITEM_INDEX (_NL_MEASUREMENT_CODESET),
-	  [__LC_IDENTIFICATION] = _NL_ITEM_INDEX (_NL_IDENTIFICATION_CODESET)
-	};
-      const struct __locale_data *data;
-      const char *locale_codeset;
       char *clocale_codeset;
       char *ccodeset;
 
-      data = (const struct __locale_data *) locale_file->data;
-      locale_codeset =
-	(const char *) data->values[codeset_idx[category]].string;
-      assert (locale_codeset != NULL);
       /* Note the length of the allocated memory: +3 for up to two slashes
 	 and the NUL byte.  */
       clocale_codeset = (char *) alloca (strlen (locale_codeset) + 3);
diff --git a/localedata/Makefile b/localedata/Makefile
index 9ae2e5c161..8504303654 100644
--- a/localedata/Makefile
+++ b/localedata/Makefile
@@ -125,11 +125,13 @@ test-input := \
 test-input-data = $(addsuffix .in, $(test-input))
 test-output := $(foreach s, .out .xout, \
 			 $(addsuffix $s, $(basename $(test-input))))
+# Note that tst-invalid-charset depends on test5 and test6 being locales that
+# do not have valid charset converters.
 ld-test-names := test1 test2 test3 test4 test5 test6 test7
 ld-test-srcs := $(addprefix tests/,$(addsuffix .cm,$(ld-test-names)) \
 				   $(addsuffix .def,$(ld-test-names)) \
 				   $(addsuffix .ds,test5 test6) \
-				   test6.c trans.def)
+				   trans.def)
 
 fmon-tests = n01y12 n02n40 n10y31 n11y41 n12y11 n20n32 n30y20 n41n00 \
 	     y01y10 y02n22 y22n42 y30y21 y32n31 y40y00 y42n21
@@ -163,6 +165,7 @@ tests = \
   tst-c-utf8-consistency \
   tst-digits \
   tst-iconv-math-trans \
+  tst-invalid-charset \
   tst-leaks \
   tst-mbswcs1 \
   tst-mbswcs2 \
@@ -423,7 +426,10 @@ $(objpfx)tst-langinfo-setlocale-static.out: tst-langinfo.sh \
 		 '$(run-program-env)' '$(test-program-cmd-after-env)' > $@; \
 	$(evaluate-test)
 
+# These tests depend on tst-locale because they use the locales compiled by
+# that test.
 $(objpfx)tst-digits.out: $(objpfx)tst-locale.out
+$(objpfx)tst-invalid-charset.out: $(objpfx)tst-locale.out
 $(objpfx)tst-mbswcs6.out: $(addprefix $(objpfx),$(CTYPE_FILES))
 endif
 
@@ -484,7 +490,8 @@ $(objpfx)mtrace-tst-leaks.out: $(objpfx)tst-leaks.out
 	$(common-objpfx)malloc/mtrace $(objpfx)tst-leaks.mtrace > $@; \
 	$(evaluate-test)
 
-bug-setlocale1-ENV-only = LOCPATH=$(objpfx) LC_CTYPE=de_DE.UTF-8
+bug-setlocale1-ENV-only = GCONV_PATH=$(common-objpfx)iconvdata \
+			  LOCPATH=$(objpfx) LC_CTYPE=de_DE.UTF-8
 bug-setlocale1-static-ENV-only = $(bug-setlocale1-ENV-only)
 
 $(objdir)/iconvdata/gconv-modules:
diff --git a/localedata/tests/test6.c b/localedata/tests/test6.c
deleted file mode 100644
index fc841454f0..0000000000
--- a/localedata/tests/test6.c
+++ /dev/null
@@ -1,136 +0,0 @@
-/* Test program for character classes and mappings.
-   Copyright (C) 1999-2022 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <https://www.gnu.org/licenses/>.  */
-
-#include <ctype.h>
-#include <locale.h>
-#include <wchar.h>
-
-
-int
-main (void)
-{
-  const char lower[] = "abcdefghijklmnopqrstuvwxyz";
-  const char upper[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
-#define LEN (sizeof (upper) - 1)
-  const wchar_t wlower[] = L"abcdefghijklmnopqrstuvwxyz";
-  const wchar_t wupper[] = L"ABCDEFGHIJKLMNOPQRSTUVWXYZ";
-  int i;
-  int result = 0;
-
-  setlocale (LC_ALL, "test6");
-
-  for (i = 0; i < LEN; ++i)
-    {
-      /* Test basic table handling (basic == not more than 256 characters).
-	 The charmaps swaps the normal lower-upper case meaning of the
-	 ASCII characters used in the source code while the Unicode mapping
-	 in the repertoire map has the normal correspondents.  This test
-	 shows the independence of the tables for `char' and `wchar_t'
-	 characters.  */
-
-      if (islower (lower[i]))
-	{
-	  printf ("islower ('%c') false\n", lower[i]);
-	  result = 1;
-	}
-      if (! isupper (lower[i]))
-	{
-	  printf ("isupper ('%c') false\n", lower[i]);
-	  result = 1;
-	}
-
-      if (! islower (upper[i]))
-	{
-	  printf ("islower ('%c') false\n", upper[i]);
-	  result = 1;
-	}
-      if (isupper (upper[i]))
-	{
-	  printf ("isupper ('%c') false\n", upper[i]);
-	  result = 1;
-	}
-
-      if (toupper (lower[i]) != lower[i])
-	{
-	  printf ("toupper ('%c') false\n", lower[i]);
-	  result = 1;
-	}
-      if (tolower (lower[i]) != upper[i])
-	{
-	  printf ("tolower ('%c') false\n", lower[i]);
-	  result = 1;
-	}
-
-      if (tolower (upper[i]) != upper[i])
-	{
-	  printf ("tolower ('%c') false\n", upper[i]);
-	  result = 1;
-	}
-      if (toupper (upper[i]) != lower[i])
-	{
-	  printf ("toupper ('%c') false\n", upper[i]);
-	  result = 1;
-	}
-
-      if (iswlower (wupper[i]))
-	{
-	  printf ("iswlower (L'%c') false\n", upper[i]);
-	  result = 1;
-	}
-      if (! iswupper (wupper[i]))
-	{
-	  printf ("iswupper (L'%c') false\n", upper[i]);
-	  result = 1;
-	}
-
-      if (iswupper (wlower[i]))
-	{
-	  printf ("iswupper (L'%c') false\n", lower[i]);
-	  result = 1;
-	}
-      if (! iswlower (wlower[i]))
-	{
-	  printf ("iswlower (L'%c') false\n", lower[i]);
-	  result = 1;
-	}
-
-      if (towupper (wlower[i]) != wupper[i])
-	{
-	  printf ("towupper ('%c') false\n", lower[i]);
-	  result = 1;
-	}
-      if (towlower (wlower[i]) != wlower[i])
-	{
-	  printf ("towlower ('%c') false\n", lower[i]);
-	  result = 1;
-	}
-
-      if (towlower (wupper[i]) != wlower[i])
-	{
-	  printf ("towlower ('%c') false\n", upper[i]);
-	  result = 1;
-	}
-      if (towupper (wupper[i]) != wupper[i])
-	{
-	  printf ("towupper ('%c') false\n", upper[i]);
-	  result = 1;
-	}
-    }
-
-  return result;
-}
diff --git a/localedata/tst-invalid-charset.c b/localedata/tst-invalid-charset.c
new file mode 100644
index 0000000000..067bb689da
--- /dev/null
+++ b/localedata/tst-invalid-charset.c
@@ -0,0 +1,31 @@
+/* Test program to verify that setlocale fails for charsets that do not have a
+   converter.
+   Copyright The GNU Toolchain Authors.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <ctype.h>
+#include <locale.h>
+#include <wchar.h>
+
+
+int
+main (void)
+{
+  /* Fail if setlocale succeeds for any of these locales.  */
+  return (setlocale (LC_ALL, "test5") != NULL
+	  || setlocale (LC_ALL, "test6") != NULL);
+}
-- 
2.34.1


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v3] setlocale: Fail if iconv module for charset is not present [BZ #27996]
  2022-02-08  9:40 ` [PATCH v3] " Siddhesh Poyarekar
@ 2022-02-17  8:08   ` Florian Weimer
  0 siblings, 0 replies; 9+ messages in thread
From: Florian Weimer @ 2022-02-17  8:08 UTC (permalink / raw)
  To: Siddhesh Poyarekar via Libc-alpha; +Cc: Siddhesh Poyarekar

* Siddhesh Poyarekar via Libc-alpha:

> +/* Return true if we have gconv modules to transform between the INTERNAL
> +   encoding and CODESET.  */
> +static bool
> +codeset_has_module (const char *codeset)
> +{
> +  struct __gconv_step *steps;
> +  size_t nsteps;
> +
> +  char *ccodeset = (char *) alloca (strlen (codeset) + 3);
> +  strip (ccodeset, codeset);
> +
> +  if (__gconv_find_transform ("INTERNAL", ccodeset, &steps, &nsteps, 0)
> +      != __GCONV_OK)
> +    return false;
> +  __gconv_close_transform (steps, nsteps);
> +
> +  if (__gconv_find_transform (ccodeset, "INTERNAL", &steps, &nsteps, 0)
> +      != __GCONV_OK)
> +    return false;
> +  __gconv_close_transform (steps, nsteps);
> +
> +  return true;
> +}

I think we should actually load the converters and keep them loaded.
See __wcsmbs_load_conv.  I'm worried this is actually much more
complicated than expected. 8-(

Thanks,
Florian


^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2022-02-17  8:08 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-06-30  8:56 [PATCH] setlocale: Fail if iconv module for charset is not present [BZ #27996] Siddhesh Poyarekar
2021-07-16 20:28 ` Carlos O'Donell
2021-07-19  8:34   ` Siddhesh Poyarekar
2021-07-20  2:36     ` [PATCH v2] " Siddhesh Poyarekar
2021-08-11  7:42       ` [PING][PATCH " Siddhesh Poyarekar
2021-08-17  2:58         ` [PING2][PATCH " Siddhesh Poyarekar
2021-10-18 13:16           ` [PING3][PATCH " Siddhesh Poyarekar
2022-02-08  9:40 ` [PATCH v3] " Siddhesh Poyarekar
2022-02-17  8:08   ` Florian Weimer

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).