From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 2155) id 692023858284; Fri, 24 Feb 2023 15:42:00 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 692023858284 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=sourceware.org; s=default; t=1677253320; bh=MyTVHsTWiF5E3PgzCU+u9qL9/Ncu5LFqsREiPxtivdc=; h=From:To:Subject:Date:From; b=QjZAq+9+DTheAnUrxb/4FRZdAPsdHh1Nw9Ki7CmGJ9U2YVK5dU74NbqUQCHay0lu3 Zg7Y1b13DVV3Gss8q0Z5PLtQ/ykxzKIz8dXIxxqEUBsomtzyu8pz4cY9QkksQ1dDl8 ynco29QIMw56jGwD20vAkSjc1VmCSCEHgY4tKf44= Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable From: Corinna Vinschen To: cygwin-cvs@sourceware.org Subject: [newlib-cygwin/main] Cygwin: locale(1): drop using LCID, use Windows locale names X-Act-Checkin: newlib-cygwin X-Git-Author: Corinna Vinschen X-Git-Refname: refs/heads/main X-Git-Oldrev: 17ac400c11bab30ac2c0bef12cbf7788f0b6f954 X-Git-Newrev: ac405ab9bc07789d5576fd823cbaa7eef472b11b Message-Id: <20230224154200.692023858284@sourceware.org> Date: Fri, 24 Feb 2023 15:42:00 +0000 (GMT) List-Id: https://sourceware.org/git/gitweb.cgi?p=3Dnewlib-cygwin.git;h=3Dac405ab9bc0= 7789d5576fd823cbaa7eef472b11b commit ac405ab9bc07789d5576fd823cbaa7eef472b11b Author: Corinna Vinschen AuthorDate: Thu Feb 23 00:22:56 2023 +0100 Commit: Corinna Vinschen CommitDate: Fri Feb 24 16:40:58 2023 +0100 Cygwin: locale(1): drop using LCID, use Windows locale names =20 LCIDs have been deprecated since Windows Vista. Worse, lots of new locales have been added in the meantime which have no LCID attached. They are only available by locale name. =20 As a first step, rearrange the locale(1) tool to use Windows locale names, rather than LCIDs, so we can now enumerate *all* locales available in more recent Windows versions. 
=20 Signed-off-by: Corinna Vinschen Diff: --- winsup/utils/locale.cc | 320 +++++++++++++++++++++++++++++----------------= ---- 1 file changed, 190 insertions(+), 130 deletions(-) diff --git a/winsup/utils/locale.cc b/winsup/utils/locale.cc index 3f7e5dcca6a0..1d90550f343d 100644 --- a/winsup/utils/locale.cc +++ b/winsup/utils/locale.cc @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -107,17 +108,42 @@ struct option longopts[] =3D { const char *opts =3D "acfhikmnsuUvV"; =20 int -getlocale (LCID lcid, char *name) +getlocale (PWCHAR loc_name, wchar_t *iso639, wchar_t *iso3166, + wchar_t *iso15924 =3D NULL) { - char iso639[10]; - char iso3166[10]; + wchar_t *cp; =20 - iso3166[0] =3D '\0'; - if (!GetLocaleInfo (lcid, LOCALE_SISO639LANGNAME, iso639, 10)) + /* Skip language-only locales, e. g. "en" */ + if (!(cp =3D wcschr (loc_name, L'-'))) return 0; - GetLocaleInfo (lcid, LOCALE_SISO3166CTRYNAME, iso3166, 10); - sprintf (name, "%s%s%s", iso639, lcid > 0x3ff ? "_" : "", - lcid > 0x3ff ? iso3166 : ""); + ++cp; + /* Script inside? Scripts are Upper/Lower, e. g. "Latn" */ + if (iswupper (cp[0]) && iswlower (cp[1])) + { + wchar_t *cp2; + + /* Skip language-Script locales, missing country */ + if (!(cp2 =3D wcschr (cp + 2, L'-'))) + return 0; + /* Otherwise, store in iso15924 */ + if (iso15924) + wcpcpy (wcpncpy (iso15924, cp, cp2 - cp), L";"); + } + cp =3D wcsrchr (loc_name, L'-'); + if (cp) + { + /* Skip numeric iso3166 country name. */ + if (iswdigit (cp[1])) + return 0; + /* Special case postfix after iso3166 country name: ca-ES-valencia. 
+ Use the postfix thingy as script so it will become a @modifier */ + if (iswlower (cp[1])) + wcpcpy (iso15924, cp + 1); + } + + if (!GetLocaleInfoEx (loc_name, LOCALE_SISO639LANGNAME, iso639, 10)) + return 0; + GetLocaleInfoEx (loc_name, LOCALE_SISO3166CTRYNAME, iso3166, 10); return 1; } =20 @@ -132,14 +158,6 @@ loc_t *locale; size_t loc_max; size_t loc_num; =20 -void -print_codeset (const char *codeset) -{ - for (; *codeset; ++codeset) - if (*codeset !=3D '-') - putc (tolower ((int)(unsigned char) *codeset), stdout); -} - void print_locale_with_codeset (int verbose, loc_t *locale, bool utf8, const char *modifier) @@ -158,7 +176,7 @@ print_locale_with_codeset (int verbose, loc_t *locale, = bool utf8, sysroot =3D (const char *) cygwin_create_path (CCP_WIN_W_TO_POSIX, sysbuf); if (!sysroot) - sysroot =3D "kernel32.dll"; + sysroot =3D "kernel32.dll"; } snprintf (locname, 32, "%s%s%s%s", locale->name, utf8 ? ".utf8" : "", modifier ? "@" : "", modifier ?: ""); @@ -187,7 +205,7 @@ print_locale (int verbose, loc_t *locale) { if (!modifier) print_locale_with_codeset (verbose, locale, true, NULL); - else if (!strcmp (modifier, "@cjknarrow")) + else if (strcmp (modifier, "@euro")) { *modifier++ =3D '\0'; print_locale_with_codeset (verbose, locale, true, modifier); @@ -213,7 +231,7 @@ add_locale (const char *name, const wchar_t *language, = const wchar_t *territory, { loc_t *tmp =3D (loc_t *) realloc (locale, (loc_max + 32) * sizeof (l= oc_t)); if (!tmp) - { + { fprintf (stderr, "Out of memory!\n"); exit (1); } @@ -266,110 +284,147 @@ add_locale_alias_locales () c =3D strchr (replace, '.'); if (c) *c =3D '\0'; + /* Ignore "ja_JP" and "ko_KR" locales from here, they are in the Win= dows + DB anyway. */ + if (!strcmp (alias, "ja_JP") || !strcmp (alias, "ko_KR")) + continue; search.name =3D replace; loc =3D (loc_t *) bsearch (&search, locale, orig_loc_num, sizeof (lo= c_t), compare_locales); + add_locale (alias, loc ? loc->language : L"", loc ? 
loc->territory := L"", true); } fclose (fp); } =20 -void -print_all_locales (int verbose) +BOOL +print_all_locales_proc (LPWSTR loc_name, DWORD info, LPARAM param) { - LCID lcid =3D 0; - char name[32]; + wchar_t iso639[32] =3D { 0 }; + wchar_t iso3166[32] =3D { 0 }; + wchar_t iso15924[32] =3D { 0 }; DWORD cp; =20 - unsigned lang, sublang; +#if 0 + struct { + wchar_t language[256]; + wchar_t country[256]; + char loc[32]; + } loc_list[32]; + int lcnt =3D 0; +#endif =20 - add_locale ("C", L"C", L"POSIX"); - add_locale ("POSIX", L"C", L"POSIX", true); - for (lang =3D 1; lang <=3D 0xff; ++lang) + if (getlocale (loc_name, iso639, iso3166, iso15924)) { - struct { - wchar_t language[256]; - wchar_t country[256]; - char loc[32]; - } loc_list[32]; - int lcnt =3D 0; - - for (sublang =3D 1; sublang <=3D 0x3f; ++sublang) + char *c, posix_loc[32]; + wchar_t language[256]; + wchar_t country[256]; + wchar_t currency[9]; + + c =3D posix_loc + snprintf (posix_loc, sizeof posix_loc, "%ls_%ls", + iso639, iso3166); + /* Inuktitut: equivalent @latin due to lack of info on Linux */ + if (!wcscmp (iso639, L"iu")) { - lcid =3D (sublang << 10) | lang; - if (getlocale (lcid, name)) + if (wcscmp (iso15924, L"Latn;")) + return TRUE; + } + /* Javanese: only use @latin locale. */ + else if (!wcscmp (iso639, L"jv")) + { + if (wcscmp (iso15924, L"Latn;")) + return TRUE; + } + /* Mongolian: only use @mongolian locale. */ + else if (!wcscmp (iso639, L"mn")) + { + if (wcscmp (iso15924, L"Mong;")) + return TRUE; + } + /* Serbian: Windows default is Latin, Linux default is Cyrillic. + We want the Linux default and attach @latin otherwise */ + else if (!wcscmp (iso639, L"sr") && !wcscmp (iso15924, L"Latn;")) + stpcpy (c, "@latin"); + /* Tamazight: no modifier, iso639 is "ber" on Linux. + "zgh-Tfng-MA" is equivalent to "ber_MA". 
*/ + else if (!wcscmp (iso639, L"zgh")) + snprintf (posix_loc, sizeof posix_loc, "ber_%.27ls", iso3166); + /* Tamazight: "tzm-Latn-DZ" is equivalent to "ber_DZ", + skip everything else. */ + else if (!wcscmp (iso639, L"tzm")) + { + if (!wcscmp (iso3166, L"DZ") && !wcscmp (iso15924, L"Latn;")) + snprintf (posix_loc, sizeof posix_loc, "ber_%.27ls", iso3166); + else + return TRUE; + } + /* In all other cases, we check if the script from the Windows + locale is the default locale in that language. If not, we + add it as modifier if possible, or skip it */ + else if (iso15924[0]) + { + wchar_t scriptless_win_locale[32]; + wchar_t default_iso15924[32]; + + wcpcpy (wcpcpy (wcpcpy (scriptless_win_locale, iso639), L"_"), + iso3166); + if ((GetLocaleInfoEx (scriptless_win_locale, LOCALE_SSCRIPTS, + default_iso15924, 32) + || GetLocaleInfoEx (iso639, LOCALE_SSCRIPTS, + default_iso15924, 32)) + && !wcsstr (default_iso15924, iso15924)) { - wchar_t language[256]; - wchar_t country[256]; - int i; - char *c, loc[32]; - wchar_t wbuf[9]; - - /* Go figure. Even the English name of a language or - locale might contain native characters. */ - GetLocaleInfoW (lcid, LOCALE_SENGLANGUAGE, language, 256); - GetLocaleInfoW (lcid, LOCALE_SENGCOUNTRY, country, 256); - /* Avoid dups */ - for (i =3D 0; i < lcnt; ++ i) - if (!wcscmp (loc_list[i].language, language) - && !wcscmp (loc_list[i].country, country)) - break; - if (i < lcnt) - continue; - if (lcnt < 32) - { - wcscpy (loc_list[lcnt].language, language); - wcscpy (loc_list[lcnt].country, country); - } - c =3D stpcpy (loc, name); - /* Now check certain conditions to figure out if that - locale requires a modifier. 
*/ - if (lang =3D=3D LANG_SERBIAN && !strncmp (loc, "sr_", 3) - && wcsstr (language, L"(Latin)")) + if (!wcscmp (iso15924, L"Latn;")) stpcpy (c, "@latin"); - else if (lang =3D=3D LANG_UZBEK - && sublang =3D=3D SUBLANG_UZBEK_CYRILLIC) + else if (!wcscmp (iso15924, L"Cyrl;")) stpcpy (c, "@cyrillic"); - /* Avoid more dups */ - for (i =3D 0; i < lcnt; ++ i) - if (!strcmp (loc_list[i].loc, loc)) - { - lcnt++; - break; - } - if (i < lcnt) - continue; - if (lcnt < 32) - strcpy (loc_list[lcnt++].loc, loc); - /* Print */ - add_locale (loc, language, country); - /* Check for locales which sport a modifier for - changing the codeset and other stuff. */ - if (lang =3D=3D LANG_BELARUSIAN - && sublang =3D=3D SUBLANG_BELARUSIAN_BELARUS) - stpcpy (c, "@latin"); - else if (lang =3D=3D LANG_TATAR - && sublang =3D=3D SUBLANG_TATAR_RUSSIA) - stpcpy (c, "@iqtelif"); - else if (GetLocaleInfoW (lcid, - LOCALE_IDEFAULTANSICODEPAGE - | LOCALE_RETURN_NUMBER, - (PWCHAR) &cp, sizeof cp) - && cp =3D=3D 1252 /* Latin1*/ - && GetLocaleInfoW (lcid, LOCALE_SINTLSYMBOL, wbuf, 9) - && !wcsncmp (wbuf, L"EUR", 3)) - stpcpy (c, "@euro"); - else if (lang =3D=3D LANG_JAPANESE - || lang =3D=3D LANG_KOREAN - || lang =3D=3D LANG_CHINESE) - stpcpy (c, "@cjknarrow"); + else if (!wcscmp (iso15924, L"Deva;")) + stpcpy (c, "@devanagari"); + else if (!wcscmp (iso15924, L"Adlm;")) + stpcpy (c, "@adlam"); else - continue; - add_locale (loc, language, country); + return TRUE; } } + + /* Print */ + GetLocaleInfoEx (loc_name, LOCALE_SENGLISHLANGUAGENAME, language, 25= 6); + GetLocaleInfoEx (loc_name, LOCALE_SENGLISHCOUNTRYNAME, country, 256); + add_locale (posix_loc, language, country); + /* Check for locales sporting an additional modifier for + changing the codeset and other stuff. 
*/ + if (!wcscmp (iso639, L"be") && !wcscmp (iso3166, L"BY")) + stpcpy (c, "@latin"); + if (!wcscmp (iso639, L"tt") && !wcscmp (iso3166, L"RU")) + stpcpy (c, "@iqtelif"); + else if (GetLocaleInfoEx (loc_name, + LOCALE_IDEFAULTANSICODEPAGE + | LOCALE_RETURN_NUMBER, + (PWCHAR) &cp, sizeof cp) + && cp =3D=3D 1252 /* Latin1*/ + && GetLocaleInfoEx (loc_name, LOCALE_SINTLSYMBOL, currency, 9) + && !wcsncmp (currency, L"EUR", 3)) + stpcpy (c, "@euro"); + else if (!wcscmp (iso639, L"ja") + || !wcscmp (iso639, L"ko") + || !wcscmp (iso639, L"zh")) + stpcpy (c, "@cjknarrow"); + else + return TRUE; + add_locale (posix_loc, language, country); } + return TRUE; +} + +void +print_all_locales (int verbose) +{ + add_locale ("C", L"C", L"POSIX"); + add_locale ("POSIX", L"C", L"POSIX", true); + EnumSystemLocalesEx (print_all_locales_proc, + LOCALE_WINDOWS | LOCALE_SUPPLEMENTAL, + 0, NULL); /* First sort allows add_locale_alias_locales to bsearch in locales. */ qsort (locale, loc_num, sizeof (loc_t), compare_locales); add_locale_alias_locales (); @@ -544,8 +599,8 @@ const char *fake_string[] =3D { =20 lc_names_t lc_ctype_names[] =3D { - { "ctype-class-names", is_string_fake, 0, 0 }, - { "ctype-map-names", is_string_fake, 2, 0 }, + { "ctype-class-names", is_string_fake, 0, 0 }, + { "ctype-map-names", is_string_fake, 2, 0 }, { "ctype-outdigit0_mb", is_string, _NL_CTYPE_OUTDIGITS0_MB, 0 }, { "ctype-outdigit1_mb", is_string, _NL_CTYPE_OUTDIGITS1_MB, 0 }, { "ctype-outdigit2_mb", is_string, _NL_CTYPE_OUTDIGITS2_MB, 0 }, @@ -556,30 +611,30 @@ lc_names_t lc_ctype_names[] =3D { "ctype-outdigit7_mb", is_string, _NL_CTYPE_OUTDIGITS7_MB, 0 }, { "ctype-outdigit8_mb", is_string, _NL_CTYPE_OUTDIGITS8_MB, 0 }, { "ctype-outdigit9_mb", is_string, _NL_CTYPE_OUTDIGITS9_MB, 0 }, - { "ctype-outdigit0_wc", is_wchar, _NL_CTYPE_OUTDIGITS0_WC, 0 }, - { "ctype-outdigit1_wc", is_wchar, _NL_CTYPE_OUTDIGITS1_WC, 0 }, - { "ctype-outdigit2_wc", is_wchar, _NL_CTYPE_OUTDIGITS2_WC, 0 }, - { "ctype-outdigit3_wc", 
is_wchar, _NL_CTYPE_OUTDIGITS3_WC, 0 }, - { "ctype-outdigit4_wc", is_wchar, _NL_CTYPE_OUTDIGITS4_WC, 0 }, - { "ctype-outdigit5_wc", is_wchar, _NL_CTYPE_OUTDIGITS5_WC, 0 }, - { "ctype-outdigit6_wc", is_wchar, _NL_CTYPE_OUTDIGITS6_WC, 0 }, - { "ctype-outdigit7_wc", is_wchar, _NL_CTYPE_OUTDIGITS7_WC, 0 }, - { "ctype-outdigit8_wc", is_wchar, _NL_CTYPE_OUTDIGITS8_WC, 0 }, - { "ctype-outdigit9_wc", is_wchar, _NL_CTYPE_OUTDIGITS9_WC, 0 }, + { "ctype-outdigit0_wc", is_wchar, _NL_CTYPE_OUTDIGITS0_WC, 0 }, + { "ctype-outdigit1_wc", is_wchar, _NL_CTYPE_OUTDIGITS1_WC, 0 }, + { "ctype-outdigit2_wc", is_wchar, _NL_CTYPE_OUTDIGITS2_WC, 0 }, + { "ctype-outdigit3_wc", is_wchar, _NL_CTYPE_OUTDIGITS3_WC, 0 }, + { "ctype-outdigit4_wc", is_wchar, _NL_CTYPE_OUTDIGITS4_WC, 0 }, + { "ctype-outdigit5_wc", is_wchar, _NL_CTYPE_OUTDIGITS5_WC, 0 }, + { "ctype-outdigit6_wc", is_wchar, _NL_CTYPE_OUTDIGITS6_WC, 0 }, + { "ctype-outdigit7_wc", is_wchar, _NL_CTYPE_OUTDIGITS7_WC, 0 }, + { "ctype-outdigit8_wc", is_wchar, _NL_CTYPE_OUTDIGITS8_WC, 0 }, + { "ctype-outdigit9_wc", is_wchar, _NL_CTYPE_OUTDIGITS9_WC, 0 }, { "charmap", is_string, CODESET, 0 }, { "ctype-mb-cur-max", is_int, _NL_CTYPE_MB_CUR_MAX, 0 }, - { NULL, is_end, 0, 0 } + { NULL, is_end, 0, 0 } }; =20 lc_names_t lc_numeric_names[] =3D { { "decimal_point", is_string, RADIXCHAR, 0 }, - { "thousands_sep", is_string, THOUSEP, 0 }, + { "thousands_sep", is_string, THOUSEP, 0 }, { "grouping", is_grouping, _NL_NUMERIC_GROUPING, 0 }, { "numeric-decimal-point-wc", is_wchar, _NL_NUMERIC_DECIMAL_POINT_WC, 0= }, { "numeric-thousands-sep-wc", is_wchar, _NL_NUMERIC_THOUSANDS_SEP_WC, 0= }, { "numeric-codeset", is_string, _NL_NUMERIC_CODESET, 0 }, - { NULL, is_end, 0, 0 } + { NULL, is_end, 0, 0 } }; =20 lc_names_t lc_time_names[] =3D @@ -600,13 +655,13 @@ lc_names_t lc_time_names[] =3D { "era_t_fmt", is_string, ERA_T_FMT, 0 }, { "date_fmt", is_string, _DATE_FMT, 0 }, { "time-codeset", is_string, _NL_TIME_CODESET, 0 }, - { NULL, is_end, 0, 0 } + { NULL, 
is_end, 0, 0 } }; =20 lc_names_t lc_collate_names[] =3D { { "collate-codeset", is_string, _NL_COLLATE_CODESET, 0 }, - { NULL, is_end, 0, 0 } + { NULL, is_end, 0, 0 } }; =20 lc_names_t lc_monetary_names[] =3D @@ -635,7 +690,7 @@ lc_names_t lc_monetary_names[] =3D { "monetary-decimal-point-wc", is_wchar, _NL_MONETARY_WMON_DECIMAL_POINT= , 0 }, { "monetary-thousands-sep-wc", is_wchar, _NL_MONETARY_WMON_THOUSANDS_SEP= , 0 }, { "monetary-codeset", is_string, _NL_MONETARY_CODESET, 0 }, - { NULL, is_end, 0, 0 } + { NULL, is_end, 0, 0 } }; =20 lc_names_t lc_messages_names[] =3D @@ -645,7 +700,7 @@ lc_names_t lc_messages_names[] =3D { "yesstr", is_string, YESSTR, 0 }, { "nostr", is_string, NOSTR, 0 }, { "messages-codeset", is_string, _NL_MESSAGES_CODESET, 0 }, - { NULL, is_end, 0, 0 } + { NULL, is_end, 0, 0 } }; =20 void @@ -739,14 +794,13 @@ int main (int argc, char **argv) { int opt; - LCID lcid =3D 0; + wchar_t loc_name[256] =3D { 0 }; int all =3D 0; int cat =3D 0; int key =3D 0; int maps =3D 0; int verbose =3D 0; const char *utf =3D ""; - char name[32]; =20 setlocale (LC_ALL, ""); while ((opt =3D getopt_long (argc, argv, opts, longopts, NULL)) !=3D -1) @@ -765,19 +819,22 @@ main (int argc, char **argv) maps =3D 1; break; case 'i': - lcid =3D (UINT_PTR) GetKeyboardLayout (0) & 0xffff; + GetLocaleInfoW ((UINT_PTR) GetKeyboardLayout (0) & 0xffff, LOCALE_SNAME, + loc_name, 256); break; case 's': - lcid =3D GetSystemDefaultUILanguage (); + GetLocaleInfoW (GetSystemDefaultUILanguage (), LOCALE_SNAME, + loc_name, 256); break; case 'u': - lcid =3D GetUserDefaultUILanguage (); + GetLocaleInfoW (GetUserDefaultUILanguage (), LOCALE_SNAME, + loc_name, 256); break; case 'f': - lcid =3D GetUserDefaultLCID (); + GetUserDefaultLocaleName (loc_name, 256); break; case 'n': - lcid =3D GetSystemDefaultLCID (); + GetSystemDefaultLocaleName (loc_name, 256); break; case 'U': utf =3D ".UTF-8"; @@ -799,10 +856,13 @@ main (int argc, char **argv) print_all_locales (verbose); else if (maps) 
print_charmaps (); - else if (lcid) + else if (loc_name[0]) { - if (getlocale (lcid, name)) - printf ("%s%s\n", name, utf); + wchar_t iso639[10]; + wchar_t iso3166[10]; + + if (getlocale (loc_name, iso639, iso3166, NULL)) + printf ("%ls_%ls%s", iso639, iso3166, utf); } else if (optind < argc) while (optind < argc)