public inbox for cygwin-cvs@sourceware.org
help / color / mirror / Atom feed
* [newlib-cygwin/main] Cygwin: locale: Set default charset from Linux locale -> codeset mapping
@ 2023-02-25 15:25 Corinna Vinschen
  0 siblings, 0 replies; only message in thread
From: Corinna Vinschen @ 2023-02-25 15:25 UTC (permalink / raw)
  To: cygwin-cvs

https://sourceware.org/git/gitweb.cgi?p=newlib-cygwin.git;h=2483e54be852e66d1a96a0da2af8324b51ef8b65

commit 2483e54be852e66d1a96a0da2af8324b51ef8b65
Author:     Corinna Vinschen <corinna@vinschen.de>
AuthorDate: Sat Feb 25 16:11:54 2023 +0100
Commit:     Corinna Vinschen <corinna@vinschen.de>
CommitDate: Sat Feb 25 16:12:51 2023 +0100

    Cygwin: locale: Set default charset from Linux locale -> codeset mapping
    
    Generate the lc_def_codesets.h header containing the default mapping from
    locale to codeset on Linux.  __set_charset_from_locale now consults this
    mapping first.
    
    For every locale not covered by this table, just map Windows codepages
    to equivalent codesets used on Linux/Unix, getting rid of LCIDs entirely.
    
    Signed-off-by: Corinna Vinschen <corinna@vinschen.de>

Diff:
---
 winsup/cygwin/local_includes/lc_def_codesets.h | 342 +++++++++++++++++++++++++
 winsup/cygwin/nlsfuncs.cc                      | 156 +++--------
 2 files changed, 376 insertions(+), 122 deletions(-)

diff --git a/winsup/cygwin/local_includes/lc_def_codesets.h b/winsup/cygwin/local_includes/lc_def_codesets.h
new file mode 100644
index 000000000000..9c7230391e40
--- /dev/null
+++ b/winsup/cygwin/local_includes/lc_def_codesets.h
@@ -0,0 +1,342 @@
+/* This table of default codesets per locale has been generated by fetching
+   locale data from a Linux system using glibc-2.36-9.fc37.x86_64 on 2023-02-25. */
+struct default_codeset_t
+{
+  const char *locale;
+  const char *codeset;
+} default_codeset[] =
+{
+  { "aa_DJ", "ISO-8859-1" },
+  { "aa_ER", "UTF-8" },
+  { "aa_ER@saaho", "UTF-8" },
+  { "aa_ET", "UTF-8" },
+  { "af_ZA", "ISO-8859-1" },
+  { "agr_PE", "UTF-8" },
+  { "ak_GH", "UTF-8" },
+  { "am_ET", "UTF-8" },
+  { "an_ES", "ISO-8859-15" },
+  { "anp_IN", "UTF-8" },
+  { "ar_AE", "ISO-8859-6" },
+  { "ar_BH", "ISO-8859-6" },
+  { "ar_DZ", "ISO-8859-6" },
+  { "ar_EG", "ISO-8859-6" },
+  { "ar_IN", "UTF-8" },
+  { "ar_IQ", "ISO-8859-6" },
+  { "ar_JO", "ISO-8859-6" },
+  { "ar_KW", "ISO-8859-6" },
+  { "ar_LB", "ISO-8859-6" },
+  { "ar_LY", "ISO-8859-6" },
+  { "ar_MA", "ISO-8859-6" },
+  { "ar_OM", "ISO-8859-6" },
+  { "ar_QA", "ISO-8859-6" },
+  { "ar_SA", "ISO-8859-6" },
+  { "ar_SD", "ISO-8859-6" },
+  { "ar_SS", "UTF-8" },
+  { "ar_SY", "ISO-8859-6" },
+  { "ar_TN", "ISO-8859-6" },
+  { "ar_YE", "ISO-8859-6" },
+  { "as_IN", "UTF-8" },
+  { "ast_ES", "ISO-8859-15" },
+  { "ayc_PE", "UTF-8" },
+  { "az_AZ", "UTF-8" },
+  { "az_IR", "UTF-8" },
+  { "be_BY", "CP1251" },
+  { "be_BY@latin", "UTF-8" },
+  { "bem_ZM", "UTF-8" },
+  { "ber_DZ", "UTF-8" },
+  { "ber_MA", "UTF-8" },
+  { "bg_BG", "CP1251" },
+  { "bho_IN", "UTF-8" },
+  { "bho_NP", "UTF-8" },
+  { "bi_VU", "UTF-8" },
+  { "bn_BD", "UTF-8" },
+  { "bn_IN", "UTF-8" },
+  { "bo_CN", "UTF-8" },
+  { "bo_IN", "UTF-8" },
+  { "br_FR", "ISO-8859-1" },
+  { "br_FR@euro", "ISO-8859-15" },
+  { "brx_IN", "UTF-8" },
+  { "bs_BA", "ISO-8859-2" },
+  { "byn_ER", "UTF-8" },
+  { "ca_AD", "ISO-8859-15" },
+  { "ca_ES", "ISO-8859-1" },
+  { "ca_ES@euro", "ISO-8859-15" },
+  { "ca_ES@valencia", "UTF-8" },
+  { "ca_FR", "ISO-8859-15" },
+  { "ca_IT", "ISO-8859-15" },
+  { "ce_RU", "UTF-8" },
+  { "chr_US", "UTF-8" },
+  { "ckb_IQ", "UTF-8" },
+  { "cmn_TW", "UTF-8" },
+  { "crh_UA", "UTF-8" },
+  { "cs_CZ", "ISO-8859-2" },
+  { "csb_PL", "UTF-8" },
+  { "cv_RU", "UTF-8" },
+  { "cy_GB", "ISO-8859-14" },
+  { "da_DK", "ISO-8859-1" },
+  { "de_AT", "ISO-8859-1" },
+  { "de_AT@euro", "ISO-8859-15" },
+  { "de_BE", "ISO-8859-1" },
+  { "de_BE@euro", "ISO-8859-15" },
+  { "de_CH", "ISO-8859-1" },
+  { "de_DE", "ISO-8859-1" },
+  { "de_DE@euro", "ISO-8859-15" },
+  { "de_IT", "ISO-8859-1" },
+  { "de_LU", "ISO-8859-1" },
+  { "de_LU@euro", "ISO-8859-15" },
+  { "doi_IN", "UTF-8" },
+  { "dsb_DE", "UTF-8" },
+  { "dv_MV", "UTF-8" },
+  { "dz_BT", "UTF-8" },
+  { "el_CY", "ISO-8859-7" },
+  { "el_GR", "ISO-8859-7" },
+  { "el_GR@euro", "ISO-8859-7" },
+  { "en_AG", "UTF-8" },
+  { "en_AU", "ISO-8859-1" },
+  { "en_BW", "ISO-8859-1" },
+  { "en_CA", "ISO-8859-1" },
+  { "en_DK", "ISO-8859-1" },
+  { "en_GB", "ISO-8859-1" },
+  { "en_HK", "ISO-8859-1" },
+  { "en_IE", "ISO-8859-1" },
+  { "en_IE@euro", "ISO-8859-15" },
+  { "en_IL", "UTF-8" },
+  { "en_IN", "UTF-8" },
+  { "en_NG", "UTF-8" },
+  { "en_NZ", "ISO-8859-1" },
+  { "en_PH", "ISO-8859-1" },
+  { "en_SG", "ISO-8859-1" },
+  { "en_US", "ISO-8859-1" },
+  { "en_ZA", "ISO-8859-1" },
+  { "en_ZM", "UTF-8" },
+  { "en_ZW", "ISO-8859-1" },
+  { "es_AR", "ISO-8859-1" },
+  { "es_BO", "ISO-8859-1" },
+  { "es_CL", "ISO-8859-1" },
+  { "es_CO", "ISO-8859-1" },
+  { "es_CR", "ISO-8859-1" },
+  { "es_CU", "UTF-8" },
+  { "es_DO", "ISO-8859-1" },
+  { "es_EC", "ISO-8859-1" },
+  { "es_ES", "ISO-8859-1" },
+  { "es_ES@euro", "ISO-8859-15" },
+  { "es_GT", "ISO-8859-1" },
+  { "es_HN", "ISO-8859-1" },
+  { "es_MX", "ISO-8859-1" },
+  { "es_NI", "ISO-8859-1" },
+  { "es_PA", "ISO-8859-1" },
+  { "es_PE", "ISO-8859-1" },
+  { "es_PR", "ISO-8859-1" },
+  { "es_PY", "ISO-8859-1" },
+  { "es_SV", "ISO-8859-1" },
+  { "es_US", "ISO-8859-1" },
+  { "es_UY", "ISO-8859-1" },
+  { "es_VE", "ISO-8859-1" },
+  { "et_EE", "ISO-8859-1" },
+  { "eu_ES", "ISO-8859-1" },
+  { "eu_ES@euro", "ISO-8859-15" },
+  { "fa_IR", "UTF-8" },
+  { "ff_SN", "UTF-8" },
+  { "fi_FI", "ISO-8859-1" },
+  { "fi_FI@euro", "ISO-8859-15" },
+  { "fil_PH", "UTF-8" },
+  { "fo_FO", "ISO-8859-1" },
+  { "fr_BE", "ISO-8859-1" },
+  { "fr_BE@euro", "ISO-8859-15" },
+  { "fr_CA", "ISO-8859-1" },
+  { "fr_CH", "ISO-8859-1" },
+  { "fr_FR", "ISO-8859-1" },
+  { "fr_FR@euro", "ISO-8859-15" },
+  { "fr_LU", "ISO-8859-1" },
+  { "fr_LU@euro", "ISO-8859-15" },
+  { "fur_IT", "UTF-8" },
+  { "fy_DE", "UTF-8" },
+  { "fy_NL", "UTF-8" },
+  { "ga_IE", "ISO-8859-1" },
+  { "ga_IE@euro", "ISO-8859-15" },
+  { "gd_GB", "ISO-8859-15" },
+  { "gez_ER", "UTF-8" },
+  { "gez_ER@abegede", "UTF-8" },
+  { "gez_ET", "UTF-8" },
+  { "gez_ET@abegede", "UTF-8" },
+  { "gl_ES", "ISO-8859-1" },
+  { "gl_ES@euro", "ISO-8859-15" },
+  { "gu_IN", "UTF-8" },
+  { "gv_GB", "ISO-8859-1" },
+  { "ha_NG", "UTF-8" },
+  { "hak_TW", "UTF-8" },
+  { "he_IL", "ISO-8859-8" },
+  { "hi_IN", "UTF-8" },
+  { "hif_FJ", "UTF-8" },
+  { "hne_IN", "UTF-8" },
+  { "hr_HR", "ISO-8859-2" },
+  { "hsb_DE", "ISO-8859-2" },
+  { "ht_HT", "UTF-8" },
+  { "hu_HU", "ISO-8859-2" },
+  { "hy_AM", "UTF-8" },
+  { "ia_FR", "UTF-8" },
+  { "id_ID", "ISO-8859-1" },
+  { "ig_NG", "UTF-8" },
+  { "ik_CA", "UTF-8" },
+  { "is_IS", "ISO-8859-1" },
+  { "it_CH", "ISO-8859-1" },
+  { "it_IT", "ISO-8859-1" },
+  { "it_IT@euro", "ISO-8859-15" },
+  { "iu_CA", "UTF-8" },
+  { "ja_JP", "EUC-JP" },
+  { "ka_GE", "GEORGIAN-PS" },
+  { "kab_DZ", "UTF-8" },
+  { "kk_KZ", "PT154" },
+  { "kl_GL", "ISO-8859-1" },
+  { "km_KH", "UTF-8" },
+  { "kn_IN", "UTF-8" },
+  { "ko_KR", "EUC-KR" },
+  { "kok_IN", "UTF-8" },
+  { "ks_IN", "UTF-8" },
+  { "ks_IN@devanagar", "UTF-8" },
+  { "ku_TR", "ISO-8859-9" },
+  { "kw_GB", "ISO-8859-1" },
+  { "ky_KG", "UTF-8" },
+  { "lb_LU", "UTF-8" },
+  { "lg_UG", "ISO-8859-10" },
+  { "li_BE", "UTF-8" },
+  { "li_NL", "UTF-8" },
+  { "lij_IT", "UTF-8" },
+  { "ln_CD", "UTF-8" },
+  { "lo_LA", "UTF-8" },
+  { "lt_LT", "ISO-8859-13" },
+  { "lv_LV", "ISO-8859-13" },
+  { "lzh_TW", "UTF-8" },
+  { "mag_IN", "UTF-8" },
+  { "mai_IN", "UTF-8" },
+  { "mai_NP", "UTF-8" },
+  { "mfe_MU", "UTF-8" },
+  { "mg_MG", "ISO-8859-15" },
+  { "mhr_RU", "UTF-8" },
+  { "mi_NZ", "ISO-8859-13" },
+  { "miq_NI", "UTF-8" },
+  { "mjw_IN", "UTF-8" },
+  { "mk_MK", "ISO-8859-5" },
+  { "ml_IN", "UTF-8" },
+  { "mn_MN", "UTF-8" },
+  { "mni_IN", "UTF-8" },
+  { "mnw_MM", "UTF-8" },
+  { "mr_IN", "UTF-8" },
+  { "ms_MY", "ISO-8859-1" },
+  { "mt_MT", "ISO-8859-3" },
+  { "my_MM", "UTF-8" },
+  { "nan_TW", "UTF-8" },
+  { "nan_TW@latin", "UTF-8" },
+  { "nb_NO", "ISO-8859-1" },
+  { "nds_DE", "UTF-8" },
+  { "nds_NL", "UTF-8" },
+  { "ne_NP", "UTF-8" },
+  { "nhn_MX", "UTF-8" },
+  { "niu_NU", "UTF-8" },
+  { "niu_NZ", "UTF-8" },
+  { "nl_AW", "UTF-8" },
+  { "nl_BE", "ISO-8859-1" },
+  { "nl_BE@euro", "ISO-8859-15" },
+  { "nl_NL", "ISO-8859-1" },
+  { "nl_NL@euro", "ISO-8859-15" },
+  { "nn_NO", "ISO-8859-1" },
+  { "no_NO", "ISO-8859-1" },
+  { "nr_ZA", "UTF-8" },
+  { "nso_ZA", "UTF-8" },
+  { "oc_FR", "ISO-8859-1" },
+  { "om_ET", "UTF-8" },
+  { "om_KE", "ISO-8859-1" },
+  { "or_IN", "UTF-8" },
+  { "os_RU", "UTF-8" },
+  { "pa_IN", "UTF-8" },
+  { "pa_PK", "UTF-8" },
+  { "pap_AW", "UTF-8" },
+  { "pap_CW", "UTF-8" },
+  { "pl_PL", "ISO-8859-2" },
+  { "ps_AF", "UTF-8" },
+  { "pt_BR", "ISO-8859-1" },
+  { "pt_PT", "ISO-8859-1" },
+  { "pt_PT@euro", "ISO-8859-15" },
+  { "quz_PE", "UTF-8" },
+  { "raj_IN", "UTF-8" },
+  { "rif_MA", "UTF-8" },
+  { "ro_RO", "ISO-8859-2" },
+  { "ru_RU", "ISO-8859-5" },
+  { "ru_UA", "KOI8-U" },
+  { "rw_RW", "UTF-8" },
+  { "sa_IN", "UTF-8" },
+  { "sah_RU", "UTF-8" },
+  { "sat_IN", "UTF-8" },
+  { "sc_IT", "UTF-8" },
+  { "sd_IN", "UTF-8" },
+  { "sd_IN@devanagar", "UTF-8" },
+  { "se_NO", "UTF-8" },
+  { "sgs_LT", "UTF-8" },
+  { "shn_MM", "UTF-8" },
+  { "shs_CA", "UTF-8" },
+  { "si_LK", "UTF-8" },
+  { "sid_ET", "UTF-8" },
+  { "sk_SK", "ISO-8859-2" },
+  { "sl_SI", "ISO-8859-2" },
+  { "sm_WS", "UTF-8" },
+  { "so_DJ", "ISO-8859-1" },
+  { "so_ET", "UTF-8" },
+  { "so_KE", "ISO-8859-1" },
+  { "so_SO", "ISO-8859-1" },
+  { "sq_AL", "ISO-8859-1" },
+  { "sq_MK", "UTF-8" },
+  { "sr_ME", "UTF-8" },
+  { "sr_RS", "UTF-8" },
+  { "sr_RS@latin", "UTF-8" },
+  { "ss_ZA", "UTF-8" },
+  { "st_ZA", "ISO-8859-1" },
+  { "sv_FI", "ISO-8859-1" },
+  { "sv_FI@euro", "ISO-8859-15" },
+  { "sv_SE", "ISO-8859-1" },
+  { "sw_KE", "UTF-8" },
+  { "sw_TZ", "UTF-8" },
+  { "szl_PL", "UTF-8" },
+  { "ta_IN", "UTF-8" },
+  { "ta_LK", "UTF-8" },
+  { "te_IN", "UTF-8" },
+  { "tg_TJ", "KOI8-T" },
+  { "th_TH", "TIS-620" },
+  { "the_NP", "UTF-8" },
+  { "ti_ER", "UTF-8" },
+  { "ti_ET", "UTF-8" },
+  { "tig_ER", "UTF-8" },
+  { "tk_TM", "UTF-8" },
+  { "tl_PH", "ISO-8859-1" },
+  { "tn_ZA", "UTF-8" },
+  { "to_TO", "UTF-8" },
+  { "tpi_PG", "UTF-8" },
+  { "tr_CY", "ISO-8859-9" },
+  { "tr_TR", "ISO-8859-9" },
+  { "ts_ZA", "UTF-8" },
+  { "tt_RU", "UTF-8" },
+  { "tt_RU@iqtelif", "UTF-8" },
+  { "ug_CN", "UTF-8" },
+  { "uk_UA", "KOI8-U" },
+  { "unm_US", "UTF-8" },
+  { "ur_IN", "UTF-8" },
+  { "ur_PK", "UTF-8" },
+  { "uz_UZ", "ISO-8859-1" },
+  { "uz_UZ@cyrillic", "UTF-8" },
+  { "ve_ZA", "UTF-8" },
+  { "vi_VN", "UTF-8" },
+  { "wa_BE", "ISO-8859-1" },
+  { "wa_BE@euro", "ISO-8859-15" },
+  { "wae_CH", "UTF-8" },
+  { "wal_ET", "UTF-8" },
+  { "wo_SN", "UTF-8" },
+  { "xh_ZA", "ISO-8859-1" },
+  { "yi_US", "CP1255" },
+  { "yo_NG", "UTF-8" },
+  { "yue_HK", "UTF-8" },
+  { "yuw_PG", "UTF-8" },
+  { "zh_CN", "GB2312" },
+  { "zh_HK", "BIG5" },
+  { "zh_SG", "GB2312" },
+  { "zh_TW", "BIG5" },
+  { "zu_ZA", "ISO-8859-1" },
+};
diff --git a/winsup/cygwin/nlsfuncs.cc b/winsup/cygwin/nlsfuncs.cc
index c2f9c08c9fe1..8257c23b2f78 100644
--- a/winsup/cygwin/nlsfuncs.cc
+++ b/winsup/cygwin/nlsfuncs.cc
@@ -21,6 +21,7 @@ details. */
 #include "lc_msg.h"
 #include "lc_era.h"
 #include "lc_collelem.h"
+#include "lc_def_codesets.h"
 
 #define _LC(x)	&lc_##x##_ptr,lc_##x##_end-lc_##x##_ptr
 
@@ -474,7 +475,8 @@ __set_lc_time_from_win (const char *name,
 	 locale. */
       lc_era_t locale_key = { locale, NULL, NULL, NULL, NULL, NULL ,
 				      NULL, NULL, NULL, NULL, NULL };
-      lc_era_t *era = (lc_era_t *) bsearch ((void *) &locale_key, (void *) lc_era,
+      lc_era_t *era = (lc_era_t *) bsearch ((void *) &locale_key,
+					    (void *) lc_era,
 					    sizeof lc_era / sizeof *lc_era,
 					    sizeof *lc_era, locale_cmp);
 
@@ -1455,34 +1457,42 @@ strxfrm (char *__restrict s1, const char *__restrict s2, size_t sn)
 /* Fetch default ANSI codepage from locale info and generate a setlocale
    compatible character set code.  Called from newlib's setlocale(), if the
    charset isn't given explicitely in the POSIX compatible locale specifier. */
-
-/* FIXME: Check all locales against their Linux counterpart again and
-	  make sure the codeset conversion is correct.
-   FIXME: Perhaps, convert to locale names only.
-   FIXME: Perhaps, maintain a sorted list of Linux locales and their
-	  default codesets. */
 extern "C" void
-__set_charset_from_locale (const char *locale, char *charset)
+__set_charset_from_locale (const char *loc, char *charset)
 {
-  UINT cp;
   wchar_t win_locale[ENCODING_LEN + 1];
+  char locale[ENCODING_LEN + 1];
+  const char *modifier;
+  char *c;
+  UINT cp;
 
-  int ret = __get_rfc5646_from_locale (locale, win_locale);
-  wchar_t wbuf[9];
+  /* Cut out explicit codeset */
+  stpcpy (locale, loc);
+  modifier = strchr (loc, '@');
+  if ((c = strchr (locale, '.')))
+    stpcpy (c, modifier ?: "");
+
+  default_codeset_t srch_dc = { locale, NULL };
+  default_codeset_t *dc = (default_codeset_t *)
+	 bsearch ((void *) &srch_dc, (void *) default_codeset,
+		  sizeof default_codeset / sizeof *default_codeset,
+		  sizeof *default_codeset, locale_cmp);
+  if (dc)
+    {
+      stpcpy (charset, dc->codeset);
+      return;
+    }
 
   /* "C" locale, or invalid locale? */
-  if (ret <= 0)
+  if (__get_rfc5646_from_locale (locale, win_locale) <= 0)
     cp = 20127;
-  else if (!GetLocaleInfoEx (win_locale,
+  else if (GetLocaleInfoEx (win_locale,
 			    LOCALE_IDEFAULTANSICODEPAGE | LOCALE_RETURN_NUMBER,
 			    (PWCHAR) &cp, sizeof cp))
     cp = 0;
-  /* For simplicity, we still convert to LCID here. */
-  LCID lcid = LocaleNameToLCID (win_locale, 0);
   /* Translate codepage and lcid to a charset closely aligned with the default
      charsets defined in Glibc. */
   const char *cs;
-  const char *modifier = strchr (locale, '@') ?: "";
   switch (cp)
     {
     case 20127:
@@ -1504,131 +1514,33 @@ __set_charset_from_locale (const char *locale, char *charset)
       cs = "BIG5";
       break;
     case 1250:
-      if (lcid == 0x181a		/* sr_BA (Serbian/Bosnia
-						  and Herzegovina) */
-	  || lcid == 0x241a		/* sr_RS (Serbian/Serbia) */
-	  || lcid == 0x2c1a		/* sr_ME (Serbian/Montenegro)*/
-	  || lcid == 0x0442		/* tk_TM (Turkmen/Turkmenistan) */
-	  || !wcscmp (win_locale, L"sr-Latn-XK")) /* (Serbian/Kosovo) */
-	cs = "UTF-8";
-      else if (lcid == 0x041c)		/* sq_AL (Albanian/Albania) */
-	cs = "ISO-8859-1";
-      else
-	cs = "ISO-8859-2";
+      cs = "ISO-8859-2";
       break;
     case 1251:
-      if (lcid == 0x1c1a		/* sr_BA (Serbian Language/Bosnia
-						  and Herzegovina) */
-	  || lcid == 0x281a		/* sr_RS (Serbian Language/Serbia) */
-	  || lcid == 0x301a		/* sr_ME (Serbian Language/Montenegro)*/
-	  || lcid == 0x0440		/* ky_KG (Kyrgyz/Kyrgyzstan) */
-	  || lcid == 0x082c		/* az_AZ@cyrillic (Azerbaijani/Azerbaijan) */
-	  || lcid == 0x0843		/* uz_UZ (Uzbek/Uzbekistan) */
-					/* tt_RU (Tatar/Russia),
-						 IQTElif alphabet */
-	  || (lcid == 0x0444 && has_modifier ("@iqtelif"))
-	  || lcid == 0x0450		/* mn_MN (Mongolian/Mongolia) */
-	  || !wcscmp (win_locale, L"sr-Cyrl-XK")) /* (Serbian/Kosovo) */
-	cs = "UTF-8";
-      else if (lcid == 0x0423)		/* be_BY (Belarusian/Belarus) */
-	cs = has_modifier ("@latin") ? "UTF-8" : "CP1251";
-      else if (lcid == 0x0402		/* bg_BG (Bulgarian/Bulgaria) */
-	       || lcid == 0x0423)	/* be_BY (Belarusian/Belarus) */
-	cs = "CP1251";
-      else if (lcid == 0x0422		/* uk_UA (Ukrainian/Ukraine) */
-	      || !wcscmp (win_locale, L"ru-UA")) /* (Russian/Ukraine) */
-	cs = "KOI8-U";
-      else if (lcid == 0x0428)		/* tg_TJ (Tajik/Tajikistan) */
-	cs = "KOI8-T";
-      else
-	cs = "ISO-8859-5";
+      cs = "ISO-8859-5";
       break;
     case 1252:
-      if (lcid == 0x0452)		/* cy_GB (Welsh/Great Britain) */
-	cs = "ISO-8859-14";
-      else if (lcid == 0x4009		/* en_IN (English/India) */
-	       || lcid == 0x0867	/* ff_SN (Fulah/Senegal) */
-	       || lcid == 0x0464	/* fil_PH (Filipino/Philippines) */
-	       || lcid == 0x0462	/* fy_NL (Frisian/Netherlands) */
-	       || lcid == 0x0468	/* ha_NG (Hausa/Nigeria) */
-	       || lcid == 0x0475	/* haw_US (Hawaiian/United States) */
-	       || lcid == 0x0470	/* ig_NG (Igbo/Nigeria) */
-	       || lcid == 0x085d	/* iu_CA (Inuktitut/Canada) */
-	       || lcid == 0x046c	/* nso_ZA (Northern Sotho/South Africa) */
-	       || lcid == 0x0487	/* rw_RW (Kinyarwanda/Rwanda) */
-	       || lcid == 0x043b	/* se_NO (Northern Saami/Norway) */
-	       || lcid == 0x0832	/* tn_BW (Tswana/Botswana) */
-	       || lcid == 0x0432	/* tn_ZA (Tswana/South Africa) */
-	       || lcid == 0x0488	/* wo_SN (Wolof/Senegal) */
-	       || lcid == 0x046a	/* yo_NG (Yoruba/Nigeria) */
-	       || lcid == 0x085f)	/* ber_DZ (Tamazight/Algeria) */
-	cs = "UTF-8";
-      else if (lcid == 0x042e)		/* hsb_DE (Upper Sorbian/Germany) */
-	cs = "ISO-8859-2";
-      else if (lcid == 0x0491		/* gd_GB (Scots Gaelic/Great Britain) */
-	       || (has_modifier ("@euro")
-		   && GetLocaleInfoW (lcid, LOCALE_SINTLSYMBOL, wbuf, 9)
-		   && !wcsncmp (wbuf, L"EUR", 3)))
-	cs = "ISO-8859-15";
-      else
-	cs = "ISO-8859-1";
+      cs = "ISO-8859-1";
       break;
     case 1253:
       cs = "ISO-8859-7";
       break;
     case 1254:
-      if (lcid == 0x042c)		/* az_AZ (Azeri/Azerbaijan) */
-	cs = "UTF-8";
-      else if (lcid == 0x0443)		/* uz_UZ (Uzbek/Uzbekistan) */
-	cs = "ISO-8859-1";
-      else
-	cs = "ISO-8859-9";
+      cs = "ISO-8859-9";
       break;
     case 1255:
       cs = "ISO-8859-8";
       break;
     case 1256:
-      if (lcid == 0x0429		/* fa_IR (Persian/Iran) */
-	  || lcid == 0x0846		/* pa_PK (Punjabi/Pakistan) */
-	  || lcid == 0x0859		/* sd_PK (Sindhi/Pakistan) */
-	  || lcid == 0x0480		/* ug_CN (Uyghur/China) */
-	  || lcid == 0x0420)		/* ur_PK (Urdu/Pakistan) */
-	cs = "UTF-8";
-      else
-	cs = "ISO-8859-6";
+      cs = "ISO-8859-6";
       break;
     case 1257:
-      if (lcid == 0x0425)		/* et_EE (Estonian/Estonia) */
-	cs = "ISO-8859-15";
-      else
-	cs = "ISO-8859-13";
+      cs = "ISO-8859-13";
       break;
     case 1258:
     default:
-      if (lcid == 0x3c09		/* en_HK (English/Hong Kong) */
-	  || lcid == 0x200c		/* fr_RE (French/Réunion) */
-	  || lcid == 0x240c		/* fr_CD (French/Congo) */
-	  || lcid == 0x280c		/* fr_SN (French/Senegal) */
-	  || lcid == 0x2c0c		/* fr_CM (French/Cameroon) */
-	  || lcid == 0x300c		/* fr_CI (French/Ivory Coast) */
-	  || lcid == 0x340c		/* fr_ML (French/Mali) */
-	  || lcid == 0x380c		/* fr_MA (French/Morocco) */
-	  || lcid == 0x3c0c		/* fr_HT (French/Haiti) */
-	  || lcid == 0x0477		/* so_SO (Somali/Somali) */
-	  || lcid == 0x0430)		/* st_ZA (Sotho/South Africa) */
-	cs = "ISO-8859-1";
-      else if (lcid == 0x818)		/* ro_MD (Romanian/Moldovia) */
-	cs = "ISO-8859-2";
-      else if (lcid == 0x043a)		/* mt_MT (Maltese/Malta) */
-	cs = "ISO-8859-3";
-      else if (lcid == 0x0481)		/* mi_NZ (Maori/New Zealand) */
-	cs = "ISO-8859-13";
-      else if (lcid == 0x0437)		/* ka_GE (Georgian/Georgia) */
-	cs = "GEORGIAN-PS";
-      else if (lcid == 0x043f)		/* kk_KZ (Kazakh/Kazakhstan) */
-	cs = "PT154";
-      else
-	cs = "UTF-8";
+      cs = "UTF-8";
+      break;
     }
   stpcpy (charset, cs);
 }

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2023-02-25 15:25 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-02-25 15:25 [newlib-cygwin/main] Cygwin: locale: Set default charset from Linux locale -> codeset mapping Corinna Vinschen

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).