From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 2155) id 713683857B9B; Sat, 25 Feb 2023 15:25:09 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 713683857B9B DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=sourceware.org; s=default; t=1677338709; bh=lqanny5Wo2GKUOSc/XtwC+4O3EE/GET5Rq5thOuQ0x8=; h=From:To:Subject:Date:From; b=sWLqRR6t5s9EY4UHxijtJ2GuNE2mnAEvPVH6gkkG4cwYwQiK2Pmj3FgGqFpNptrID BuMZwK8LSGN/Hes5NMkSW9shmPQtEyJOl1wxvtumti2i8dYZ2DmR54NVAcED/sXECS u+LsBzopR/q/GTFMiAH2N7bic3f1GEcPwiipH/0c= Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable From: Corinna Vinschen To: cygwin-cvs@sourceware.org Subject: [newlib-cygwin/main] Cygwin: locale: Set default charset from Linux locale -> codeset mapping X-Act-Checkin: newlib-cygwin X-Git-Author: Corinna Vinschen X-Git-Refname: refs/heads/main X-Git-Oldrev: 7b591704b886ab60a6c31363bd776acafb32ed09 X-Git-Newrev: 2483e54be852e66d1a96a0da2af8324b51ef8b65 Message-Id: <20230225152509.713683857B9B@sourceware.org> Date: Sat, 25 Feb 2023 15:25:09 +0000 (GMT) List-Id: https://sourceware.org/git/gitweb.cgi?p=3Dnewlib-cygwin.git;h=3D2483e54be85= 2e66d1a96a0da2af8324b51ef8b65 commit 2483e54be852e66d1a96a0da2af8324b51ef8b65 Author: Corinna Vinschen AuthorDate: Sat Feb 25 16:11:54 2023 +0100 Commit: Corinna Vinschen CommitDate: Sat Feb 25 16:12:51 2023 +0100 Cygwin: locale: Set default charset from Linux locale -> codeset mapping =20 Generate lc_def_codesets.h header containing the default mapping from locale to codeset on Linux. Use this mapping in __set_charset_from_loc= ale in the first place. =20 For every locale not covered by this table, just map Windows codepages to equivalent codesets used on Linux/Unix, getting rid of LCIDs entirel= y. 
=20 Signed-off-by: Corinna Vinschen Diff: --- winsup/cygwin/local_includes/lc_def_codesets.h | 342 +++++++++++++++++++++= ++++ winsup/cygwin/nlsfuncs.cc | 156 +++-------- 2 files changed, 376 insertions(+), 122 deletions(-) diff --git a/winsup/cygwin/local_includes/lc_def_codesets.h b/winsup/cygwin= /local_includes/lc_def_codesets.h new file mode 100644 index 000000000000..9c7230391e40 --- /dev/null +++ b/winsup/cygwin/local_includes/lc_def_codesets.h @@ -0,0 +1,342 @@ +/* This struct of default codesets has been generated by fetching + locale data from a Linux system using glibc-2.36-9.fc37.x86_64 on 2023-= 02-25 */ +struct default_codeset_t +{ + const char *locale; + const char *codeset; +} default_codeset[] =3D +{ + { "aa_DJ", "ISO-8859-1" }, + { "aa_ER", "UTF-8" }, + { "aa_ER@saaho", "UTF-8" }, + { "aa_ET", "UTF-8" }, + { "af_ZA", "ISO-8859-1" }, + { "agr_PE", "UTF-8" }, + { "ak_GH", "UTF-8" }, + { "am_ET", "UTF-8" }, + { "an_ES", "ISO-8859-15" }, + { "anp_IN", "UTF-8" }, + { "ar_AE", "ISO-8859-6" }, + { "ar_BH", "ISO-8859-6" }, + { "ar_DZ", "ISO-8859-6" }, + { "ar_EG", "ISO-8859-6" }, + { "ar_IN", "UTF-8" }, + { "ar_IQ", "ISO-8859-6" }, + { "ar_JO", "ISO-8859-6" }, + { "ar_KW", "ISO-8859-6" }, + { "ar_LB", "ISO-8859-6" }, + { "ar_LY", "ISO-8859-6" }, + { "ar_MA", "ISO-8859-6" }, + { "ar_OM", "ISO-8859-6" }, + { "ar_QA", "ISO-8859-6" }, + { "ar_SA", "ISO-8859-6" }, + { "ar_SD", "ISO-8859-6" }, + { "ar_SS", "UTF-8" }, + { "ar_SY", "ISO-8859-6" }, + { "ar_TN", "ISO-8859-6" }, + { "ar_YE", "ISO-8859-6" }, + { "as_IN", "UTF-8" }, + { "ast_ES", "ISO-8859-15" }, + { "ayc_PE", "UTF-8" }, + { "az_AZ", "UTF-8" }, + { "az_IR", "UTF-8" }, + { "be_BY", "CP1251" }, + { "be_BY@latin", "UTF-8" }, + { "bem_ZM", "UTF-8" }, + { "ber_DZ", "UTF-8" }, + { "ber_MA", "UTF-8" }, + { "bg_BG", "CP1251" }, + { "bho_IN", "UTF-8" }, + { "bho_NP", "UTF-8" }, + { "bi_VU", "UTF-8" }, + { "bn_BD", "UTF-8" }, + { "bn_IN", "UTF-8" }, + { "bo_CN", "UTF-8" }, + { "bo_IN", "UTF-8" }, + { "br_FR", 
"ISO-8859-1" }, + { "br_FR@euro", "ISO-8859-15" }, + { "brx_IN", "UTF-8" }, + { "bs_BA", "ISO-8859-2" }, + { "byn_ER", "UTF-8" }, + { "ca_AD", "ISO-8859-15" }, + { "ca_ES", "ISO-8859-1" }, + { "ca_ES@euro", "ISO-8859-15" }, + { "ca_ES@valencia", "UTF-8" }, + { "ca_FR", "ISO-8859-15" }, + { "ca_IT", "ISO-8859-15" }, + { "ce_RU", "UTF-8" }, + { "chr_US", "UTF-8" }, + { "ckb_IQ", "UTF-8" }, + { "cmn_TW", "UTF-8" }, + { "crh_UA", "UTF-8" }, + { "cs_CZ", "ISO-8859-2" }, + { "csb_PL", "UTF-8" }, + { "cv_RU", "UTF-8" }, + { "cy_GB", "ISO-8859-14" }, + { "da_DK", "ISO-8859-1" }, + { "de_AT", "ISO-8859-1" }, + { "de_AT@euro", "ISO-8859-15" }, + { "de_BE", "ISO-8859-1" }, + { "de_BE@euro", "ISO-8859-15" }, + { "de_CH", "ISO-8859-1" }, + { "de_DE", "ISO-8859-1" }, + { "de_DE@euro", "ISO-8859-15" }, + { "de_IT", "ISO-8859-1" }, + { "de_LU", "ISO-8859-1" }, + { "de_LU@euro", "ISO-8859-15" }, + { "doi_IN", "UTF-8" }, + { "dsb_DE", "UTF-8" }, + { "dv_MV", "UTF-8" }, + { "dz_BT", "UTF-8" }, + { "el_CY", "ISO-8859-7" }, + { "el_GR", "ISO-8859-7" }, + { "el_GR@euro", "ISO-8859-7" }, + { "en_AG", "UTF-8" }, + { "en_AU", "ISO-8859-1" }, + { "en_BW", "ISO-8859-1" }, + { "en_CA", "ISO-8859-1" }, + { "en_DK", "ISO-8859-1" }, + { "en_GB", "ISO-8859-1" }, + { "en_HK", "ISO-8859-1" }, + { "en_IE", "ISO-8859-1" }, + { "en_IE@euro", "ISO-8859-15" }, + { "en_IL", "UTF-8" }, + { "en_IN", "UTF-8" }, + { "en_NG", "UTF-8" }, + { "en_NZ", "ISO-8859-1" }, + { "en_PH", "ISO-8859-1" }, + { "en_SG", "ISO-8859-1" }, + { "en_US", "ISO-8859-1" }, + { "en_ZA", "ISO-8859-1" }, + { "en_ZM", "UTF-8" }, + { "en_ZW", "ISO-8859-1" }, + { "es_AR", "ISO-8859-1" }, + { "es_BO", "ISO-8859-1" }, + { "es_CL", "ISO-8859-1" }, + { "es_CO", "ISO-8859-1" }, + { "es_CR", "ISO-8859-1" }, + { "es_CU", "UTF-8" }, + { "es_DO", "ISO-8859-1" }, + { "es_EC", "ISO-8859-1" }, + { "es_ES", "ISO-8859-1" }, + { "es_ES@euro", "ISO-8859-15" }, + { "es_GT", "ISO-8859-1" }, + { "es_HN", "ISO-8859-1" }, + { "es_MX", "ISO-8859-1" }, + { 
"es_NI", "ISO-8859-1" }, + { "es_PA", "ISO-8859-1" }, + { "es_PE", "ISO-8859-1" }, + { "es_PR", "ISO-8859-1" }, + { "es_PY", "ISO-8859-1" }, + { "es_SV", "ISO-8859-1" }, + { "es_US", "ISO-8859-1" }, + { "es_UY", "ISO-8859-1" }, + { "es_VE", "ISO-8859-1" }, + { "et_EE", "ISO-8859-1" }, + { "eu_ES", "ISO-8859-1" }, + { "eu_ES@euro", "ISO-8859-15" }, + { "fa_IR", "UTF-8" }, + { "ff_SN", "UTF-8" }, + { "fi_FI", "ISO-8859-1" }, + { "fi_FI@euro", "ISO-8859-15" }, + { "fil_PH", "UTF-8" }, + { "fo_FO", "ISO-8859-1" }, + { "fr_BE", "ISO-8859-1" }, + { "fr_BE@euro", "ISO-8859-15" }, + { "fr_CA", "ISO-8859-1" }, + { "fr_CH", "ISO-8859-1" }, + { "fr_FR", "ISO-8859-1" }, + { "fr_FR@euro", "ISO-8859-15" }, + { "fr_LU", "ISO-8859-1" }, + { "fr_LU@euro", "ISO-8859-15" }, + { "fur_IT", "UTF-8" }, + { "fy_DE", "UTF-8" }, + { "fy_NL", "UTF-8" }, + { "ga_IE", "ISO-8859-1" }, + { "ga_IE@euro", "ISO-8859-15" }, + { "gd_GB", "ISO-8859-15" }, + { "gez_ER", "UTF-8" }, + { "gez_ER@abegede", "UTF-8" }, + { "gez_ET", "UTF-8" }, + { "gez_ET@abegede", "UTF-8" }, + { "gl_ES", "ISO-8859-1" }, + { "gl_ES@euro", "ISO-8859-15" }, + { "gu_IN", "UTF-8" }, + { "gv_GB", "ISO-8859-1" }, + { "ha_NG", "UTF-8" }, + { "hak_TW", "UTF-8" }, + { "he_IL", "ISO-8859-8" }, + { "hi_IN", "UTF-8" }, + { "hif_FJ", "UTF-8" }, + { "hne_IN", "UTF-8" }, + { "hr_HR", "ISO-8859-2" }, + { "hsb_DE", "ISO-8859-2" }, + { "ht_HT", "UTF-8" }, + { "hu_HU", "ISO-8859-2" }, + { "hy_AM", "UTF-8" }, + { "ia_FR", "UTF-8" }, + { "id_ID", "ISO-8859-1" }, + { "ig_NG", "UTF-8" }, + { "ik_CA", "UTF-8" }, + { "is_IS", "ISO-8859-1" }, + { "it_CH", "ISO-8859-1" }, + { "it_IT", "ISO-8859-1" }, + { "it_IT@euro", "ISO-8859-15" }, + { "iu_CA", "UTF-8" }, + { "ja_JP", "EUC-JP" }, + { "ka_GE", "GEORGIAN-PS" }, + { "kab_DZ", "UTF-8" }, + { "kk_KZ", "PT154" }, + { "kl_GL", "ISO-8859-1" }, + { "km_KH", "UTF-8" }, + { "kn_IN", "UTF-8" }, + { "ko_KR", "EUC-KR" }, + { "kok_IN", "UTF-8" }, + { "ks_IN", "UTF-8" }, + { "ks_IN@devanagar", "UTF-8" }, + { 
"ku_TR", "ISO-8859-9" }, + { "kw_GB", "ISO-8859-1" }, + { "ky_KG", "UTF-8" }, + { "lb_LU", "UTF-8" }, + { "lg_UG", "ISO-8859-10" }, + { "li_BE", "UTF-8" }, + { "li_NL", "UTF-8" }, + { "lij_IT", "UTF-8" }, + { "ln_CD", "UTF-8" }, + { "lo_LA", "UTF-8" }, + { "lt_LT", "ISO-8859-13" }, + { "lv_LV", "ISO-8859-13" }, + { "lzh_TW", "UTF-8" }, + { "mag_IN", "UTF-8" }, + { "mai_IN", "UTF-8" }, + { "mai_NP", "UTF-8" }, + { "mfe_MU", "UTF-8" }, + { "mg_MG", "ISO-8859-15" }, + { "mhr_RU", "UTF-8" }, + { "mi_NZ", "ISO-8859-13" }, + { "miq_NI", "UTF-8" }, + { "mjw_IN", "UTF-8" }, + { "mk_MK", "ISO-8859-5" }, + { "ml_IN", "UTF-8" }, + { "mn_MN", "UTF-8" }, + { "mni_IN", "UTF-8" }, + { "mnw_MM", "UTF-8" }, + { "mr_IN", "UTF-8" }, + { "ms_MY", "ISO-8859-1" }, + { "mt_MT", "ISO-8859-3" }, + { "my_MM", "UTF-8" }, + { "nan_TW", "UTF-8" }, + { "nan_TW@latin", "UTF-8" }, + { "nb_NO", "ISO-8859-1" }, + { "nds_DE", "UTF-8" }, + { "nds_NL", "UTF-8" }, + { "ne_NP", "UTF-8" }, + { "nhn_MX", "UTF-8" }, + { "niu_NU", "UTF-8" }, + { "niu_NZ", "UTF-8" }, + { "nl_AW", "UTF-8" }, + { "nl_BE", "ISO-8859-1" }, + { "nl_BE@euro", "ISO-8859-15" }, + { "nl_NL", "ISO-8859-1" }, + { "nl_NL@euro", "ISO-8859-15" }, + { "nn_NO", "ISO-8859-1" }, + { "no_NO", "ISO-8859-1" }, + { "nr_ZA", "UTF-8" }, + { "nso_ZA", "UTF-8" }, + { "oc_FR", "ISO-8859-1" }, + { "om_ET", "UTF-8" }, + { "om_KE", "ISO-8859-1" }, + { "or_IN", "UTF-8" }, + { "os_RU", "UTF-8" }, + { "pa_IN", "UTF-8" }, + { "pa_PK", "UTF-8" }, + { "pap_AW", "UTF-8" }, + { "pap_CW", "UTF-8" }, + { "pl_PL", "ISO-8859-2" }, + { "ps_AF", "UTF-8" }, + { "pt_BR", "ISO-8859-1" }, + { "pt_PT", "ISO-8859-1" }, + { "pt_PT@euro", "ISO-8859-15" }, + { "quz_PE", "UTF-8" }, + { "raj_IN", "UTF-8" }, + { "rif_MA", "UTF-8" }, + { "ro_RO", "ISO-8859-2" }, + { "ru_RU", "ISO-8859-5" }, + { "ru_UA", "KOI8-U" }, + { "rw_RW", "UTF-8" }, + { "sa_IN", "UTF-8" }, + { "sah_RU", "UTF-8" }, + { "sat_IN", "UTF-8" }, + { "sc_IT", "UTF-8" }, + { "sd_IN", "UTF-8" }, + { "sd_IN@devanagar", 
"UTF-8" }, + { "se_NO", "UTF-8" }, + { "sgs_LT", "UTF-8" }, + { "shn_MM", "UTF-8" }, + { "shs_CA", "UTF-8" }, + { "si_LK", "UTF-8" }, + { "sid_ET", "UTF-8" }, + { "sk_SK", "ISO-8859-2" }, + { "sl_SI", "ISO-8859-2" }, + { "sm_WS", "UTF-8" }, + { "so_DJ", "ISO-8859-1" }, + { "so_ET", "UTF-8" }, + { "so_KE", "ISO-8859-1" }, + { "so_SO", "ISO-8859-1" }, + { "sq_AL", "ISO-8859-1" }, + { "sq_MK", "UTF-8" }, + { "sr_ME", "UTF-8" }, + { "sr_RS", "UTF-8" }, + { "sr_RS@latin", "UTF-8" }, + { "ss_ZA", "UTF-8" }, + { "st_ZA", "ISO-8859-1" }, + { "sv_FI", "ISO-8859-1" }, + { "sv_FI@euro", "ISO-8859-15" }, + { "sv_SE", "ISO-8859-1" }, + { "sw_KE", "UTF-8" }, + { "sw_TZ", "UTF-8" }, + { "szl_PL", "UTF-8" }, + { "ta_IN", "UTF-8" }, + { "ta_LK", "UTF-8" }, + { "te_IN", "UTF-8" }, + { "tg_TJ", "KOI8-T" }, + { "th_TH", "TIS-620" }, + { "the_NP", "UTF-8" }, + { "ti_ER", "UTF-8" }, + { "ti_ET", "UTF-8" }, + { "tig_ER", "UTF-8" }, + { "tk_TM", "UTF-8" }, + { "tl_PH", "ISO-8859-1" }, + { "tn_ZA", "UTF-8" }, + { "to_TO", "UTF-8" }, + { "tpi_PG", "UTF-8" }, + { "tr_CY", "ISO-8859-9" }, + { "tr_TR", "ISO-8859-9" }, + { "ts_ZA", "UTF-8" }, + { "tt_RU", "UTF-8" }, + { "tt_RU@iqtelif", "UTF-8" }, + { "ug_CN", "UTF-8" }, + { "uk_UA", "KOI8-U" }, + { "unm_US", "UTF-8" }, + { "ur_IN", "UTF-8" }, + { "ur_PK", "UTF-8" }, + { "uz_UZ", "ISO-8859-1" }, + { "uz_UZ@cyrillic", "UTF-8" }, + { "ve_ZA", "UTF-8" }, + { "vi_VN", "UTF-8" }, + { "wa_BE", "ISO-8859-1" }, + { "wa_BE@euro", "ISO-8859-15" }, + { "wae_CH", "UTF-8" }, + { "wal_ET", "UTF-8" }, + { "wo_SN", "UTF-8" }, + { "xh_ZA", "ISO-8859-1" }, + { "yi_US", "CP1255" }, + { "yo_NG", "UTF-8" }, + { "yue_HK", "UTF-8" }, + { "yuw_PG", "UTF-8" }, + { "zh_CN", "GB2312" }, + { "zh_HK", "BIG5" }, + { "zh_SG", "GB2312" }, + { "zh_TW", "BIG5" }, + { "zu_ZA", "ISO-8859-1" }, +}; diff --git a/winsup/cygwin/nlsfuncs.cc b/winsup/cygwin/nlsfuncs.cc index c2f9c08c9fe1..8257c23b2f78 100644 --- a/winsup/cygwin/nlsfuncs.cc +++ b/winsup/cygwin/nlsfuncs.cc @@ -21,6 +21,7 
@@ details. */ #include "lc_msg.h" #include "lc_era.h" #include "lc_collelem.h" +#include "lc_def_codesets.h" =20 #define _LC(x) &lc_##x##_ptr,lc_##x##_end-lc_##x##_ptr =20 @@ -474,7 +475,8 @@ __set_lc_time_from_win (const char *name, locale. */ lc_era_t locale_key =3D { locale, NULL, NULL, NULL, NULL, NULL , NULL, NULL, NULL, NULL, NULL }; - lc_era_t *era =3D (lc_era_t *) bsearch ((void *) &locale_key, (void = *) lc_era, + lc_era_t *era =3D (lc_era_t *) bsearch ((void *) &locale_key, + (void *) lc_era, sizeof lc_era / sizeof *lc_era, sizeof *lc_era, locale_cmp); =20 @@ -1455,34 +1457,42 @@ strxfrm (char *__restrict s1, const char *__restric= t s2, size_t sn) /* Fetch default ANSI codepage from locale info and generate a setlocale compatible character set code. Called from newlib's setlocale(), if the charset isn't given explicitely in the POSIX compatible locale specifie= r. */ - -/* FIXME: Check all locales against their Linux counterpart again and - make sure the codeset conversion is correct. - FIXME: Perhaps, convert to locale names only. - FIXME: Perhaps, maintain a sorted list of Linux locales and their - default codesets. 
*/ extern "C" void -__set_charset_from_locale (const char *locale, char *charset) +__set_charset_from_locale (const char *loc, char *charset) { - UINT cp; wchar_t win_locale[ENCODING_LEN + 1]; + char locale[ENCODING_LEN + 1]; + const char *modifier; + char *c; + UINT cp; =20 - int ret =3D __get_rfc5646_from_locale (locale, win_locale); - wchar_t wbuf[9]; + /* Cut out explicit codeset */ + stpcpy (locale, loc); + modifier =3D strchr (loc, '@'); + if ((c =3D strchr (locale, '.'))) + stpcpy (c, modifier ?: ""); + + default_codeset_t srch_dc =3D { locale, NULL }; + default_codeset_t *dc =3D (default_codeset_t *) + bsearch ((void *) &srch_dc, (void *) default_codeset, + sizeof default_codeset / sizeof *default_codeset, + sizeof *default_codeset, locale_cmp); + if (dc) + { + stpcpy (charset, dc->codeset); + return; + } =20 /* "C" locale, or invalid locale? */ - if (ret <=3D 0) + if (__get_rfc5646_from_locale (locale, win_locale) <=3D 0) cp =3D 20127; - else if (!GetLocaleInfoEx (win_locale, + else if (GetLocaleInfoEx (win_locale, LOCALE_IDEFAULTANSICODEPAGE | LOCALE_RETURN_NUMBER, (PWCHAR) &cp, sizeof cp)) cp =3D 0; - /* For simplicity, we still convert to LCID here. */ - LCID lcid =3D LocaleNameToLCID (win_locale, 0); /* Translate codepage and lcid to a charset closely aligned with the def= ault charsets defined in Glibc. 
*/ const char *cs; - const char *modifier =3D strchr (locale, '@') ?: ""; switch (cp) { case 20127: @@ -1504,131 +1514,33 @@ __set_charset_from_locale (const char *locale, cha= r *charset) cs =3D "BIG5"; break; case 1250: - if (lcid =3D=3D 0x181a /* sr_BA (Serbian/Bosnia - and Herzegovina) */ - || lcid =3D=3D 0x241a /* sr_RS (Serbian/Serbia) */ - || lcid =3D=3D 0x2c1a /* sr_ME (Serbian/Montenegro)*/ - || lcid =3D=3D 0x0442 /* tk_TM (Turkmen/Turkmenistan) */ - || !wcscmp (win_locale, L"sr-Latn-XK")) /* (Serbian/Kosovo) */ - cs =3D "UTF-8"; - else if (lcid =3D=3D 0x041c) /* sq_AL (Albanian/Albania) */ - cs =3D "ISO-8859-1"; - else - cs =3D "ISO-8859-2"; + cs =3D "ISO-8859-2"; break; case 1251: - if (lcid =3D=3D 0x1c1a /* sr_BA (Serbian Language/Bosnia - and Herzegovina) */ - || lcid =3D=3D 0x281a /* sr_RS (Serbian Language/Serbia) */ - || lcid =3D=3D 0x301a /* sr_ME (Serbian Language/Montenegro)*/ - || lcid =3D=3D 0x0440 /* ky_KG (Kyrgyz/Kyrgyzstan) */ - || lcid =3D=3D 0x082c /* az_AZ@cyrillic (Azerbaijani/Azerbaijan) */ - || lcid =3D=3D 0x0843 /* uz_UZ (Uzbek/Uzbekistan) */ - /* tt_RU (Tatar/Russia), - IQTElif alphabet */ - || (lcid =3D=3D 0x0444 && has_modifier ("@iqtelif")) - || lcid =3D=3D 0x0450 /* mn_MN (Mongolian/Mongolia) */ - || !wcscmp (win_locale, L"sr-Cyrl-XK")) /* (Serbian/Kosovo) */ - cs =3D "UTF-8"; - else if (lcid =3D=3D 0x0423) /* be_BY (Belarusian/Belarus) */ - cs =3D has_modifier ("@latin") ? 
"UTF-8" : "CP1251"; - else if (lcid =3D=3D 0x0402 /* bg_BG (Bulgarian/Bulgaria) */ - || lcid =3D=3D 0x0423) /* be_BY (Belarusian/Belarus) */ - cs =3D "CP1251"; - else if (lcid =3D=3D 0x0422 /* uk_UA (Ukrainian/Ukraine) */ - || !wcscmp (win_locale, L"ru-UA")) /* (Russian/Ukraine) */ - cs =3D "KOI8-U"; - else if (lcid =3D=3D 0x0428) /* tg_TJ (Tajik/Tajikistan) */ - cs =3D "KOI8-T"; - else - cs =3D "ISO-8859-5"; + cs =3D "ISO-8859-5"; break; case 1252: - if (lcid =3D=3D 0x0452) /* cy_GB (Welsh/Great Britain) */ - cs =3D "ISO-8859-14"; - else if (lcid =3D=3D 0x4009 /* en_IN (English/India) */ - || lcid =3D=3D 0x0867 /* ff_SN (Fulah/Senegal) */ - || lcid =3D=3D 0x0464 /* fil_PH (Filipino/Philippines) */ - || lcid =3D=3D 0x0462 /* fy_NL (Frisian/Netherlands) */ - || lcid =3D=3D 0x0468 /* ha_NG (Hausa/Nigeria) */ - || lcid =3D=3D 0x0475 /* haw_US (Hawaiian/United States) */ - || lcid =3D=3D 0x0470 /* ig_NG (Igbo/Nigeria) */ - || lcid =3D=3D 0x085d /* iu_CA (Inuktitut/Canada) */ - || lcid =3D=3D 0x046c /* nso_ZA (Northern Sotho/South Africa) */ - || lcid =3D=3D 0x0487 /* rw_RW (Kinyarwanda/Rwanda) */ - || lcid =3D=3D 0x043b /* se_NO (Northern Saami/Norway) */ - || lcid =3D=3D 0x0832 /* tn_BW (Tswana/Botswana) */ - || lcid =3D=3D 0x0432 /* tn_ZA (Tswana/South Africa) */ - || lcid =3D=3D 0x0488 /* wo_SN (Wolof/Senegal) */ - || lcid =3D=3D 0x046a /* yo_NG (Yoruba/Nigeria) */ - || lcid =3D=3D 0x085f) /* ber_DZ (Tamazight/Algeria) */ - cs =3D "UTF-8"; - else if (lcid =3D=3D 0x042e) /* hsb_DE (Upper Sorbian/Germany) */ - cs =3D "ISO-8859-2"; - else if (lcid =3D=3D 0x0491 /* gd_GB (Scots Gaelic/Great Britain) */ - || (has_modifier ("@euro") - && GetLocaleInfoW (lcid, LOCALE_SINTLSYMBOL, wbuf, 9) - && !wcsncmp (wbuf, L"EUR", 3))) - cs =3D "ISO-8859-15"; - else - cs =3D "ISO-8859-1"; + cs =3D "ISO-8859-1"; break; case 1253: cs =3D "ISO-8859-7"; break; case 1254: - if (lcid =3D=3D 0x042c) /* az_AZ (Azeri/Azerbaijan) */ - cs =3D "UTF-8"; - else if (lcid =3D=3D 0x0443) /* uz_UZ 
(Uzbek/Uzbekistan) */ - cs =3D "ISO-8859-1"; - else - cs =3D "ISO-8859-9"; + cs =3D "ISO-8859-9"; break; case 1255: cs =3D "ISO-8859-8"; break; case 1256: - if (lcid =3D=3D 0x0429 /* fa_IR (Persian/Iran) */ - || lcid =3D=3D 0x0846 /* pa_PK (Punjabi/Pakistan) */ - || lcid =3D=3D 0x0859 /* sd_PK (Sindhi/Pakistan) */ - || lcid =3D=3D 0x0480 /* ug_CN (Uyghur/China) */ - || lcid =3D=3D 0x0420) /* ur_PK (Urdu/Pakistan) */ - cs =3D "UTF-8"; - else - cs =3D "ISO-8859-6"; + cs =3D "ISO-8859-6"; break; case 1257: - if (lcid =3D=3D 0x0425) /* et_EE (Estonian/Estonia) */ - cs =3D "ISO-8859-15"; - else - cs =3D "ISO-8859-13"; + cs =3D "ISO-8859-13"; break; case 1258: default: - if (lcid =3D=3D 0x3c09 /* en_HK (English/Hong Kong) */ - || lcid =3D=3D 0x200c /* fr_RE (French/R=C3=A9union) */ - || lcid =3D=3D 0x240c /* fr_CD (French/Congo) */ - || lcid =3D=3D 0x280c /* fr_SN (French/Senegal) */ - || lcid =3D=3D 0x2c0c /* fr_CM (French/Cameroon) */ - || lcid =3D=3D 0x300c /* fr_CI (French/Ivory Coast) */ - || lcid =3D=3D 0x340c /* fr_ML (French/Mali) */ - || lcid =3D=3D 0x380c /* fr_MA (French/Morocco) */ - || lcid =3D=3D 0x3c0c /* fr_HT (French/Haiti) */ - || lcid =3D=3D 0x0477 /* so_SO (Somali/Somali) */ - || lcid =3D=3D 0x0430) /* st_ZA (Sotho/South Africa) */ - cs =3D "ISO-8859-1"; - else if (lcid =3D=3D 0x818) /* ro_MD (Romanian/Moldovia) */ - cs =3D "ISO-8859-2"; - else if (lcid =3D=3D 0x043a) /* mt_MT (Maltese/Malta) */ - cs =3D "ISO-8859-3"; - else if (lcid =3D=3D 0x0481) /* mi_NZ (Maori/New Zealand) */ - cs =3D "ISO-8859-13"; - else if (lcid =3D=3D 0x0437) /* ka_GE (Georgian/Georgia) */ - cs =3D "GEORGIAN-PS"; - else if (lcid =3D=3D 0x043f) /* kk_KZ (Kazakh/Kazakhstan) */ - cs =3D "PT154"; - else - cs =3D "UTF-8"; + cs =3D "UTF-8"; + break; } stpcpy (charset, cs); }