From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 2155) id 70CB9385417F; Fri, 24 Feb 2023 15:42:05 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 70CB9385417F DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=sourceware.org; s=default; t=1677253325; bh=97v0Sn0LEChCmkuQn5PHZU4iwyxmGjr5np0fM8eT5zg=; h=From:To:Subject:Date:From; b=pEyZ70aj+oiDba1Ln3bARayEgYp2KaUoFpU9BuEvOAB1o4vrca8Lh8tYGBatssjaM 2qZkhpDkjBLdG878OpVW29vYAonXaLp+39/3V7h1LJe1+9WNx6ymNcDalLSUa5JTU+ YjlH14xNeQflSSTEwgE5pIcc6U9AqHxaKikh8XEQ= Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable From: Corinna Vinschen To: cygwin-cvs@sourceware.org, newlib-cvs@sourceware.org Subject: [newlib-cygwin/main] Cygwin: support KOI8-T codeset X-Act-Checkin: newlib-cygwin X-Git-Author: Corinna Vinschen X-Git-Refname: refs/heads/main X-Git-Oldrev: ac405ab9bc07789d5576fd823cbaa7eef472b11b X-Git-Newrev: 89eb4bce152f93a9ace37bb7c67941a0e3bf19ae Message-Id: <20230224154205.70CB9385417F@sourceware.org> Date: Fri, 24 Feb 2023 15:42:05 +0000 (GMT) List-Id: https://sourceware.org/git/gitweb.cgi?p=3Dnewlib-cygwin.git;h=3D89eb4bce152= f93a9ace37bb7c67941a0e3bf19ae commit 89eb4bce152f93a9ace37bb7c67941a0e3bf19ae Author: Corinna Vinschen AuthorDate: Fri Feb 24 16:07:26 2023 +0100 Commit: Corinna Vinschen CommitDate: Fri Feb 24 16:40:58 2023 +0100 Cygwin: support KOI8-T codeset =20 Used on Linux as default codeset for Tajik. There's no matching Windows codepage, so fake it as CP103. =20 Signed-off-by: Corinna Vinschen Diff: --- newlib/libc/locale/locale.c | 15 ++++++---- newlib/libc/locale/nl_langinfo.c | 2 ++ newlib/libc/stdlib/sb_charsets.c | 60 ++++++++++++++++++++++++++----------= ---- winsup/cygwin/nlsfuncs.cc | 2 ++ winsup/utils/locale.cc | 1 + 5 files changed, 55 insertions(+), 25 deletions(-) diff --git a/newlib/libc/locale/locale.c b/newlib/libc/locale/locale.c index b0f6314ff49b..e58ba3dbef8c 100644 --- a/newlib/libc/locale/locale.c +++ b/newlib/libc/locale/locale.c @@ -50,10 +50,10 @@ but uses the UTF-8 charset. =20 The following charsets are recognized: <<"UTF-8">>, <<"JIS">>, <<"EUCJP">>, <<"SJIS">>, <<"KOI8-R">>, <<"KOI8-U">= >, -<<"GEORGIAN-PS">>, <<"PT154">>, <<"TIS-620">>, <<"ISO-8859-x">> with -1 <=3D x <=3D 16, or <<"CPxxx">> with xxx in [437, 720, 737, 775, 850, 852= , 855, -857, 858, 862, 866, 874, 932, 1125, 1250, 1251, 1252, 1253, 1254, 1255, 12= 56, -1257, 1258]. +<<"KOI8-T">>, <<"GEORGIAN-PS">>, <<"PT154">>, <<"TIS-620">>, <<"ISO-8859-x= ">> +with 1 <=3D x <=3D 16, or <<"CPxxx">> with xxx in [437, 720, 737, 775, 850= , 852, +855, 857, 858, 862, 866, 874, 932, 1125, 1250, 1251, 1252, 1253, 1254, 125= 5, +1256, 1257, 1258]. =20 Charsets are case insensitive. For instance, <<"EUCJP">> and <<"eucJP">> are equivalent. Charset names with dashes can also be written without @@ -769,7 +769,7 @@ restart: break; case 'K': case 'k': - /* KOI8-R, KOI8-U and the aliases without dash */ + /* KOI8-R, KOI8-U, KOI8-T and the aliases without dash */ if (strncasecmp (charset, "KOI8", 4)) FAIL; c =3D charset + 4; @@ -785,6 +785,11 @@ restart: val =3D 21866; strcpy (charset, "CP21866"); } + else if (*c =3D=3D 'T' || *c =3D=3D 't') + { + val =3D 103; + strcpy (charset, "CP103"); + } else FAIL; mbc_max =3D 1; diff --git a/newlib/libc/locale/nl_langinfo.c b/newlib/libc/locale/nl_langi= nfo.c index eb984912fc94..aaa1aef8648b 100644 --- a/newlib/libc/locale/nl_langinfo.c +++ b/newlib/libc/locale/nl_langinfo.c @@ -233,6 +233,8 @@ do_codeset: ret =3D "GEORGIAN-PS"; else if (strcmp (ret + 2, "102") =3D=3D 0) ret =3D "PT154"; + else if (strcmp (ret + 2, "103") =3D=3D 0) + ret =3D "KOI8-T"; } else if (ret[0] =3D=3D 'S'/*JIS*/) { diff --git a/newlib/libc/stdlib/sb_charsets.c b/newlib/libc/stdlib/sb_chars= ets.c index 961eb15736ff..4984a2fe35be 100644 --- a/newlib/libc/stdlib/sb_charsets.c +++ b/newlib/libc/stdlib/sb_charsets.c @@ -196,12 +196,12 @@ wchar_t __iso_8859_conv[14][0x60] =3D { #endif /* _MB_EXTENDED_CHARSETS_ISO */ =20 #ifdef _MB_EXTENDED_CHARSETS_WINDOWS -/* Tables for the Windows default singlebyte ANSI codepage conversion.=20 +/* Tables for the Windows default singlebyte ANSI codepage conversion. The first index into the table is a value computed from the codepage value (function __cp_index), the second index is the value of the incoming character - 0x80. Values < 0x80 don't have to be converted anyway. */ -wchar_t __cp_conv[26][0x80] =3D { +wchar_t __cp_conv[27][0x80] =3D { /* CP437 */ { 0xc7, 0xfc, 0xe9, 0xe2, 0xe4, 0xe0, 0xe5, 0xe7, 0xea, 0xeb, 0xe8, 0xef, 0xee, 0xec, 0xc4, 0xc5, @@ -619,37 +619,54 @@ wchar_t __cp_conv[26][0x80] =3D { { 0x80, 0x81, 0x201a, 0x192, 0x201e, 0x2026, 0x2020, 0x2021, 0x2c6, 0x2030, 0x160, 0x2039, 0x152, 0x8d, 0x8e, 0x8f, 0x90, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014, - 0x2dc, 0x2122, 0x161, 0x203a, 0x153, 0x9d, 0x9e, 0x178,=20 - 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,=20 - 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,=20 - 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7,=20 - 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,=20 + 0x2dc, 0x2122, 0x161, 0x203a, 0x153, 0x9d, 0x9e, 0x178, + 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, + 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, + 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, + 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0x10d0, 0x10d1, 0x10d2, 0x10d3, 0x10d4, 0x10d5, 0x10d6, 0x10f1, 0x10d7, 0x10d8, 0x10d9, 0x10da, 0x10db, 0x10dc, 0x10f2, 0x10dd, 0x10de, 0x10df, 0x10e0, 0x10e1, 0x10e2, 0x10f3, 0x10e3, 0x10e4, 0x10e5, 0x10e6, 0x10e7, 0x10e8, 0x10e9, 0x10ea, 0x10eb, 0x10ec, 0x10ed, 0x10ee, 0x10f4, 0x10ef, 0x10f0, 0x10f5, 0xe6, 0xe7, - 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,=20 - 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7,=20 + 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, + 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff }, /* CP102 (PT154) Cyrillic-Asian charset, used as the default charset in the kk_KZ locale (Kazakh, Kazakhstan). */ { 0x496, 0x492, 0x4ee, 0x493, 0x201e, 0x2026, 0x4b6, 0x4ae, - 0x4b2, 0x4af, 0x4a0, 0x4e2, 0x4a2, 0x49a, 0x4ba, 0x4b8,=20 + 0x4b2, 0x4af, 0x4a0, 0x4e2, 0x4a2, 0x49a, 0x4ba, 0x4b8, 0x497, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014, - 0x4b3, 0x4b7, 0x4a1, 0x4e3, 0x4a3, 0x49b, 0x4bb, 0x4b9,=20 + 0x4b3, 0x4b7, 0x4a1, 0x4e3, 0x4a3, 0x49b, 0x4bb, 0x4b9, 0xa0, 0x40e, 0x45e, 0x408, 0x4e8, 0x498, 0x4b0, 0xa7, 0x401, 0xa9, 0x4d8, 0xab, 0xac, 0x4ef, 0xae, 0x49c, 0xb0, 0x4b1, 0x406, 0x456, 0x499, 0x4e9, 0xb6, 0xb7, - 0x451, 0x2116, 0x4d9, 0xbb, 0x458, 0x4aa, 0x4ab, 0x49d,=20 - 0x410, 0x411, 0x412, 0x413, 0x414, 0x415, 0x416, 0x417,=20 - 0x418, 0x419, 0x41a, 0x41b, 0x41c, 0x41d, 0x41e, 0x41f,=20 - 0x420, 0x421, 0x422, 0x423, 0x424, 0x425, 0x426, 0x427,=20 - 0x428, 0x429, 0x42a, 0x42b, 0x42c, 0x42d, 0x42e, 0x42f,=20 - 0x430, 0x431, 0x432, 0x433, 0x434, 0x435, 0x436, 0x437,=20 - 0x438, 0x439, 0x43a, 0x43b, 0x43c, 0x43d, 0x43e, 0x43f,=20 - 0x440, 0x441, 0x442, 0x443, 0x444, 0x445, 0x446, 0x447,=20 - 0x448, 0x449, 0x44a, 0x44b, 0x44c, 0x44d, 0x44e, 0x44f } + 0x451, 0x2116, 0x4d9, 0xbb, 0x458, 0x4aa, 0x4ab, 0x49d, + 0x410, 0x411, 0x412, 0x413, 0x414, 0x415, 0x416, 0x417, + 0x418, 0x419, 0x41a, 0x41b, 0x41c, 0x41d, 0x41e, 0x41f, + 0x420, 0x421, 0x422, 0x423, 0x424, 0x425, 0x426, 0x427, + 0x428, 0x429, 0x42a, 0x42b, 0x42c, 0x42d, 0x42e, 0x42f, + 0x430, 0x431, 0x432, 0x433, 0x434, 0x435, 0x436, 0x437, + 0x438, 0x439, 0x43a, 0x43b, 0x43c, 0x43d, 0x43e, 0x43f, + 0x440, 0x441, 0x442, 0x443, 0x444, 0x445, 0x446, 0x447, + 0x448, 0x449, 0x44a, 0x44b, 0x44c, 0x44d, 0x44e, 0x44f }, + /* CP103 (KOI8-T) */ + { 0x049b, 0x0493, 0x201a, 0x0492, 0x201e, 0x2026, 0x2020, 0x2021, + 0x88, 0x2030, 0x04b3, 0x2039, 0x04b2, 0x04b7, 0x04b6, 0x8f, + 0x049a, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014, + 0x98, 0x2122, 0x9a, 0x203a, 0x9c, 0x9d, 0x9e, 0x9f, + 0xa0, 0x04ef, 0x04ee, 0x0451, 0xa4, 0x04e3, 0xa6, 0xa7, + 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, + 0xb0, 0xb1, 0xb2, 0x401, 0xb4, 0x04e2, 0xb6, 0xb7, + 0xb8, 0x2116, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xa9, + 0x44e, 0x430, 0x431, 0x446, 0x434, 0x435, 0x444, 0x433, + 0x445, 0x438, 0x439, 0x43a, 0x43b, 0x43c, 0x43d, 0x43e, + 0x43f, 0x44f, 0x440, 0x441, 0x442, 0x443, 0x436, 0x432, + 0x44c, 0x44b, 0x437, 0x448, 0x44d, 0x449, 0x447, 0x44a, + 0x42e, 0x410, 0x411, 0x426, 0x414, 0x415, 0x424, 0x413, + 0x425, 0x418, 0x419, 0x41a, 0x41b, 0x41c, 0x41d, 0x41e, + 0x41f, 0x42f, 0x420, 0x421, 0x422, 0x423, 0x416, 0x412, + 0x42c, 0x42b, 0x417, 0x428, 0x42d, 0x429, 0x427, 0x42a }, }; #endif /* _MB_EXTENDED_CHARSETS_WINDOWS */ =20 @@ -776,6 +793,9 @@ __cp_val_index (int val) case 102: cp_idx =3D 25; break; + case 103: + cp_idx =3D 26; + break; default: cp_idx =3D -1; break; diff --git a/winsup/cygwin/nlsfuncs.cc b/winsup/cygwin/nlsfuncs.cc index 543f20e4437b..0d2764b05847 100644 --- a/winsup/cygwin/nlsfuncs.cc +++ b/winsup/cygwin/nlsfuncs.cc @@ -1510,6 +1510,8 @@ __set_charset_from_locale (const char *locale, char *= charset) cs =3D "CP1251"; else if (lcid =3D=3D 0x0422) /* uk_UA (Ukrainian/Ukraine) */ cs =3D "KOI8-U"; + else if (lcid =3D=3D 0x0428) /* tg_TJ (Tajik/Tajikistan) */ + cs =3D "KOI8-T"; else cs =3D "ISO-8859-5"; break; diff --git a/winsup/utils/locale.cc b/winsup/utils/locale.cc index 1d90550f343d..193e91e3fafc 100644 --- a/winsup/utils/locale.cc +++ b/winsup/utils/locale.cc @@ -486,6 +486,7 @@ print_charmaps () "ISO-8859-8", "ISO-8859-9", "KOI8-R", + "KOI8-T", "KOI8-U", "PT154", "SJIS",