public inbox for newlib-cvs@sourceware.org
help / color / mirror / Atom feed
* [newlib-cygwin/main] Cygwin: support KOI8-T codeset
@ 2023-02-24 15:42 Corinna Vinschen
  0 siblings, 0 replies; only message in thread
From: Corinna Vinschen @ 2023-02-24 15:42 UTC (permalink / raw)
  To: cygwin-cvs, newlib-cvs

https://sourceware.org/git/gitweb.cgi?p=newlib-cygwin.git;h=89eb4bce152f93a9ace37bb7c67941a0e3bf19ae

commit 89eb4bce152f93a9ace37bb7c67941a0e3bf19ae
Author:     Corinna Vinschen <corinna@vinschen.de>
AuthorDate: Fri Feb 24 16:07:26 2023 +0100
Commit:     Corinna Vinschen <corinna@vinschen.de>
CommitDate: Fri Feb 24 16:40:58 2023 +0100

    Cygwin: support KOI8-T codeset
    
    KOI8-T is used on Linux as the default codeset for Tajik (tg_TJ).
    Windows has no matching codepage, so it is faked internally as CP103.
    
    Signed-off-by: Corinna Vinschen <corinna@vinschen.de>

Diff:
---
 newlib/libc/locale/locale.c      | 15 ++++++----
 newlib/libc/locale/nl_langinfo.c |  2 ++
 newlib/libc/stdlib/sb_charsets.c | 60 ++++++++++++++++++++++++++--------------
 winsup/cygwin/nlsfuncs.cc        |  2 ++
 winsup/utils/locale.cc           |  1 +
 5 files changed, 55 insertions(+), 25 deletions(-)

diff --git a/newlib/libc/locale/locale.c b/newlib/libc/locale/locale.c
index b0f6314ff49b..e58ba3dbef8c 100644
--- a/newlib/libc/locale/locale.c
+++ b/newlib/libc/locale/locale.c
@@ -50,10 +50,10 @@ but uses the UTF-8 charset.
 
 The following charsets are recognized:
 <<"UTF-8">>, <<"JIS">>, <<"EUCJP">>, <<"SJIS">>, <<"KOI8-R">>, <<"KOI8-U">>,
-<<"GEORGIAN-PS">>, <<"PT154">>, <<"TIS-620">>, <<"ISO-8859-x">> with
-1 <= x <= 16, or <<"CPxxx">> with xxx in [437, 720, 737, 775, 850, 852, 855,
-857, 858, 862, 866, 874, 932, 1125, 1250, 1251, 1252, 1253, 1254, 1255, 1256,
-1257, 1258].
+<<"KOI8-T">>, <<"GEORGIAN-PS">>, <<"PT154">>, <<"TIS-620">>, <<"ISO-8859-x">>
+with 1 <= x <= 16, or <<"CPxxx">> with xxx in [437, 720, 737, 775, 850, 852,
+855, 857, 858, 862, 866, 874, 932, 1125, 1250, 1251, 1252, 1253, 1254, 1255,
+1256, 1257, 1258].
 
 Charsets are case insensitive.  For instance, <<"EUCJP">> and <<"eucJP">>
 are equivalent.  Charset names with dashes can also be written without
@@ -769,7 +769,7 @@ restart:
     break;
     case 'K':
     case 'k':
-      /* KOI8-R, KOI8-U and the aliases without dash */
+      /* KOI8-R, KOI8-U, KOI8-T and the aliases without dash */
       if (strncasecmp (charset, "KOI8", 4))
 	FAIL;
       c = charset + 4;
@@ -785,6 +785,11 @@ restart:
 	  val = 21866;
 	  strcpy (charset, "CP21866");
 	}
+      else if (*c == 'T' || *c == 't')
+	{
+	  val = 103;
+	  strcpy (charset, "CP103");
+	}
       else
 	FAIL;
       mbc_max = 1;
diff --git a/newlib/libc/locale/nl_langinfo.c b/newlib/libc/locale/nl_langinfo.c
index eb984912fc94..aaa1aef8648b 100644
--- a/newlib/libc/locale/nl_langinfo.c
+++ b/newlib/libc/locale/nl_langinfo.c
@@ -233,6 +233,8 @@ do_codeset:
 		      ret = "GEORGIAN-PS";
 		    else if (strcmp (ret + 2, "102") == 0)
 		      ret = "PT154";
+		    else if (strcmp (ret + 2, "103") == 0)
+		      ret = "KOI8-T";
 		  }
 		else if (ret[0] == 'S'/*JIS*/)
 		  {
diff --git a/newlib/libc/stdlib/sb_charsets.c b/newlib/libc/stdlib/sb_charsets.c
index 961eb15736ff..4984a2fe35be 100644
--- a/newlib/libc/stdlib/sb_charsets.c
+++ b/newlib/libc/stdlib/sb_charsets.c
@@ -196,12 +196,12 @@ wchar_t __iso_8859_conv[14][0x60] = {
 #endif /* _MB_EXTENDED_CHARSETS_ISO */
 
 #ifdef _MB_EXTENDED_CHARSETS_WINDOWS
-/* Tables for the Windows default singlebyte ANSI codepage conversion. 
+/* Tables for the Windows default singlebyte ANSI codepage conversion.
    The first index into the table is a value computed from the codepage
    value (function __cp_index), the second index is the value of the
    incoming character - 0x80.
    Values < 0x80 don't have to be converted anyway. */
-wchar_t __cp_conv[26][0x80] = {
+wchar_t __cp_conv[27][0x80] = {
   /* CP437 */
   { 0xc7, 0xfc, 0xe9, 0xe2, 0xe4, 0xe0, 0xe5, 0xe7,
     0xea, 0xeb, 0xe8, 0xef, 0xee, 0xec, 0xc4, 0xc5,
@@ -619,37 +619,54 @@ wchar_t __cp_conv[26][0x80] = {
   { 0x80, 0x81, 0x201a, 0x192, 0x201e, 0x2026, 0x2020, 0x2021,
     0x2c6, 0x2030, 0x160, 0x2039, 0x152, 0x8d, 0x8e, 0x8f,
     0x90, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014,
-    0x2dc, 0x2122, 0x161, 0x203a, 0x153, 0x9d, 0x9e, 0x178, 
-    0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 
-    0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 
-    0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 
-    0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 
+    0x2dc, 0x2122, 0x161, 0x203a, 0x153, 0x9d, 0x9e, 0x178,
+    0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
+    0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
+    0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7,
+    0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
     0x10d0, 0x10d1, 0x10d2, 0x10d3, 0x10d4, 0x10d5, 0x10d6, 0x10f1,
     0x10d7, 0x10d8, 0x10d9, 0x10da, 0x10db, 0x10dc, 0x10f2, 0x10dd,
     0x10de, 0x10df, 0x10e0, 0x10e1, 0x10e2, 0x10f3, 0x10e3, 0x10e4,
     0x10e5, 0x10e6, 0x10e7, 0x10e8, 0x10e9, 0x10ea, 0x10eb, 0x10ec,
     0x10ed, 0x10ee, 0x10f4, 0x10ef, 0x10f0, 0x10f5, 0xe6, 0xe7,
-    0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 
-    0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 
+    0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
+    0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7,
     0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff },
   /* CP102 (PT154) Cyrillic-Asian charset, used as the default charset in
      the kk_KZ locale (Kazakh, Kazakhstan). */
   { 0x496, 0x492, 0x4ee, 0x493, 0x201e, 0x2026, 0x4b6, 0x4ae,
-    0x4b2, 0x4af, 0x4a0, 0x4e2, 0x4a2, 0x49a, 0x4ba, 0x4b8, 
+    0x4b2, 0x4af, 0x4a0, 0x4e2, 0x4a2, 0x49a, 0x4ba, 0x4b8,
     0x497, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014,
-    0x4b3, 0x4b7, 0x4a1, 0x4e3, 0x4a3, 0x49b, 0x4bb, 0x4b9, 
+    0x4b3, 0x4b7, 0x4a1, 0x4e3, 0x4a3, 0x49b, 0x4bb, 0x4b9,
     0xa0, 0x40e, 0x45e, 0x408, 0x4e8, 0x498, 0x4b0, 0xa7,
     0x401, 0xa9, 0x4d8, 0xab, 0xac, 0x4ef, 0xae, 0x49c,
     0xb0, 0x4b1, 0x406, 0x456, 0x499, 0x4e9, 0xb6, 0xb7,
-    0x451, 0x2116, 0x4d9, 0xbb, 0x458, 0x4aa, 0x4ab, 0x49d, 
-    0x410, 0x411, 0x412, 0x413, 0x414, 0x415, 0x416, 0x417, 
-    0x418, 0x419, 0x41a, 0x41b, 0x41c, 0x41d, 0x41e, 0x41f, 
-    0x420, 0x421, 0x422, 0x423, 0x424, 0x425, 0x426, 0x427, 
-    0x428, 0x429, 0x42a, 0x42b, 0x42c, 0x42d, 0x42e, 0x42f, 
-    0x430, 0x431, 0x432, 0x433, 0x434, 0x435, 0x436, 0x437, 
-    0x438, 0x439, 0x43a, 0x43b, 0x43c, 0x43d, 0x43e, 0x43f, 
-    0x440, 0x441, 0x442, 0x443, 0x444, 0x445, 0x446, 0x447, 
-    0x448, 0x449, 0x44a, 0x44b, 0x44c, 0x44d, 0x44e, 0x44f }
+    0x451, 0x2116, 0x4d9, 0xbb, 0x458, 0x4aa, 0x4ab, 0x49d,
+    0x410, 0x411, 0x412, 0x413, 0x414, 0x415, 0x416, 0x417,
+    0x418, 0x419, 0x41a, 0x41b, 0x41c, 0x41d, 0x41e, 0x41f,
+    0x420, 0x421, 0x422, 0x423, 0x424, 0x425, 0x426, 0x427,
+    0x428, 0x429, 0x42a, 0x42b, 0x42c, 0x42d, 0x42e, 0x42f,
+    0x430, 0x431, 0x432, 0x433, 0x434, 0x435, 0x436, 0x437,
+    0x438, 0x439, 0x43a, 0x43b, 0x43c, 0x43d, 0x43e, 0x43f,
+    0x440, 0x441, 0x442, 0x443, 0x444, 0x445, 0x446, 0x447,
+    0x448, 0x449, 0x44a, 0x44b, 0x44c, 0x44d, 0x44e, 0x44f },
+  /* CP103 (KOI8-T) */
+  { 0x049b, 0x0493, 0x201a, 0x0492, 0x201e, 0x2026, 0x2020, 0x2021,
+    0x88, 0x2030, 0x04b3, 0x2039, 0x04b2, 0x04b7, 0x04b6, 0x8f,
+    0x049a, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014,
+    0x98, 0x2122, 0x9a, 0x203a, 0x9c, 0x9d, 0x9e, 0x9f,
+    0xa0, 0x04ef, 0x04ee, 0x0451, 0xa4, 0x04e3, 0xa6, 0xa7,
+    0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
+    0xb0, 0xb1, 0xb2, 0x401, 0xb4, 0x04e2, 0xb6, 0xb7,
+    0xb8, 0x2116, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xa9,
+    0x44e, 0x430, 0x431, 0x446, 0x434, 0x435, 0x444, 0x433,
+    0x445, 0x438, 0x439, 0x43a, 0x43b, 0x43c, 0x43d, 0x43e,
+    0x43f, 0x44f, 0x440, 0x441, 0x442, 0x443, 0x436, 0x432,
+    0x44c, 0x44b, 0x437, 0x448, 0x44d, 0x449, 0x447, 0x44a,
+    0x42e, 0x410, 0x411, 0x426, 0x414, 0x415, 0x424, 0x413,
+    0x425, 0x418, 0x419, 0x41a, 0x41b, 0x41c, 0x41d, 0x41e,
+    0x41f, 0x42f, 0x420, 0x421, 0x422, 0x423, 0x416, 0x412,
+    0x42c, 0x42b, 0x417, 0x428, 0x42d, 0x429, 0x427, 0x42a },
 };
 #endif /* _MB_EXTENDED_CHARSETS_WINDOWS */
 
@@ -776,6 +793,9 @@ __cp_val_index (int val)
     case 102:
       cp_idx = 25;
       break;
+    case 103:
+      cp_idx = 26;
+      break;
     default:
       cp_idx = -1;
       break;
diff --git a/winsup/cygwin/nlsfuncs.cc b/winsup/cygwin/nlsfuncs.cc
index 543f20e4437b..0d2764b05847 100644
--- a/winsup/cygwin/nlsfuncs.cc
+++ b/winsup/cygwin/nlsfuncs.cc
@@ -1510,6 +1510,8 @@ __set_charset_from_locale (const char *locale, char *charset)
 	cs = "CP1251";
       else if (lcid == 0x0422)		/* uk_UA (Ukrainian/Ukraine) */
 	cs = "KOI8-U";
+      else if (lcid == 0x0428)		/* tg_TJ (Tajik/Tajikistan) */
+	cs = "KOI8-T";
       else
 	cs = "ISO-8859-5";
       break;
diff --git a/winsup/utils/locale.cc b/winsup/utils/locale.cc
index 1d90550f343d..193e91e3fafc 100644
--- a/winsup/utils/locale.cc
+++ b/winsup/utils/locale.cc
@@ -486,6 +486,7 @@ print_charmaps ()
     "ISO-8859-8",
     "ISO-8859-9",
     "KOI8-R",
+    "KOI8-T",
     "KOI8-U",
     "PT154",
     "SJIS",

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2023-02-24 15:42 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-02-24 15:42 [newlib-cygwin/main] Cygwin: support KOI8-T codeset Corinna Vinschen

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).