public inbox for newlib-cvs@sourceware.org
help / color / mirror / Atom feed
From: Corinna Vinschen <corinna@sourceware.org>
To: cygwin-cvs@sourceware.org, newlib-cvs@sourceware.org
Subject: [newlib-cygwin/main] Cygwin: add support for GB18030 codeset
Date: Thu, 16 Mar 2023 17:25:20 +0000 (GMT)	[thread overview]
Message-ID: <20230316172520.A961B3858D28@sourceware.org> (raw)

https://sourceware.org/git/gitweb.cgi?p=newlib-cygwin.git;h=5da71b6059956a8f20a6be02e82867aa28aa3880

commit 5da71b6059956a8f20a6be02e82867aa28aa3880
Author:     Corinna Vinschen <corinna@vinschen.de>
AuthorDate: Thu Mar 16 18:25:09 2023 +0100
Commit:     Corinna Vinschen <corinna@vinschen.de>
CommitDate: Thu Mar 16 18:25:09 2023 +0100

    Cygwin: add support for GB18030 codeset
    
    Signed-off-by: Corinna Vinschen <corinna@vinschen.de>

Diff:
---
 newlib/libc/locale/locale.c    | 15 +++++++++++----
 newlib/libc/stdlib/local.h     |  2 ++
 winsup/cygwin/fhandler/proc.cc |  1 +
 winsup/cygwin/nlsfuncs.cc      |  4 ++--
 winsup/cygwin/release/3.5.0    |  2 ++
 winsup/cygwin/strfuncs.cc      | 16 +++++++++++++++-
 winsup/doc/new-features.xml    |  4 ++++
 7 files changed, 37 insertions(+), 7 deletions(-)

diff --git a/newlib/libc/locale/locale.c b/newlib/libc/locale/locale.c
index e58ba3dbef8c..3772106e3ce7 100644
--- a/newlib/libc/locale/locale.c
+++ b/newlib/libc/locale/locale.c
@@ -65,8 +65,8 @@ build with multibyte support and support for all ISO and Windows Codepage.
 Otherwise all singlebyte charsets are simply mapped to ASCII.  Right now,
 only newlib for Cygwin is built with full charset support by default.
 Under Cygwin, this implementation additionally supports the charsets
-<<"GBK">>, <<"GB2312">>, <<"eucCN">>, <<"eucKR">>, and <<"Big5">>.  Cygwin
-does not support <<"JIS">>.
+<<"GB18030">>, <<"GBK">>, <<"GB2312">>, <<"eucCN">>, <<"eucKR">>, and
+<<"Big5">>.  Cygwin does not support <<"JIS">>.
 
 Cygwin additionally supports locales from the file
 /usr/share/locale/locale.alias.
@@ -657,7 +657,7 @@ restart:
 	}
 #ifdef __CYGWIN__
       /* Newlib does neither provide EUC-KR nor EUC-CN, and Cygwin's
-      	 implementation requires Windows support. */
+	 implementation requires Windows support. */
       else if (!strcasecmp (c, "KR"))
 	{
 	  strcpy (charset, "EUCKR");
@@ -817,12 +817,19 @@ restart:
 	 requires Windows support. */
       if (!strcasecmp (charset, "GBK")
 	  || !strcasecmp (charset, "GB2312"))
-      	{
+	{
 	  strcpy (charset, charset[2] == '2' ? "GB2312" : "GBK");
 	  mbc_max = 2;
 	  l_wctomb = __gbk_wctomb;
 	  l_mbtowc = __gbk_mbtowc;
 	}
+      else if (!strcasecmp (charset, "GB18030"))
+	{
+	  strcpy (charset, "GB18030");
+	  mbc_max = 4;
+	  l_wctomb = __gb18030_wctomb;
+	  l_mbtowc = __gb18030_mbtowc;
+	}
       else
 #endif /* __CYGWIN__ */
       /* GEORGIAN-PS and the alias without dash */
diff --git a/newlib/libc/stdlib/local.h b/newlib/libc/stdlib/local.h
index a96ed2cc4dae..26fd4f05dce9 100644
--- a/newlib/libc/stdlib/local.h
+++ b/newlib/libc/stdlib/local.h
@@ -24,6 +24,7 @@ wctomb_p __iso_wctomb (int val);
 wctomb_p __cp_wctomb (int val);
 #ifdef __CYGWIN__
 wctomb_f __gbk_wctomb;
+wctomb_f __gb18030_wctomb;
 wctomb_f __kr_wctomb;
 wctomb_f __big5_wctomb;
 #endif
@@ -45,6 +46,7 @@ mbtowc_p __iso_mbtowc (int val);
 mbtowc_p __cp_mbtowc (int val);
 #ifdef __CYGWIN__
 mbtowc_f __gbk_mbtowc;
+mbtowc_f __gb18030_mbtowc;
 mbtowc_f __kr_mbtowc;
 mbtowc_f __big5_mbtowc;
 #endif
diff --git a/winsup/cygwin/fhandler/proc.cc b/winsup/cygwin/fhandler/proc.cc
index bb32d64986d3..d5e3c60fcf1a 100644
--- a/winsup/cygwin/fhandler/proc.cc
+++ b/winsup/cygwin/fhandler/proc.cc
@@ -2323,6 +2323,7 @@ format_proc_codesets (void *, char *&destbuf)
 			 "EUC-CN\n"
 			 "EUC-JP\n"
 			 "EUC-KR\n"
+			 "GB18030\n"
 			 "GB2312\n"
 			 "GBK\n"
 			 "GEORGIAN-PS\n"
diff --git a/winsup/cygwin/nlsfuncs.cc b/winsup/cygwin/nlsfuncs.cc
index 95e291a283db..808bf6a87320 100644
--- a/winsup/cygwin/nlsfuncs.cc
+++ b/winsup/cygwin/nlsfuncs.cc
@@ -1578,8 +1578,8 @@ __eval_codepage_from_internal_charset ()
 	  break;
 	}
       break;
-    case 'G': /* GBK/GB2312 */
-      codepage = 936;
+    case 'G': /* GBK/GB2312/GB18030 */
+      codepage = (charset[2] == '1') ? 54936 : 936;
       break;
     case 'I': /* ISO-8859-x */
       codepage = strtoul (charset + 9, NULL, 10) + 28590;
diff --git a/winsup/cygwin/release/3.5.0 b/winsup/cygwin/release/3.5.0
index d6b24e5f9968..4123b8d32522 100644
--- a/winsup/cygwin/release/3.5.0
+++ b/winsup/cygwin/release/3.5.0
@@ -21,3 +21,5 @@ What's new:
   supported codesets and locales for all interested parties.  Locale(1)
   opens these files and uses the info for printing locale info like any
   other process could do.
+
+- Add support for GB18030 codeset.
diff --git a/winsup/cygwin/strfuncs.cc b/winsup/cygwin/strfuncs.cc
index 9324e155319f..57abf25649ec 100644
--- a/winsup/cygwin/strfuncs.cc
+++ b/winsup/cygwin/strfuncs.cc
@@ -245,7 +245,8 @@ mbsnrtowci(wint_t *dst, const char **src, size_t nms, size_t len, mbstate_t *ps)
    eucJP, the both most used Japanese charset encodings, this shouldn't
    be such a big problem. */
 
-/* GBK, eucKR, and Big5 conversions are not available so far in newlib. */
+/* GBK, GB18030, eucKR, and Big5 conversions are not available so far
+   in newlib. */
 
 static int
 __db_wctomb (struct _reent *r, char *s, wchar_t wchar, UINT cp)
@@ -325,6 +326,12 @@ __gbk_wctomb (struct _reent *r, char *s, wchar_t wchar, mbstate_t *state)
   return __db_wctomb (r,s, wchar, 936);
 }
 
+extern "C" int
+__gb18030_wctomb (struct _reent *r, char *s, wchar_t wchar, mbstate_t *state)
+{
+  return __db_wctomb (r,s, wchar, 54936);
+}
+
 extern "C" int
 __kr_wctomb (struct _reent *r, char *s, wchar_t wchar, mbstate_t *state)
 {
@@ -482,6 +489,13 @@ __gbk_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
   return __db_mbtowc (r, pwc, s, n, 936, state);
 }
 
+extern "C" int
+__gb18030_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
+		  mbstate_t *state)
+{
+  return __db_mbtowc (r, pwc, s, n, 54936, state);
+}
+
 extern "C" int
 __kr_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
 	     mbstate_t *state)
diff --git a/winsup/doc/new-features.xml b/winsup/doc/new-features.xml
index 0858b01e5cfa..a958f16002ad 100644
--- a/winsup/doc/new-features.xml
+++ b/winsup/doc/new-features.xml
@@ -41,6 +41,10 @@ files and uses the info for printing locale info like any other process
 could do.
 </para></listitem>
 
+<listitem><para>
+Add support for GB18030 codeset.
+</para></listitem>
+
 </itemizedlist>
 
 </sect2>

                 reply	other threads:[~2023-03-16 17:25 UTC|newest]

Thread overview: [no followups] expand[flat|nested]  mbox.gz  Atom feed

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230316172520.A961B3858D28@sourceware.org \
    --to=corinna@sourceware.org \
    --cc=cygwin-cvs@sourceware.org \
    --cc=newlib-cvs@sourceware.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).