From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 2155) id 8B73C3857C56; Tue, 8 Sep 2020 08:36:22 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 8B73C3857C56 Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: Corinna Vinschen To: cygwin-cvs@sourceware.org Subject: [newlib-cygwin] Cygwin: pty: move codepage evaluation to nlsfuncs.cc X-Act-Checkin: newlib-cygwin X-Git-Author: Corinna Vinschen X-Git-Refname: refs/heads/master X-Git-Oldrev: 8d0ff0768f6c948feb1d9383c494217f886e6b17 X-Git-Newrev: eaed594d736259c42affa7cf58f12492cbc5fb67 Message-Id: <20200908083622.8B73C3857C56@sourceware.org> Date: Tue, 8 Sep 2020 08:36:22 +0000 (GMT) X-BeenThere: cygwin-cvs@cygwin.com X-Mailman-Version: 2.1.29 Precedence: list List-Id: Cygwin core component git logs List-Unsubscribe: , List-Archive: List-Help: List-Subscribe: , X-List-Received-Date: Tue, 08 Sep 2020 08:36:22 -0000 https://sourceware.org/git/gitweb.cgi?p=newlib-cygwin.git;h=eaed594d736259c42affa7cf58f12492cbc5fb67 commit eaed594d736259c42affa7cf58f12492cbc5fb67 Author: Corinna Vinschen Date: Tue Sep 8 10:36:04 2020 +0200 Cygwin: pty: move codepage evaluation to nlsfuncs.cc The new function __eval_codepage_from_internal_charset is a simplified version of the former code in fhandler_tty.cc. It probably needs some extension, but the gist is to use knowledge of internals to be as quick as possible. 
Signed-off-by: Corinna Vinschen Diff: --- winsup/cygwin/fhandler_tty.cc | 191 ++---------------------------------------- winsup/cygwin/nlsfuncs.cc | 48 +++++++++++ 2 files changed, 57 insertions(+), 182 deletions(-) diff --git a/winsup/cygwin/fhandler_tty.cc b/winsup/cygwin/fhandler_tty.cc index 8bf39c3e6..6de591d9b 100644 --- a/winsup/cygwin/fhandler_tty.cc +++ b/winsup/cygwin/fhandler_tty.cc @@ -1614,8 +1614,8 @@ fhandler_pty_master::write (const void *ptr, size_t len) if (to_be_read_from_pcon () && get_ttyp ()->h_pseudo_console) { size_t nlen; - char *buf = convert_mb_str - (CP_UTF8, &nlen, get_ttyp ()->term_code_page, (const char *) ptr, len); + char *buf = convert_mb_str (CP_UTF8, &nlen, get_ttyp ()->term_code_page, + (const char *) ptr, len); WaitForSingleObject (input_mutex, INFINITE); @@ -1782,183 +1782,13 @@ fhandler_pty_common::set_close_on_exec (bool val) close_on_exec (val); } -/* This table is borrowed from mintty: charset.c */ -static const struct { - UINT cp; - const char *name; -} -cs_names[] = { - { CP_UTF8, "UTF-8"}, - { CP_UTF8, "UTF8"}, - { 20127, "ASCII"}, - { 20127, "US-ASCII"}, - { 20127, "ANSI_X3.4-1968"}, - { 20866, "KOI8-R"}, - { 20866, "KOI8R"}, - { 20866, "KOI8"}, - { 21866, "KOI8-U"}, - { 21866, "KOI8U"}, - { 20932, "EUCJP"}, - { 20932, "EUC-JP"}, - { 874, "TIS620"}, - { 874, "TIS-620"}, - { 932, "SJIS"}, - { 936, "GBK"}, - { 936, "GB2312"}, - { 936, "EUCCN"}, - { 936, "EUC-CN"}, - { 949, "EUCKR"}, - { 949, "EUC-KR"}, - { 950, "BIG5"}, - { 0, "NULL"} -}; - -static void -get_locale_from_env (char *locale) -{ - const char *env = NULL; - char lang[ENCODING_LEN + 1] = {0, }, country[ENCODING_LEN + 1] = {0, }; - env = getenv ("LC_ALL"); - if (env == NULL || !*env) - env = getenv ("LC_CTYPE"); - if (env == NULL || !*env) - env = getenv ("LANG"); - if (env == NULL || !*env) - { - if (GetLocaleInfo (LOCALE_CUSTOM_UI_DEFAULT, - LOCALE_SISO639LANGNAME, - lang, sizeof (lang))) - GetLocaleInfo (LOCALE_CUSTOM_UI_DEFAULT, - LOCALE_SISO3166CTRYNAME, - 
country, sizeof (country)); - else if (GetLocaleInfo (LOCALE_CUSTOM_DEFAULT, - LOCALE_SISO639LANGNAME, - lang, sizeof (lang))) - GetLocaleInfo (LOCALE_CUSTOM_DEFAULT, - LOCALE_SISO3166CTRYNAME, - country, sizeof (country)); - else if (GetLocaleInfo (LOCALE_USER_DEFAULT, - LOCALE_SISO639LANGNAME, - lang, sizeof (lang))) - GetLocaleInfo (LOCALE_USER_DEFAULT, - LOCALE_SISO3166CTRYNAME, - country, sizeof (country)); - else if (GetLocaleInfo (LOCALE_SYSTEM_DEFAULT, - LOCALE_SISO639LANGNAME, - lang, sizeof (lang))) - GetLocaleInfo (LOCALE_SYSTEM_DEFAULT, - LOCALE_SISO3166CTRYNAME, - country, sizeof (country)); - if (strlen (lang) && strlen (country)) - __small_sprintf (lang + strlen (lang), "_%s.UTF-8", country); - else - strcpy (lang , "C.UTF-8"); - env = lang; - } - strcpy (locale, env); -} - -static void -get_langinfo (char *locale_out, char *charset_out) -{ - /* Get locale from environment */ - char new_locale[ENCODING_LEN + 1]; - get_locale_from_env (new_locale); - - __locale_t loc; - memset (&loc, 0, sizeof (loc)); - const char *locale = __loadlocale (&loc, LC_CTYPE, new_locale); - if (!locale) - locale = "C"; - - const char *charset; - struct lc_ctype_T *lc_ctype = (struct lc_ctype_T *) loc.lc_cat[LC_CTYPE].ptr; - if (!lc_ctype) - charset = "ASCII"; - else - charset = lc_ctype->codeset; - - /* The following code is borrowed from nl_langinfo() - in newlib/libc/locale/nl_langinfo.c */ - /* Convert charset to Linux compatible codeset string. 
*/ - if (charset[0] == 'A'/*SCII*/) - charset = "ANSI_X3.4-1968"; - else if (charset[0] == 'E') - { - if (strcmp (charset, "EUCJP") == 0) - charset = "EUC-JP"; - else if (strcmp (charset, "EUCKR") == 0) - charset = "EUC-KR"; - else if (strcmp (charset, "EUCCN") == 0) - charset = "GB2312"; - } - else if (charset[0] == 'C'/*Pxxxx*/) - { - if (strcmp (charset + 2, "874") == 0) - charset = "TIS-620"; - else if (strcmp (charset + 2, "20866") == 0) - charset = "KOI8-R"; - else if (strcmp (charset + 2, "21866") == 0) - charset = "KOI8-U"; - else if (strcmp (charset + 2, "101") == 0) - charset = "GEORGIAN-PS"; - else if (strcmp (charset + 2, "102") == 0) - charset = "PT154"; - } - else if (charset[0] == 'S'/*JIS*/) - { - /* Cygwin uses MSFT's implementation of SJIS, which differs - in some codepoints from the real thing, especially - 0x5c: yen sign instead of backslash, - 0x7e: overline instead of tilde. - We can't use the real SJIS since otherwise Win32 - pathnames would become invalid. OTOH, if we return - "SJIS" here, then libiconv will do mb<->wc conversion - differently to our internal functions. Therefore we - return what we really implement, CP932. This is handled - fine by libiconv. 
*/ - charset = "CP932"; - } - - /* Set results */ - strcpy (locale_out, new_locale); - strcpy (charset_out, charset); -} - void fhandler_pty_slave::setup_locale (void) { - if (get_ttyp ()->term_code_page != 0) - return; + extern UINT __eval_codepage_from_internal_charset (); - char locale[ENCODING_LEN + 1] = "C"; - char charset[ENCODING_LEN + 1] = "ASCII"; - get_langinfo (locale, charset); - - /* Set terminal code page from locale */ - /* This code is borrowed from mintty: charset.c */ - get_ttyp ()->term_code_page = 20127; /* Default ASCII */ - char charset_u[ENCODING_LEN + 1] = {0, }; - for (int i=0; charset[i] && iterm_code_page = 28590 + iso; - } - else if (sscanf (charset_u, "CP%u", &cp) == 1) - get_ttyp ()->term_code_page = cp; - else - for (int i=0; cs_names[i].cp; i++) - if (strcasecmp (charset_u, cs_names[i].name) == 0) - { - get_ttyp ()->term_code_page = cs_names[i].cp; - break; - } + if (!get_ttyp ()->term_code_page) + get_ttyp ()->term_code_page = __eval_codepage_from_internal_charset (); } void @@ -1977,9 +1807,6 @@ fhandler_pty_slave::fixup_after_exec () if (!close_on_exec ()) fixup_after_fork (NULL); /* No parent handle required. 
*/ - /* Set locale */ - setup_locale (); - /* Hook Console API */ #define DO_HOOK(module, name) \ if (!name##_Orig) \ @@ -2205,8 +2032,8 @@ fhandler_pty_master::pty_master_fwd_thread () state = 0; size_t nlen; - char *buf = convert_mb_str - (get_ttyp ()->term_code_page, &nlen, CP_UTF8, ptr, wlen); + char *buf = convert_mb_str (get_ttyp ()->term_code_page, + &nlen, CP_UTF8, ptr, wlen); ptr = buf; wlen = rlen = nlen; @@ -2228,8 +2055,8 @@ fhandler_pty_master::pty_master_fwd_thread () continue; } size_t nlen; - char *buf = convert_mb_str - (get_ttyp ()->term_code_page, &nlen, GetConsoleOutputCP (), ptr, wlen); + char *buf = convert_mb_str (get_ttyp ()->term_code_page, &nlen, + GetConsoleOutputCP (), ptr, wlen); ptr = buf; wlen = rlen = nlen; diff --git a/winsup/cygwin/nlsfuncs.cc b/winsup/cygwin/nlsfuncs.cc index 668d7eb9e..a518d2be3 100644 --- a/winsup/cygwin/nlsfuncs.cc +++ b/winsup/cygwin/nlsfuncs.cc @@ -1448,6 +1448,54 @@ __set_charset_from_locale (const char *locale, char *charset) stpcpy (charset, cs); } +/* Called from fhandler_pty_slave::setup_locale. Set a codepage which reflects the + internal charset setting. This is *not* necessarily the Windows + codepage connected to a locale by default, so we have to set this + up explicitly. */ +UINT +__eval_codepage_from_internal_charset () +{ + const char *charset = __locale_charset (__get_global_locale ()); + UINT codepage = CP_UTF8; /* Default UTF8 */ + + /* The internal charset names are well defined, so we can use shortcuts. 
*/ + switch (charset[0]) + { + case 'B': /* BIG5 */ + codepage = 950; + break; + case 'C': /* CPxxx */ + codepage = strtoul (charset + 2, NULL, 10); + break; + case 'E': /* EUCxx */ + switch (charset[3]) + { + case 'J': /* EUCJP */ + codepage = 20932; + break; + case 'K': /* EUCKR */ + codepage = 949; + break; + case 'C': /* EUCCN */ + codepage = 936; + break; + } + break; + case 'G': /* GBK/GB2312 */ + codepage = 936; + break; + case 'I': /* ISO-8859-x */ + codepage = strtoul (charset + 9, NULL, 10) + 28590; + break; + case 'S': /* SJIS */ + codepage = 932; + break; + default: /* All set to UTF8 already */ + break; + } + return codepage; +} + /* This function is called from newlib's loadlocale if the locale identifier was invalid, one way or the other. It looks for the file