From d5dff1690d1a228579eef472441d67fb6ef20b5e Mon Sep 17 00:00:00 2001 From: Corinna Vinschen Date: Fri, 4 Sep 2020 14:39:58 +0200 Subject: [PATCH] Cygwin: fetch codepage for pseudo console at initial_setlocale time drop locale checking in fhandler_tty code. Create function __eval_codepage_from_internal_charset called from internal_setlocale to store term_code_page in cygheap, rather than in tty. Drop now unneeded setup_locale calls during fork and execve. Signed-off-by: Corinna Vinschen --- winsup/cygwin/cygheap.h | 1 + winsup/cygwin/fhandler.h | 1 - winsup/cygwin/fhandler_tty.cc | 195 ++-------------------------------- winsup/cygwin/nlsfuncs.cc | 54 +++++++++- winsup/cygwin/spawn.cc | 12 --- 5 files changed, 60 insertions(+), 203 deletions(-) diff --git a/winsup/cygwin/cygheap.h b/winsup/cygwin/cygheap.h index 8877cc358c39..2b84f4252071 100644 --- a/winsup/cygwin/cygheap.h +++ b/winsup/cygwin/cygheap.h @@ -341,6 +341,7 @@ struct cygheap_debug struct cygheap_locale { mbtowc_p mbtowc; + UINT term_code_page; }; struct user_heap_info diff --git a/winsup/cygwin/fhandler.h b/winsup/cygwin/fhandler.h index b4ba9428aa90..af619df5f9b1 100644 --- a/winsup/cygwin/fhandler.h +++ b/winsup/cygwin/fhandler.h @@ -2336,7 +2336,6 @@ class fhandler_pty_slave: public fhandler_pty_common void set_switch_to_pcon (void); void reset_switch_to_pcon (void); void mask_switch_to_pcon_in (bool mask); - void setup_locale (void); }; #define __ptsname(buf, unit) __small_sprintf ((buf), "/dev/pty%d", (unit)) diff --git a/winsup/cygwin/fhandler_tty.cc b/winsup/cygwin/fhandler_tty.cc index 8bf39c3e6cf2..f207a0b27892 100644 --- a/winsup/cygwin/fhandler_tty.cc +++ b/winsup/cygwin/fhandler_tty.cc @@ -1614,8 +1614,9 @@ fhandler_pty_master::write (const void *ptr, size_t len) if (to_be_read_from_pcon () && get_ttyp ()->h_pseudo_console) { size_t nlen; - char *buf = convert_mb_str - (CP_UTF8, &nlen, get_ttyp ()->term_code_page, (const char *) ptr, len); + char *buf = convert_mb_str (CP_UTF8, &nlen, + cygheap->locale.term_code_page, + (const char *) ptr, len); WaitForSingleObject (input_mutex, INFINITE); @@ -1782,185 +1783,6 @@ fhandler_pty_common::set_close_on_exec (bool val) close_on_exec (val); } -/* This table is borrowed from mintty: charset.c */ -static const struct { - UINT cp; - const char *name; -} -cs_names[] = { - { CP_UTF8, "UTF-8"}, - { CP_UTF8, "UTF8"}, - { 20127, "ASCII"}, - { 20127, "US-ASCII"}, - { 20127, "ANSI_X3.4-1968"}, - { 20866, "KOI8-R"}, - { 20866, "KOI8R"}, - { 20866, "KOI8"}, - { 21866, "KOI8-U"}, - { 21866, "KOI8U"}, - { 20932, "EUCJP"}, - { 20932, "EUC-JP"}, - { 874, "TIS620"}, - { 874, "TIS-620"}, - { 932, "SJIS"}, - { 936, "GBK"}, - { 936, "GB2312"}, - { 936, "EUCCN"}, - { 936, "EUC-CN"}, - { 949, "EUCKR"}, - { 949, "EUC-KR"}, - { 950, "BIG5"}, - { 0, "NULL"} -}; - -static void -get_locale_from_env (char *locale) -{ - const char *env = NULL; - char lang[ENCODING_LEN + 1] = {0, }, country[ENCODING_LEN + 1] = {0, }; - env = getenv ("LC_ALL"); - if (env == NULL || !*env) - env = getenv ("LC_CTYPE"); - if (env == NULL || !*env) - env = getenv ("LANG"); - if (env == NULL || !*env) - { - if (GetLocaleInfo (LOCALE_CUSTOM_UI_DEFAULT, - LOCALE_SISO639LANGNAME, - lang, sizeof (lang))) - GetLocaleInfo (LOCALE_CUSTOM_UI_DEFAULT, - LOCALE_SISO3166CTRYNAME, - country, sizeof (country)); - else if (GetLocaleInfo (LOCALE_CUSTOM_DEFAULT, - LOCALE_SISO639LANGNAME, - lang, sizeof (lang))) - GetLocaleInfo (LOCALE_CUSTOM_DEFAULT, - LOCALE_SISO3166CTRYNAME, - country, sizeof (country)); - else if (GetLocaleInfo (LOCALE_USER_DEFAULT, - LOCALE_SISO639LANGNAME, - lang, sizeof (lang))) - GetLocaleInfo (LOCALE_USER_DEFAULT, - LOCALE_SISO3166CTRYNAME, - country, sizeof (country)); - else if (GetLocaleInfo (LOCALE_SYSTEM_DEFAULT, - LOCALE_SISO639LANGNAME, - lang, sizeof (lang))) - GetLocaleInfo (LOCALE_SYSTEM_DEFAULT, - LOCALE_SISO3166CTRYNAME, - country, sizeof (country)); - if (strlen (lang) && strlen (country)) - __small_sprintf (lang + strlen (lang), "_%s.UTF-8", country); - else - strcpy (lang , "C.UTF-8"); - env = lang; - } - strcpy (locale, env); -} - -static void -get_langinfo (char *locale_out, char *charset_out) -{ - /* Get locale from environment */ - char new_locale[ENCODING_LEN + 1]; - get_locale_from_env (new_locale); - - __locale_t loc; - memset (&loc, 0, sizeof (loc)); - const char *locale = __loadlocale (&loc, LC_CTYPE, new_locale); - if (!locale) - locale = "C"; - - const char *charset; - struct lc_ctype_T *lc_ctype = (struct lc_ctype_T *) loc.lc_cat[LC_CTYPE].ptr; - if (!lc_ctype) - charset = "ASCII"; - else - charset = lc_ctype->codeset; - - /* The following code is borrowed from nl_langinfo() - in newlib/libc/locale/nl_langinfo.c */ - /* Convert charset to Linux compatible codeset string. */ - if (charset[0] == 'A'/*SCII*/) - charset = "ANSI_X3.4-1968"; - else if (charset[0] == 'E') - { - if (strcmp (charset, "EUCJP") == 0) - charset = "EUC-JP"; - else if (strcmp (charset, "EUCKR") == 0) - charset = "EUC-KR"; - else if (strcmp (charset, "EUCCN") == 0) - charset = "GB2312"; - } - else if (charset[0] == 'C'/*Pxxxx*/) - { - if (strcmp (charset + 2, "874") == 0) - charset = "TIS-620"; - else if (strcmp (charset + 2, "20866") == 0) - charset = "KOI8-R"; - else if (strcmp (charset + 2, "21866") == 0) - charset = "KOI8-U"; - else if (strcmp (charset + 2, "101") == 0) - charset = "GEORGIAN-PS"; - else if (strcmp (charset + 2, "102") == 0) - charset = "PT154"; - } - else if (charset[0] == 'S'/*JIS*/) - { - /* Cygwin uses MSFT's implementation of SJIS, which differs - in some codepoints from the real thing, especially - 0x5c: yen sign instead of backslash, - 0x7e: overline instead of tilde. - We can't use the real SJIS since otherwise Win32 - pathnames would become invalid. OTOH, if we return - "SJIS" here, then libiconv will do mb<->wc conversion - differently to our internal functions. Therefore we - return what we really implement, CP932. This is handled - fine by libiconv. */ - charset = "CP932"; - } - - /* Set results */ - strcpy (locale_out, new_locale); - strcpy (charset_out, charset); -} - -void -fhandler_pty_slave::setup_locale (void) -{ - if (get_ttyp ()->term_code_page != 0) - return; - - char locale[ENCODING_LEN + 1] = "C"; - char charset[ENCODING_LEN + 1] = "ASCII"; - get_langinfo (locale, charset); - - /* Set terminal code page from locale */ - /* This code is borrowed from mintty: charset.c */ - get_ttyp ()->term_code_page = 20127; /* Default ASCII */ - char charset_u[ENCODING_LEN + 1] = {0, }; - for (int i=0; charset[i] && iterm_code_page = 28590 + iso; - } - else if (sscanf (charset_u, "CP%u", &cp) == 1) - get_ttyp ()->term_code_page = cp; - else - for (int i=0; cs_names[i].cp; i++) - if (strcasecmp (charset_u, cs_names[i].name) == 0) - { - get_ttyp ()->term_code_page = cs_names[i].cp; - break; - } -} - void fhandler_pty_slave::fixup_after_fork (HANDLE parent) { @@ -1977,9 +1799,6 @@ fhandler_pty_slave::fixup_after_exec () if (!close_on_exec ()) fixup_after_fork (NULL); /* No parent handle required. */ - /* Set locale */ - setup_locale (); - /* Hook Console API */ #define DO_HOOK(module, name) \ if (!name##_Orig) \ @@ -2205,8 +2024,8 @@ fhandler_pty_master::pty_master_fwd_thread () state = 0; size_t nlen; - char *buf = convert_mb_str - (get_ttyp ()->term_code_page, &nlen, CP_UTF8, ptr, wlen); + char *buf = convert_mb_str (cygheap->locale.term_code_page, + &nlen, CP_UTF8, ptr, wlen); ptr = buf; wlen = rlen = nlen; @@ -2228,8 +2047,8 @@ fhandler_pty_master::pty_master_fwd_thread () continue; } size_t nlen; - char *buf = convert_mb_str - (get_ttyp ()->term_code_page, &nlen, GetConsoleOutputCP (), ptr, wlen); + char *buf = convert_mb_str (cygheap->locale.term_code_page, &nlen, + GetConsoleOutputCP (), ptr, wlen); ptr = buf; wlen = rlen = nlen; diff --git a/winsup/cygwin/nlsfuncs.cc b/winsup/cygwin/nlsfuncs.cc index 668d7eb9e778..10a3a0705142 100644 --- a/winsup/cygwin/nlsfuncs.cc +++ b/winsup/cygwin/nlsfuncs.cc @@ -1448,6 +1448,53 @@ __set_charset_from_locale (const char *locale, char *charset) stpcpy (charset, cs); } +/* Called from internal_setlocale. Set a codepage which reflects the + internal charset setting. This is *not* necessarily the Windows + codepage connected to a locale by default, so we have to set this + up explicitely. */ +static UINT +__eval_codepage_from_internal_charset (const char *charset) +{ + UINT codepage = CP_UTF8; /* Default UTF8 */ + + /* The internal charset names are well defined, so we can use shortcuts. */ + switch (charset[0]) + { + case 'B': /* BIG5 */ + codepage = 950; + break; + case 'C': /* CPxxx */ + codepage = strtoul (charset + 2, NULL, 10); + break; + case 'E': /* EUCxx */ + switch (charset[3]) + { + case 'J': /* EUCJP */ + codepage = 20932; + break; + case 'K': /* EUCKR */ + codepage = 949; + break; + case 'C': /* EUCCN */ + codepage = 936; + break; + } + break; + case 'G': /* GBK/GB2312 */ + codepage = 936; + break; + case 'I': /* ISO-8859-x */ + codepage = strtoul (charset + 9, NULL, 10); + break; + case 'S': /* SJIS */ + codepage = 932; + break; + default: /* All set to UTF8 already */ + break; + } + return codepage; +} + /* This function is called from newlib's loadlocale if the locale identifier was invalid, one way or the other. It looks for the file @@ -1535,8 +1582,11 @@ internal_setlocale () if (cygheap->locale.mbtowc == __get_global_locale ()->mbtowc) return; - debug_printf ("Global charset set to %s", - __locale_charset (__get_global_locale ())); + const char *charset = __locale_charset (__get_global_locale ()); + debug_printf ("Global charset set to %s", charset); + /* Store codepage to be utilized by pseudo console code. */ + cygheap->locale.term_code_page = + __eval_codepage_from_internal_charset (charset); /* Fetch PATH and CWD and convert to wchar_t in previous charset. */ path = getenv ("PATH"); if (path && *path) /* $PATH can be potentially unset. */ diff --git a/winsup/cygwin/spawn.cc b/winsup/cygwin/spawn.cc index 92d190d1a764..02c4207abae9 100644 --- a/winsup/cygwin/spawn.cc +++ b/winsup/cygwin/spawn.cc @@ -619,18 +619,6 @@ child_info_spawn::worker (const char *prog_arg, const char *const *argv, } } - if (!iscygwin ()) - { - cfd.rewind (); - while (cfd.next () >= 0) - if (cfd->get_major () == DEV_PTYS_MAJOR) - { - fhandler_pty_slave *ptys = - (fhandler_pty_slave *)(fhandler_base *) cfd; - ptys->setup_locale (); - } - } - /* Set up needed handles for stdio */ si.dwFlags = STARTF_USESTDHANDLES; si.hStdInput = handle ((in__stdin < 0 ? 0 : in__stdin), false); -- 2.26.2