public inbox for cygwin-patches@cygwin.com
 help / color / mirror / Atom feed
* [PATCH v2 0/2] Cygwin: pty: Changes regarding charset conversion.
@ 2020-09-09 13:40 Takashi Yano
  2020-09-09 13:40 ` [PATCH v2 1/2] Cygwin: pty: Revise convert_mb_str() function Takashi Yano
  2020-09-09 13:40 ` [PATCH v2 2/2] Cygwin: pty: Fix input charset for non-cygwin apps with disable_pcon Takashi Yano
  0 siblings, 2 replies; 4+ messages in thread
From: Takashi Yano @ 2020-09-09 13:40 UTC (permalink / raw)
  To: cygwin-patches

Takashi Yano (2):
  Cygwin: pty: Revise convert_mb_str() function.
  Cygwin: pty: Fix input charset for non-cygwin apps with disable_pcon.

 winsup/cygwin/fhandler_tty.cc | 130 +++++++++++++++++++++++-----------
 1 file changed, 88 insertions(+), 42 deletions(-)

-- 
2.28.0


^ permalink raw reply	[flat|nested] 4+ messages in thread

* [PATCH v2 1/2] Cygwin: pty: Revise convert_mb_str() function.
  2020-09-09 13:40 [PATCH v2 0/2] Cygwin: pty: Changes regarding charset conversion Takashi Yano
@ 2020-09-09 13:40 ` Takashi Yano
  2020-09-09 14:12   ` Corinna Vinschen
  2020-09-09 13:40 ` [PATCH v2 2/2] Cygwin: pty: Fix input charset for non-cygwin apps with disable_pcon Takashi Yano
  1 sibling, 1 reply; 4+ messages in thread
From: Takashi Yano @ 2020-09-09 13:40 UTC (permalink / raw)
  To: cygwin-patches

- Use tmp_pathbuf instead of HeapAlloc()/HeapFree().
- Remove mb_str_free() function.
- Consider the case where the multibyte string stops in the middle
  of a multibyte char.
---
 winsup/cygwin/fhandler_tty.cc | 118 ++++++++++++++++++++++------------
 1 file changed, 77 insertions(+), 41 deletions(-)

diff --git a/winsup/cygwin/fhandler_tty.cc b/winsup/cygwin/fhandler_tty.cc
index 6de591d9b..c4b7fc61d 100644
--- a/winsup/cygwin/fhandler_tty.cc
+++ b/winsup/cygwin/fhandler_tty.cc
@@ -26,6 +26,7 @@ details. */
 #include <asm/socket.h>
 #include "cygwait.h"
 #include "registry.h"
+#include "tls_pbuf.h"
 
 #ifndef PROC_THREAD_ATTRIBUTE_PSEUDOCONSOLE
 #define PROC_THREAD_ATTRIBUTE_PSEUDOCONSOLE 0x00020016
@@ -116,40 +117,72 @@ CreateProcessW_Hooked
   return CreateProcessW_Orig (n, c, pa, ta, inh, f, e, d, si, pi);
 }
 
-static char *
-convert_mb_str (UINT cp_to, size_t *len_to,
-		UINT cp_from, const char *ptr_from, size_t len_from)
+static void
+convert_mb_str (UINT cp_to, char *ptr_to, size_t *len_to,
+		UINT cp_from, const char *ptr_from, size_t len_from,
+		mbstate_t *stat)
 {
-  char *buf;
   size_t nlen;
+  tmp_pathbuf tp;
   if (cp_to != cp_from)
     {
-      size_t wlen =
-	MultiByteToWideChar (cp_from, 0, ptr_from, len_from, NULL, 0);
-      wchar_t *wbuf = (wchar_t *)
-	HeapAlloc (GetProcessHeap (), 0, wlen * sizeof (wchar_t));
-      wlen =
-	MultiByteToWideChar (cp_from, 0, ptr_from, len_from, wbuf, wlen);
-      nlen = WideCharToMultiByte (cp_to, 0, wbuf, wlen, NULL, 0, NULL, NULL);
-      buf = (char *) HeapAlloc (GetProcessHeap (), 0, nlen);
-      nlen = WideCharToMultiByte (cp_to, 0, wbuf, wlen, buf, nlen, NULL, NULL);
-      HeapFree (GetProcessHeap (), 0, wbuf);
+      wchar_t *wbuf = tp.w_get ();
+      int wlen = 0;
+      if (cp_from == 65000) /* UTF-7 */
+	/* MB_ERR_INVALID_CHARS does not work properly for UTF-7.
+	   Therefore, just convert string without checking */
+	wlen = MultiByteToWideChar (cp_from, 0, ptr_from, len_from,
+				    wbuf, NT_MAX_PATH);
+      else
+	{
+	  char *tmpbuf = tp.c_get ();
+	  memcpy (tmpbuf, stat->__value.__wchb, stat->__count);
+	  if (stat->__count + len_from > NT_MAX_PATH)
+	    len_from = NT_MAX_PATH - stat->__count;
+	  memcpy (tmpbuf + stat->__count, ptr_from, len_from);
+	  int total_len = stat->__count + len_from;
+	  stat->__count = 0;
+	  int mblen = 0;
+	  for (const char *p = tmpbuf; p < tmpbuf + total_len; p += mblen)
+	    /* Max bytes in multibyte char is 4. */
+	    for (mblen = 1; mblen <= 4; mblen ++)
+	      {
+		/* Try conversion */
+		int l = MultiByteToWideChar (cp_from, MB_ERR_INVALID_CHARS,
+					     p, mblen,
+					     wbuf + wlen, NT_MAX_PATH - wlen);
+		if (l)
+		  { /* Conversion Success */
+		    wlen += l;
+		    break;
+		  }
+		else if (mblen == 4)
+		  { /* Conversion Fail */
+		    l = MultiByteToWideChar (cp_from, 0, p, 1,
+					     wbuf + wlen, NT_MAX_PATH - wlen);
+		    wlen += l;
+		    mblen = 1;
+		    break;
+		  }
+		else if (p + mblen == tmpbuf + total_len)
+		  { /* Multibyte char incomplete */
+		    memcpy (stat->__value.__wchb, p, mblen);
+		    stat->__count = mblen;
+		    break;
+		  }
+		/* Retry conversion with extended length */
+	      }
+	}
+      nlen = WideCharToMultiByte (cp_to, 0, wbuf, wlen,
+				  ptr_to, *len_to, NULL, NULL);
     }
   else
     {
       /* Just copy */
-      buf = (char *) HeapAlloc (GetProcessHeap (), 0, len_from);
-      memcpy (buf, ptr_from, len_from);
-      nlen = len_from;
+      nlen = min (*len_to, len_from);
+      memcpy (ptr_to, ptr_from, nlen);
     }
   *len_to = nlen;
-  return buf;
-}
-
-static void
-mb_str_free (char *ptr)
-{
-  HeapFree (GetProcessHeap (), 0, ptr);
 }
 
 static bool
@@ -1613,9 +1646,13 @@ fhandler_pty_master::write (const void *ptr, size_t len)
      if current application is native console application. */
   if (to_be_read_from_pcon () && get_ttyp ()->h_pseudo_console)
     {
-      size_t nlen;
-      char *buf = convert_mb_str (CP_UTF8, &nlen, get_ttyp ()->term_code_page,
-				  (const char *) ptr, len);
+      tmp_pathbuf tp;
+      char *buf = tp.c_get ();
+      static mbstate_t mbstat;
+      size_t nlen = NT_MAX_PATH;
+      convert_mb_str (CP_UTF8, buf, &nlen,
+		      get_ttyp ()->term_code_page, (const char *) ptr, len,
+		      &mbstat);
 
       WaitForSingleObject (input_mutex, INFINITE);
 
@@ -1650,7 +1687,6 @@ fhandler_pty_master::write (const void *ptr, size_t len)
 	      get_ttyp ()->pcon_start = false;
 	    }
 	  ReleaseMutex (input_mutex);
-	  mb_str_free (buf);
 	  return len;
 	}
 
@@ -1658,7 +1694,6 @@ fhandler_pty_master::write (const void *ptr, size_t len)
 
       ReleaseMutex (input_mutex);
 
-      mb_str_free (buf);
       return len;
     }
 
@@ -1975,13 +2010,16 @@ DWORD
 fhandler_pty_master::pty_master_fwd_thread ()
 {
   DWORD rlen;
-  char outbuf[65536];
+  tmp_pathbuf tp;
+  char *outbuf = tp.c_get ();
+  char *mbbuf = tp.c_get ();
+  static mbstate_t mbstat;
 
   termios_printf ("Started.");
   for (;;)
     {
       get_ttyp ()->pcon_last_time = GetTickCount ();
-      if (!ReadFile (from_slave, outbuf, sizeof outbuf, &rlen, NULL))
+      if (!ReadFile (from_slave, outbuf, NT_MAX_PATH, &rlen, NULL))
 	{
 	  termios_printf ("ReadFile for forwarding failed, %E");
 	  break;
@@ -2031,11 +2069,11 @@ fhandler_pty_master::pty_master_fwd_thread ()
 	    else
 	      state = 0;
 
-	  size_t nlen;
-	  char *buf = convert_mb_str (get_ttyp ()->term_code_page,
-				      &nlen, CP_UTF8, ptr, wlen);
+	  size_t nlen = NT_MAX_PATH;
+	  convert_mb_str (get_ttyp ()->term_code_page, mbbuf, &nlen,
+			  CP_UTF8, ptr, wlen, &mbstat);
 
-	  ptr = buf;
+	  ptr = mbbuf;
 	  wlen = rlen = nlen;
 
 	  /* OPOST processing was already done in pseudo console,
@@ -2051,14 +2089,13 @@ fhandler_pty_master::pty_master_fwd_thread ()
 	      ptr += written;
 	      wlen = (rlen -= written);
 	    }
-	  mb_str_free (buf);
 	  continue;
 	}
-      size_t nlen;
-      char *buf = convert_mb_str (get_ttyp ()->term_code_page, &nlen,
-				  GetConsoleOutputCP (), ptr, wlen);
+      size_t nlen = NT_MAX_PATH;
+      convert_mb_str (get_ttyp ()->term_code_page, mbbuf, &nlen,
+		      GetConsoleOutputCP (), ptr, wlen, &mbstat);
 
-      ptr = buf;
+      ptr = mbbuf;
       wlen = rlen = nlen;
       acquire_output_mutex (INFINITE);
       while (rlen>0)
@@ -2072,7 +2109,6 @@ fhandler_pty_master::pty_master_fwd_thread ()
 	  wlen = (rlen -= wlen);
 	}
       release_output_mutex ();
-      mb_str_free (buf);
     }
   return 0;
 }
-- 
2.28.0


^ permalink raw reply	[flat|nested] 4+ messages in thread

* [PATCH v2 2/2] Cygwin: pty: Fix input charset for non-cygwin apps with disable_pcon.
  2020-09-09 13:40 [PATCH v2 0/2] Cygwin: pty: Changes regarding charset conversion Takashi Yano
  2020-09-09 13:40 ` [PATCH v2 1/2] Cygwin: pty: Revise convert_mb_str() function Takashi Yano
@ 2020-09-09 13:40 ` Takashi Yano
  1 sibling, 0 replies; 4+ messages in thread
From: Takashi Yano @ 2020-09-09 13:40 UTC (permalink / raw)
  To: cygwin-patches

- If the non-cygwin apps is executed under pseudo console disabled,
  multibyte input for the apps are garbled. This patch fixes the
  issue.
---
 winsup/cygwin/fhandler_tty.cc | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/winsup/cygwin/fhandler_tty.cc b/winsup/cygwin/fhandler_tty.cc
index c4b7fc61d..701381ea6 100644
--- a/winsup/cygwin/fhandler_tty.cc
+++ b/winsup/cygwin/fhandler_tty.cc
@@ -304,8 +304,18 @@ fhandler_pty_master::accept_input ()
   bytes_left = eat_readahead (-1);
 
   HANDLE write_to = get_output_handle ();
+  tmp_pathbuf tp;
+  char *mbbuf = tp.c_get ();
   if (to_be_read_from_pcon ())
-    write_to = to_slave;
+    {
+      write_to = to_slave;
+      static mbstate_t mbstat;
+      size_t nlen = NT_MAX_PATH;
+      convert_mb_str (GetConsoleCP (), mbbuf, &nlen,
+		      get_ttyp ()->term_code_page, p, bytes_left, &mbstat);
+      p = mbbuf;
+      bytes_left = nlen;
+    }
 
   if (!bytes_left)
     {
-- 
2.28.0


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH v2 1/2] Cygwin: pty: Revise convert_mb_str() function.
  2020-09-09 13:40 ` [PATCH v2 1/2] Cygwin: pty: Revise convert_mb_str() function Takashi Yano
@ 2020-09-09 14:12   ` Corinna Vinschen
  0 siblings, 0 replies; 4+ messages in thread
From: Corinna Vinschen @ 2020-09-09 14:12 UTC (permalink / raw)
  To: cygwin-patches

On Sep  9 22:40, Takashi Yano via Cygwin-patches wrote:
> - Use tmp_pathbuf instead of HeapAlloc()/HeapFree().
> - Remove mb_str_free() function.
> - Consider the case where the multibyte string stops in the middle
>   of a multibyte char.
> ---
>  winsup/cygwin/fhandler_tty.cc | 118 ++++++++++++++++++++++------------
>  1 file changed, 77 insertions(+), 41 deletions(-)
> 
> diff --git a/winsup/cygwin/fhandler_tty.cc b/winsup/cygwin/fhandler_tty.cc
> index 6de591d9b..c4b7fc61d 100644
> --- a/winsup/cygwin/fhandler_tty.cc
> +++ b/winsup/cygwin/fhandler_tty.cc
> @@ -26,6 +26,7 @@ details. */
>  #include <asm/socket.h>
>  #include "cygwait.h"
>  #include "registry.h"
> +#include "tls_pbuf.h"
>  
>  #ifndef PROC_THREAD_ATTRIBUTE_PSEUDOCONSOLE
>  #define PROC_THREAD_ATTRIBUTE_PSEUDOCONSOLE 0x00020016
> @@ -116,40 +117,72 @@ CreateProcessW_Hooked
>    return CreateProcessW_Orig (n, c, pa, ta, inh, f, e, d, si, pi);
>  }
>  
> -static char *
> -convert_mb_str (UINT cp_to, size_t *len_to,
> -		UINT cp_from, const char *ptr_from, size_t len_from)
> +static void
> +convert_mb_str (UINT cp_to, char *ptr_to, size_t *len_to,
> +		UINT cp_from, const char *ptr_from, size_t len_from,
> +		mbstate_t *stat)

Heh, it's funny to use mbstate_t together with Windows functions :)
However, the var name `stat' makes me itch.  It looks too close to
struct stat and function stat.  What about "mbp" or something along
these lines?

>  {
> -  char *buf;
>    size_t nlen;
> +  tmp_pathbuf tp;
>    if (cp_to != cp_from)
>      {
> -      size_t wlen =
> -	MultiByteToWideChar (cp_from, 0, ptr_from, len_from, NULL, 0);
> -      wchar_t *wbuf = (wchar_t *)
> -	HeapAlloc (GetProcessHeap (), 0, wlen * sizeof (wchar_t));
> -      wlen =
> -	MultiByteToWideChar (cp_from, 0, ptr_from, len_from, wbuf, wlen);
> -      nlen = WideCharToMultiByte (cp_to, 0, wbuf, wlen, NULL, 0, NULL, NULL);
> -      buf = (char *) HeapAlloc (GetProcessHeap (), 0, nlen);
> -      nlen = WideCharToMultiByte (cp_to, 0, wbuf, wlen, buf, nlen, NULL, NULL);
> -      HeapFree (GetProcessHeap (), 0, wbuf);
> +      wchar_t *wbuf = tp.w_get ();
> +      int wlen = 0;
> +      if (cp_from == 65000) /* UTF-7 */

CP_UTF7?

> +	/* MB_ERR_INVALID_CHARS does not work properly for UTF-7.
> +	   Therefore, just convert string without checking */
> +	wlen = MultiByteToWideChar (cp_from, 0, ptr_from, len_from,
> +				    wbuf, NT_MAX_PATH);
> +      else
> +	{
> +	  char *tmpbuf = tp.c_get ();
> +	  memcpy (tmpbuf, stat->__value.__wchb, stat->__count);
> +	  if (stat->__count + len_from > NT_MAX_PATH)
> +	    len_from = NT_MAX_PATH - stat->__count;
> +	  memcpy (tmpbuf + stat->__count, ptr_from, len_from);
> +	  int total_len = stat->__count + len_from;
> +	  stat->__count = 0;
> +	  int mblen = 0;
> +	  for (const char *p = tmpbuf; p < tmpbuf + total_len; p += mblen)
> +	    /* Max bytes in multibyte char is 4. */
> +	    for (mblen = 1; mblen <= 4; mblen ++)
> +	      {
> +		/* Try conversion */
> +		int l = MultiByteToWideChar (cp_from, MB_ERR_INVALID_CHARS,
> +					     p, mblen,
> +					     wbuf + wlen, NT_MAX_PATH - wlen);
> +		if (l)
> +		  { /* Conversion Success */
> +		    wlen += l;
> +		    break;
> +		  }
> +		else if (mblen == 4)
> +		  { /* Conversion Fail */
> +		    l = MultiByteToWideChar (cp_from, 0, p, 1,
> +					     wbuf + wlen, NT_MAX_PATH - wlen);
> +		    wlen += l;
> +		    mblen = 1;
> +		    break;
> +		  }
> +		else if (p + mblen == tmpbuf + total_len)
> +		  { /* Multibyte char incomplete */
> +		    memcpy (stat->__value.__wchb, p, mblen);
> +		    stat->__count = mblen;
> +		    break;
> +		  }
> +		/* Retry conversion with extended length */
> +	      }
> +	}
> +      nlen = WideCharToMultiByte (cp_to, 0, wbuf, wlen,
> +				  ptr_to, *len_to, NULL, NULL);
>      }
>    else
>      {
>        /* Just copy */
> -      buf = (char *) HeapAlloc (GetProcessHeap (), 0, len_from);
> -      memcpy (buf, ptr_from, len_from);
> -      nlen = len_from;
> +      nlen = min (*len_to, len_from);
> +      memcpy (ptr_to, ptr_from, nlen);

if the *caller* checks for cp_to != cp_from, this memcpy could go
away and the caller could just use the already existing buffer.

Other than that, LGTM.


Thanks,
Corinna

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2020-09-09 14:12 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-09-09 13:40 [PATCH v2 0/2] Cygwin: pty: Changes regarding charset conversion Takashi Yano
2020-09-09 13:40 ` [PATCH v2 1/2] Cygwin: pty: Revise convert_mb_str() function Takashi Yano
2020-09-09 14:12   ` Corinna Vinschen
2020-09-09 13:40 ` [PATCH v2 2/2] Cygwin: pty: Fix input charset for non-cygwin apps with disable_pcon Takashi Yano

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).