public inbox for cygwin-cvs@sourceware.org
help / color / mirror / Atom feed
* [newlib-cygwin/main] Cygwin: Add ISO C11 functions c16rtomb, c32rtomb, mbrtoc16, mbrtoc32.
@ 2023-07-31 20:52 Corinna Vinschen
  0 siblings, 0 replies; only message in thread
From: Corinna Vinschen @ 2023-07-31 20:52 UTC (permalink / raw)
  To: cygwin-cvs

https://sourceware.org/git/gitweb.cgi?p=newlib-cygwin.git;h=4f258c55e87fe4c0ea7d5703c32890b63295bc23

commit 4f258c55e87fe4c0ea7d5703c32890b63295bc23
Author:     Corinna Vinschen <corinna@vinschen.de>
AuthorDate: Mon Jul 31 22:38:28 2023 +0200
Commit:     Corinna Vinschen <corinna@vinschen.de>
CommitDate: Mon Jul 31 22:39:09 2023 +0200

    Cygwin: Add ISO C11 functions c16rtomb, c32rtomb, mbrtoc16, mbrtoc32.
    
    Add uchar.h accordingly.
    
    For the c32 functions, use the internal functions wirtomb and mbrtowi
    as base, and convert wirtomb and mbrtowi to inline functions calling
    the c32 functions.
    
    Signed-off-by: Corinna Vinschen <corinna@vinschen.de>

Diff:
---
 winsup/cygwin/cygwin.din               |   4 ++
 winsup/cygwin/include/cygwin/version.h |   1 +
 winsup/cygwin/include/uchar.h          |  22 +++++++
 winsup/cygwin/local_includes/wchar.h   |  15 ++++-
 winsup/cygwin/release/3.5.0            |   2 +
 winsup/cygwin/strfuncs.cc              | 101 ++++++++++++++++++++++++++-------
 winsup/doc/new-features.xml            |   4 ++
 7 files changed, 127 insertions(+), 22 deletions(-)

diff --git a/winsup/cygwin/cygwin.din b/winsup/cygwin/cygwin.din
index b654661e9efe..ee646fddc862 100644
--- a/winsup/cygwin/cygwin.din
+++ b/winsup/cygwin/cygwin.din
@@ -268,6 +268,8 @@ bindresvport_sa = cygwin_bindresvport_sa SIGFE
 bsearch NOSIGFE
 btowc NOSIGFE
 bzero NOSIGFE
+c16rtomb NOSIGFE
+c32rtomb NOSIGFE
 cabs NOSIGFE
 cabsf NOSIGFE
 cabsl NOSIGFE
@@ -936,6 +938,8 @@ malloc_trim SIGFE
 malloc_usable_size SIGFE
 mallopt SIGFE
 mblen NOSIGFE
+mbrtoc16 NOSIGFE
+mbrtoc32 NOSIGFE
 mbrlen NOSIGFE
 mbrtowc NOSIGFE
 mbsinit NOSIGFE
diff --git a/winsup/cygwin/include/cygwin/version.h b/winsup/cygwin/include/cygwin/version.h
index 188dc6d08b0b..7bc3e5ec3b25 100644
--- a/winsup/cygwin/include/cygwin/version.h
+++ b/winsup/cygwin/include/cygwin/version.h
@@ -481,6 +481,7 @@ details. */
   345: Reinstantiate _alloca.
   346: (Belatedly) add posix_spawn_file_actions_addchdir_np,
        posix_spawn_file_actions_addfchdir_np.
+  347: Add c16rtomb, c32rtomb, mbrtoc16, mbrtoc32.
 
   Note that we forgot to bump the api for ualarm, strtoll, strtoull,
   sigaltstack, sethostname. */
diff --git a/winsup/cygwin/include/uchar.h b/winsup/cygwin/include/uchar.h
new file mode 100644
index 000000000000..84020553700d
--- /dev/null
+++ b/winsup/cygwin/include/uchar.h
@@ -0,0 +1,22 @@
+#ifndef _UCHAR_H
+#define	_UCHAR_H
+/* ISO C11 char16_t/char32_t types and multibyte conversion functions. */
+#include <sys/cdefs.h>
+#include <wchar.h>
+/* NOTE(review): char16_t/char32_t are built-in types in C++11 - confirm. */
+typedef	__uint16_t	char16_t;
+typedef	__uint32_t	char32_t;
+
+__BEGIN_DECLS
+
+size_t	c16rtomb(char * __restrict, char16_t, mbstate_t * __restrict);
+size_t	mbrtoc16(char16_t * __restrict, const char * __restrict, size_t,
+		 mbstate_t * __restrict);
+
+size_t	c32rtomb(char * __restrict, char32_t, mbstate_t * __restrict);
+size_t	mbrtoc32(char32_t * __restrict, const char * __restrict, size_t,
+		 mbstate_t * __restrict);
+
+__END_DECLS
+
+#endif /* _UCHAR_H */
diff --git a/winsup/cygwin/local_includes/wchar.h b/winsup/cygwin/local_includes/wchar.h
index ecf489cd588d..606559a6abc8 100644
--- a/winsup/cygwin/local_includes/wchar.h
+++ b/winsup/cygwin/local_includes/wchar.h
@@ -13,6 +13,7 @@ details. */
 
 /* Internal headers from newlib */
 #include "../locale/setlocale.h"
+#include <uchar.h>
 
 #define ENCODING_LEN 31
 
@@ -46,11 +47,19 @@ void wcintowcs (wchar_t *, wint_t *, size_t);
 
 /* replacement function for wcrtomb, converting a UTF-32 char to a
    multibyte string. */
-size_t wirtomb (char *, wint_t, mbstate_t *);
+static inline size_t
+wirtomb (char *s, wint_t wc, mbstate_t *ps)
+{
+  return c32rtomb (s,(char32_t) wc, ps);	/* plain forward to c32rtomb */
+}
 
 /* replacement function for mbrtowc, returning a wint_t representing
-   a UTF-32 value. Defined in strfuncs.cc */
-extern size_t mbrtowi (wint_t *, const char *, size_t, mbstate_t *);
+   a UTF-32 value. */
+static inline size_t
+mbrtowi (wint_t *pwc, const char *s, size_t n, mbstate_t *ps)
+{
+  return mbrtoc32 ((char32_t *) pwc, s, n, ps);	/* presumably 32-bit wint_t */
+}
 
 /* replacement function for mbsnrtowcs, returning a wint_t representing
    a UTF-32 value. Defined in strfuncs.cc.
diff --git a/winsup/cygwin/release/3.5.0 b/winsup/cygwin/release/3.5.0
index c8485b18c507..7c27e1bb8ca7 100644
--- a/winsup/cygwin/release/3.5.0
+++ b/winsup/cygwin/release/3.5.0
@@ -27,6 +27,8 @@ What's new:
 - New API calls: posix_spawn_file_actions_addchdir_np,
   posix_spawn_file_actions_addfchdir_np.
 
+- New API calls: c16rtomb, c32rtomb, mbrtoc16, mbrtoc32.
+
 What changed:
 -------------
 
diff --git a/winsup/cygwin/strfuncs.cc b/winsup/cygwin/strfuncs.cc
index 43ce93ed22da..770d40ed8f2f 100644
--- a/winsup/cygwin/strfuncs.cc
+++ b/winsup/cygwin/strfuncs.cc
@@ -10,6 +10,7 @@ details. */
 #include <stdlib.h>
 #include <sys/param.h>
 #include <wchar.h>
+#include <uchar.h>
 #include <ntdll.h>
 #include "path.h"
 #include "fhandler.h"
@@ -132,26 +133,30 @@ wcintowcs (wchar_t *dest, wint_t *src, size_t len)
 /* replacement function for wcrtomb, converting a UTF-32 char to a
    multibyte string. */
 extern "C" size_t
-wirtomb (char *s, wint_t wi, mbstate_t *ps)
+c32rtomb (char *s, char32_t wc, mbstate_t *ps)
 {
-    wchar_t wc[3] = { (wchar_t) wi, '\0', '\0' };
-    const wchar_t *wcp = wc;
-    size_t nwc = 1;
+    /* If s is NULL, behave as if s pointed to an internal buffer and wc
+       was a null wide character (L'\0').  wcrtomb will do that for us. */
+    if (wc <= 0xffff || !s)
+      return wcrtomb (s, (wchar_t) wc, ps);
+
+    wchar_t wc_arr[2];
+    const wchar_t *wcp = wc_arr;
+
+    wc -= 0x10000;
+    wc_arr[0] = (wc >> 10) + 0xd800;
+    wc_arr[1] = (wc & 0x3ff) + 0xdc00;
+    return wcsnrtombs (s, &wcp, 2, SIZE_MAX, ps);
+}
 
-    if (wi >= 0x10000)
-      {
-	wi -= 0x10000;
-	wc[0] = (wi >> 10) + 0xd800;
-	wc[1] = (wi & 0x3ff) + 0xdc00;
-	nwc = 2;
-      }
-    return wcsnrtombs (s, &wcp, nwc, SIZE_MAX, ps);
+extern "C" size_t
+c16rtomb (char *s, char16_t wc, mbstate_t *ps)
+{
+  return wcrtomb (s, (wchar_t) wc, ps);	/* UTF-16 unit passed on as wchar_t */
 }
 
-/* replacement function for mbrtowc, returning a wint_t representing
-   a UTF-32 value. */
 extern "C" size_t
-mbrtowi (wint_t *pwi, const char *s, size_t n, mbstate_t *ps)
+mbrtoc32 (char32_t *pwc, const char *s, size_t n, mbstate_t *ps)
 {
   size_t len, len2;
   wchar_t w1, w2;
@@ -159,8 +164,8 @@ mbrtowi (wint_t *pwi, const char *s, size_t n, mbstate_t *ps)
   len = mbrtowc (&w1, s, n, ps);
   if (len == (size_t) -1 || len == (size_t) -2)
     return len;
-  if (pwi)
-    *pwi = w1;
+  if (pwc && s)
+    *pwc = w1;
   /* Convert surrogate pair to wint_t value */
   if (len > 0 && w1 >= 0xd800 && w1 <= 0xdbff)
     {
@@ -170,8 +175,8 @@ mbrtowi (wint_t *pwi, const char *s, size_t n, mbstate_t *ps)
       if (len2 > 0 && w2 >= 0xdc00 && w2 <= 0xdfff)
 	{
 	  len += len2;
-	  if (pwi)
-	    *pwi = (((w1 & 0x3ff) << 10) | (w2 & 0x3ff)) + 0x10000;
+	  if (pwc && s)
+	    *pwc = (((w1 & 0x3ff) << 10) | (w2 & 0x3ff)) + 0x10000;
 	}
       else
 	{
@@ -182,6 +187,64 @@ mbrtowi (wint_t *pwi, const char *s, size_t n, mbstate_t *ps)
   return len;
 }
 
+/* Like mbrtowc, but we already defined how to return a surrogate, and
+   the definition of mbrtoc16 differs from that.
+   Return the high surrogate with a return value representing the length
+   of the entire multibyte sequence, and in the next call return the low
+   surrogate with a return value of -3. */
+extern "C" size_t
+mbrtoc16 (char16_t *pwc, const char *s, size_t n, mbstate_t *ps)
+{
+  int retval = 0;
+  struct _reent *reent = _REENT;
+  wchar_t wc;
+  /* No caller-supplied state: fall back to _REENT_MBRTOWC_STATE (reent). */
+  if (ps == NULL)
+    {
+      _REENT_CHECK_MISC(reent);
+      ps = &(_REENT_MBRTOWC_STATE(reent));
+    }
+  /* NOTE(review): the s == NULL call passes no &wc, so wc stays unset. */
+  if (s == NULL)
+    retval = __MBTOWC (reent, NULL, "", 1, ps);
+  else if (ps->__count == 0xdc00)
+    {
+      /* Return stored second half of the surrogate. */
+      if (pwc)
+	*pwc = ps->__value.__wch;
+      ps->__count = 0;
+      return -3;
+    }
+  else
+    retval = __MBTOWC (reent, &wc, s, n, ps);
+
+  if (retval == -1)
+    goto ilseq;
+  /* NOTE(review): wc may be unset here (s == NULL; retval -2?) - confirm. */
+  if (pwc)
+    *pwc = wc;
+  /* Did we catch the first half of a surrogate? */
+  if (wc >= 0xd800 && wc <= 0xdbff)
+    {
+      if (n <= (size_t) retval)	/* no input left for the low half */
+	goto ilseq;
+      int r2 = __MBTOWC (reent, &wc, s + retval, n, ps); /* NOTE(review): n - retval? */
+      if (r2 == -1)
+	goto ilseq;
+      /* Store second half of the surrogate in state, and return the
+	 length of the entire multibyte sequence. */
+      ps->__count = 0xdc00;
+      ps->__value.__wch = wc;
+      retval += r2;
+    }
+  return (size_t)retval;
+  /* Error exit: clear ps->__count, set errno to EILSEQ, return -1. */
+ilseq:
+  ps->__count = 0;
+  _REENT_ERRNO(reent) = EILSEQ;
+  return (size_t)(-1);
+}
+
 extern "C" size_t
 mbsnrtowci(wint_t *dst, const char **src, size_t nms, size_t len, mbstate_t *ps)
 {
diff --git a/winsup/doc/new-features.xml b/winsup/doc/new-features.xml
index f2e7a07731d0..b6d3e3a30a36 100644
--- a/winsup/doc/new-features.xml
+++ b/winsup/doc/new-features.xml
@@ -50,6 +50,10 @@ Add support for GB18030 codeset.
   posix_spawn_file_actions_addfchdir_np.
 </para></listitem>
 
+<listitem><para>
+New API calls: c16rtomb, c32rtomb, mbrtoc16, mbrtoc32.
+</para></listitem>
+
 </itemizedlist>
 
 </sect2>

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2023-07-31 20:52 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-07-31 20:52 [newlib-cygwin/main] Cygwin: Add ISO C11 functions c16rtomb, c32rtomb, mbrtoc16, mbrtoc32 Corinna Vinschen

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).