public inbox for cygwin-cvs@sourceware.org
help / color / mirror / Atom feed
* [newlib-cygwin/main] Cygwin: Add ISO C2X functions c8rtomb, mbrtoc8
@ 2023-08-02 15:04 Corinna Vinschen
  0 siblings, 0 replies; only message in thread
From: Corinna Vinschen @ 2023-08-02 15:04 UTC (permalink / raw)
  To: cygwin-cvs

https://sourceware.org/git/gitweb.cgi?p=newlib-cygwin.git;h=c49bc478b4a7e7d00a0c8540b8d6b6e99453443e

commit c49bc478b4a7e7d00a0c8540b8d6b6e99453443e
Author:     Corinna Vinschen <corinna@vinschen.de>
AuthorDate: Wed Aug 2 16:55:52 2023 +0200
Commit:     Corinna Vinschen <corinna@vinschen.de>
CommitDate: Wed Aug 2 16:56:24 2023 +0200

    Cygwin: Add ISO C2X functions c8rtomb, mbrtoc8
    
    Signed-off-by: Corinna Vinschen <corinna@vinschen.de>

Diff:
---
 winsup/cygwin/include/cygwin/version.h |   3 +-
 winsup/cygwin/include/uchar.h          |  14 ++-
 winsup/cygwin/release/3.5.0            |   2 +-
 winsup/cygwin/strfuncs.cc              | 166 +++++++++++++++++++++++++++++++++
 winsup/doc/new-features.xml            |   6 +-
 5 files changed, 184 insertions(+), 7 deletions(-)

diff --git a/winsup/cygwin/include/cygwin/version.h b/winsup/cygwin/include/cygwin/version.h
index 7bc3e5ec3b25..833de646c563 100644
--- a/winsup/cygwin/include/cygwin/version.h
+++ b/winsup/cygwin/include/cygwin/version.h
@@ -482,12 +482,13 @@ details. */
   346: (Belatedly) add posix_spawn_file_actions_addchdir_np,
        posix_spawn_file_actions_addfchdir_np.
   347: Add c16rtomb, c32rtomb, mbrtoc16, mbrtoc32.
+  348: Add c8rtomb, mbrtoc8.
 
   Note that we forgot to bump the api for ualarm, strtoll, strtoull,
   sigaltstack, sethostname. */
 
 #define CYGWIN_VERSION_API_MAJOR 0
-#define CYGWIN_VERSION_API_MINOR 346
+#define CYGWIN_VERSION_API_MINOR 348
 
 /* There is also a compatibity version number associated with the shared memory
    regions.  It is incremented when incompatible changes are made to the shared
diff --git a/winsup/cygwin/include/uchar.h b/winsup/cygwin/include/uchar.h
index bf865ff16e7f..ed548ac7360e 100644
--- a/winsup/cygwin/include/uchar.h
+++ b/winsup/cygwin/include/uchar.h
@@ -4,8 +4,11 @@
 #include <sys/cdefs.h>
 #include <wchar.h>
 
-typedef	__uint16_t	char16_t;
-typedef	__uint32_t	char32_t;
+/* C2x only, and only if C++ doesn't already define char8_t */
+#if __ISO_C_VISIBLE >= 2020 && !defined (__cpp_char8_t)
+typedef unsigned char		char8_t;
+#endif
+
 /* C++11 already defines those types. */
 #if !defined (__cplusplus) || (__cplusplus - 0 < 201103L)
 typedef	__uint_least16_t	char16_t;
@@ -14,6 +17,13 @@ typedef	__uint_least32_t	char32_t;
 
 __BEGIN_DECLS
 
+/* Either C2x or if C++ defines char8_t */
+#if __ISO_C_VISIBLE >= 2020 || defined (__cpp_char8_t)
+size_t  c8rtomb(char * __restrict, char8_t, mbstate_t * __restrict);
+size_t	mbrtoc8(char8_t * __restrict, const char * __restrict, size_t,
+		mbstate_t * __restrict);
+#endif
+
 size_t	c16rtomb(char * __restrict, char16_t, mbstate_t * __restrict);
 size_t	mbrtoc16(char16_t * __restrict, const char * __restrict, size_t,
 		 mbstate_t * __restrict);
diff --git a/winsup/cygwin/release/3.5.0 b/winsup/cygwin/release/3.5.0
index 7c27e1bb8ca7..d71de50de536 100644
--- a/winsup/cygwin/release/3.5.0
+++ b/winsup/cygwin/release/3.5.0
@@ -27,7 +27,7 @@ What's new:
 - New API calls: posix_spawn_file_actions_addchdir_np,
   posix_spawn_file_actions_addfchdir_np.
 
-- New API calls: c16rtomb, c32rtomb, mbrtoc16, mbrtoc32.
+- New API calls: c8rtomb, c16rtomb, c32rtomb, mbrtoc8, mbrtoc16, mbrtoc32.
 
 What changed:
 -------------
diff --git a/winsup/cygwin/strfuncs.cc b/winsup/cygwin/strfuncs.cc
index 770d40ed8f2f..cbcd0ec464ee 100644
--- a/winsup/cygwin/strfuncs.cc
+++ b/winsup/cygwin/strfuncs.cc
@@ -155,6 +155,103 @@ c16rtomb (char *s, char16_t wc, mbstate_t *ps)
   return wcrtomb (s, (wchar_t) wc, ps);
 }
 
+/* Feed one UTF-8 octet C8 into the decoder; once a character is complete
+   it is passed on to c32rtomb.
+
+   S   output buffer; if NULL, the conversion state is reset and 1 returned.
+   C8  next octet of the UTF-8 input.
+   PS  conversion state; if NULL, _REENT_MBRTOWC_STATE of the current reent
+       is used.  NOTE(review): that slot is named after mbrtowc -- confirm
+       that sharing it is intended.
+
+   Octets of an unfinished sequence are parked in ps->__value.__wchb while
+   ps->__count holds 0xc800 plus the number of octets parked.  Returns 0
+   while the sequence is still incomplete, otherwise the result of c32rtomb
+   for the decoded character, or (size_t) -1 with errno set to EILSEQ on an
+   invalid octet.  */
+extern "C" size_t
+c8rtomb (char *s, char8_t c8, mbstate_t *ps)
+{
+  struct _reent *reent = _REENT;
+  char32_t wc;
+
+  if (ps == NULL)
+    {
+      _REENT_CHECK_MISC(reent);
+      ps = &(_REENT_MBRTOWC_STATE(reent));
+    }
+
+  /* No output buffer: just drop any partial sequence. */
+  if (s == NULL)
+    {
+      ps->__count = 0;
+      return 1;
+    }
+
+  if ((ps->__count & 0xff00) == 0xc800)
+    {
+      /* In the middle of a sequence: C8 must be a continuation octet. */
+      unsigned char *seq = ps->__value.__wchb;
+      unsigned char lead = seq[0];
+      int have = ps->__count & 0x3;
+
+      if (c8 < 0x80 || c8 > 0xbf)
+	goto ilseq;
+      if (have == 1)
+	{
+	  /* The lead octet narrows the range allowed for the 2nd octet. */
+	  if ((lead == 0xe0 && c8 < 0xa0) || (lead == 0xed && c8 > 0x9f)
+	      || (lead == 0xf0 && c8 < 0x90) || (lead == 0xf4 && c8 > 0x8f))
+	    goto ilseq;
+	  if (lead >= 0xe0)
+	    {
+	      /* 3 or 4 octet sequence, more to come. */
+	      ps->__count = 0xc802;
+	      seq[1] = c8;
+	      return 0;
+	    }
+	  wc = ((lead & 0x1f) << 6) | (c8 & 0x3f);
+	}
+      else if (have == 2)
+	{
+	  if (lead >= 0xf0)
+	    {
+	      /* 4 octet sequence, one more to come. */
+	      ps->__count = 0xc803;
+	      seq[2] = c8;
+	      return 0;
+	    }
+	  wc = ((lead & 0x0f) << 12) | ((seq[1] & 0x3f) << 6) | (c8 & 0x3f);
+	}
+      else if (have == 3)
+	/* Final octet of a 4 octet sequence. */
+	wc = ((lead & 0x07) << 18) | ((seq[1] & 0x3f) << 12)
+	     | ((seq[2] & 0x3f) << 6) | (c8 & 0x3f);
+      else /* Shouldn't happen */
+	goto ilseq;
+    }
+  else if (c8 <= 0x7f)
+    wc = c8;			/* single octet */
+  else if (c8 >= 0xc2 && c8 <= 0xf4)
+    {
+      /* valid lead byte, start collecting */
+      ps->__count = 0xc801;
+      ps->__value.__wchb[0] = c8;
+      return 0;
+    }
+  else
+    goto ilseq;
+
+  /* Character complete: back to the initial state and emit it. */
+  ps->__count = 0;
+  return c32rtomb (s, wc, ps);
+
+ilseq:
+  ps->__count = 0;
+  _REENT_ERRNO(reent) = EILSEQ;
+  return (size_t)(-1);
+}
+
 extern "C" size_t
 mbrtoc32 (char32_t *pwc, const char *s, size_t n, mbstate_t *ps)
 {
@@ -245,6 +342,75 @@ ilseq:
   return (size_t)(-1);
 }
 
+/* Convert the next multibyte char in S to UTF-8.  The first octet is stored
+   right away, each remaining one by a follow-up call returning (size_t) -3.
+   Pending octets live in ps->__value.__wchb, flagged by 0xc8xx in __count. */
+extern "C" size_t
+mbrtoc8 (char8_t *pc8, const char *s, size_t n, mbstate_t *ps)
+{
+  struct _reent *reent = _REENT;
+  size_t len;
+  char32_t wc;
+
+  if (ps == NULL)
+    {
+      _REENT_CHECK_MISC(reent);
+      ps = &(_REENT_MBRTOWC_STATE(reent));
+    }
+  if (s == NULL)
+    {
+      ps->__count = 0;
+      return 1;
+    }
+  if ((ps->__count & 0xff00) == 0xc800)
+    {
+      /* Return next utf-8 octet in line; consume it even if PC8 is NULL. */
+      int idx = (ps->__count & 0x3) - 1;
+
+      if (pc8)
+	*pc8 = ps->__value.__wchb[idx];
+      ps->__count = idx ? (0xc800 | idx) : 0;
+      return (size_t)(-3);
+    }
+  len = mbrtoc32 (&wc, s, n, ps);
+  if (len != (size_t)(-1) && len != (size_t)(-2))	/* else WC is unset */
+    {
+      /* octets stored back to front for easier indexing */
+      switch (wc)
+	{
+	case 0 ... 0x7f:	/* includes NUL, for which LEN is 0 */
+	  ps->__value.__wchb[0] = wc;
+	  ps->__count = 0;
+	  break;
+	case 0x80 ... 0x7ff:
+	  ps->__value.__wchb[1] = 0xc0 | ((wc & 0x7c0) >> 6);
+	  ps->__value.__wchb[0] = 0x80 |  (wc &  0x3f);
+	  ps->__count = 0xc800 | 1;
+	  break;
+	case 0x800 ... 0xffff:
+	  ps->__value.__wchb[2] = 0xe0 | ((wc & 0xf000) >> 12);
+	  ps->__value.__wchb[1] = 0x80 | ((wc &  0xfc0) >> 6);
+	  ps->__value.__wchb[0] = 0x80 |  (wc &   0x3f);
+	  ps->__count = 0xc800 | 2;
+	  break;
+	case 0x10000 ... 0x10ffff:
+	  ps->__value.__wchb[3] = 0xf0 | ((wc & 0x1c0000) >> 18);
+	  ps->__value.__wchb[2] = 0x80 | ((wc &  0x3f000) >> 12);
+	  ps->__value.__wchb[1] = 0x80 | ((wc &    0xfc0) >> 6);
+	  ps->__value.__wchb[0] = 0x80 |  (wc &     0x3f);
+	  ps->__count = 0xc800 | 3;
+	  break;
+	default:
+	  ps->__count = 0;
+	  _REENT_ERRNO(reent) = EILSEQ;
+	  return (size_t)(-1);
+	}
+      if (pc8)
+	*pc8 = ps->__value.__wchb[ps->__count & 0x3];
+    }
+  return len;
+}
+
 extern "C" size_t
 mbsnrtowci(wint_t *dst, const char **src, size_t nms, size_t len, mbstate_t *ps)
 {
diff --git a/winsup/doc/new-features.xml b/winsup/doc/new-features.xml
index b6d3e3a30a36..14644aa85ebe 100644
--- a/winsup/doc/new-features.xml
+++ b/winsup/doc/new-features.xml
@@ -46,12 +46,12 @@ Add support for GB18030 codeset.
 </para></listitem>
 
 <listitem><para>
-- New API calls: posix_spawn_file_actions_addchdir_np,
-  posix_spawn_file_actions_addfchdir_np.
+New API calls: posix_spawn_file_actions_addchdir_np,
+posix_spawn_file_actions_addfchdir_np.
 </para></listitem>
 
 <listitem><para>
-- New API calls: c16rtomb, c32rtomb, mbrtoc16, mbrtoc32.
+New API calls: c8rtomb, c16rtomb, c32rtomb, mbrtoc8, mbrtoc16, mbrtoc32.
 </para></listitem>
 
 </itemizedlist>

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2023-08-02 15:04 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-08-02 15:04 [newlib-cygwin/main] Cygwin: Add ISO C2X functions c8rtomb, mbrtoc8 Corinna Vinschen

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).