public inbox for cygwin-cvs@sourceware.org
help / color / mirror / Atom feed
* [newlib-cygwin/main] Cygwin: Add ISO C2X functions c8rtomb, mbrtoc8
@ 2023-08-02 15:04 Corinna Vinschen
0 siblings, 0 replies; only message in thread
From: Corinna Vinschen @ 2023-08-02 15:04 UTC (permalink / raw)
To: cygwin-cvs
https://sourceware.org/git/gitweb.cgi?p=newlib-cygwin.git;h=c49bc478b4a7e7d00a0c8540b8d6b6e99453443e
commit c49bc478b4a7e7d00a0c8540b8d6b6e99453443e
Author: Corinna Vinschen <corinna@vinschen.de>
AuthorDate: Wed Aug 2 16:55:52 2023 +0200
Commit: Corinna Vinschen <corinna@vinschen.de>
CommitDate: Wed Aug 2 16:56:24 2023 +0200
Cygwin: Add ISO C2X functions c8rtomb, mbrtoc8
Signed-off-by: Corinna Vinschen <corinna@vinschen.de>
Diff:
---
winsup/cygwin/include/cygwin/version.h | 3 +-
winsup/cygwin/include/uchar.h | 14 ++-
winsup/cygwin/release/3.5.0 | 2 +-
winsup/cygwin/strfuncs.cc | 166 +++++++++++++++++++++++++++++++++
winsup/doc/new-features.xml | 6 +-
5 files changed, 184 insertions(+), 7 deletions(-)
diff --git a/winsup/cygwin/include/cygwin/version.h b/winsup/cygwin/include/cygwin/version.h
index 7bc3e5ec3b25..833de646c563 100644
--- a/winsup/cygwin/include/cygwin/version.h
+++ b/winsup/cygwin/include/cygwin/version.h
@@ -482,12 +482,13 @@ details. */
346: (Belatedly) add posix_spawn_file_actions_addchdir_np,
posix_spawn_file_actions_addfchdir_np.
347: Add c16rtomb, c32rtomb, mbrtoc16, mbrtoc32.
+ 348: Add c8rtomb, mbrtoc8.
Note that we forgot to bump the api for ualarm, strtoll, strtoull,
sigaltstack, sethostname. */
#define CYGWIN_VERSION_API_MAJOR 0
-#define CYGWIN_VERSION_API_MINOR 347
+#define CYGWIN_VERSION_API_MINOR 348
/* There is also a compatibility version number associated with the shared memory
regions. It is incremented when incompatible changes are made to the shared
diff --git a/winsup/cygwin/include/uchar.h b/winsup/cygwin/include/uchar.h
index bf865ff16e7f..ed548ac7360e 100644
--- a/winsup/cygwin/include/uchar.h
+++ b/winsup/cygwin/include/uchar.h
@@ -4,8 +4,11 @@
#include <sys/cdefs.h>
#include <wchar.h>
-typedef __uint16_t char16_t;
-typedef __uint32_t char32_t;
+/* Either C2x or if C++ doesn't already define char8_t */
+#if __ISO_C_VISIBLE >= 2020 && !defined (__cpp_char8_t)
+typedef unsigned char char8_t;
+#endif
+
/* C++11 already defines those types. */
#if !defined (__cplusplus) || (__cplusplus - 0 < 201103L)
typedef __uint_least16_t char16_t;
@@ -14,6 +17,13 @@ typedef __uint_least32_t char32_t;
__BEGIN_DECLS
+/* Either C2x or if C++ defines char8_t */
+#if __ISO_C_VISIBLE >= 2020 || defined (__cpp_char8_t)
+size_t c8rtomb(char * __restrict, char8_t, mbstate_t * __restrict);
+size_t mbrtoc8(char8_t * __restrict, const char * __restrict, size_t,
+ mbstate_t * __restrict);
+#endif
+
size_t c16rtomb(char * __restrict, char16_t, mbstate_t * __restrict);
size_t mbrtoc16(char16_t * __restrict, const char * __restrict, size_t,
mbstate_t * __restrict);
diff --git a/winsup/cygwin/release/3.5.0 b/winsup/cygwin/release/3.5.0
index 7c27e1bb8ca7..d71de50de536 100644
--- a/winsup/cygwin/release/3.5.0
+++ b/winsup/cygwin/release/3.5.0
@@ -27,7 +27,7 @@ What's new:
- New API calls: posix_spawn_file_actions_addchdir_np,
posix_spawn_file_actions_addfchdir_np.
-- New API calls: c16rtomb, c32rtomb, mbrtoc16, mbrtoc32.
+- New API calls: c8rtomb, c16rtomb, c32rtomb, mbrtoc8, mbrtoc16, mbrtoc32.
What changed:
-------------
diff --git a/winsup/cygwin/strfuncs.cc b/winsup/cygwin/strfuncs.cc
index 770d40ed8f2f..cbcd0ec464ee 100644
--- a/winsup/cygwin/strfuncs.cc
+++ b/winsup/cygwin/strfuncs.cc
@@ -155,6 +155,103 @@ c16rtomb (char *s, char16_t wc, mbstate_t *ps)
return wcrtomb (s, (wchar_t) wc, ps);
}
+/* Convert one UTF-8 code unit C8 to the current multibyte encoding.
+
+   Code units are collected in *PS until a complete UTF-8 sequence has been
+   seen; the decoded code point is then handed to c32rtomb and its result is
+   returned.  Returns 0 while the sequence is still incomplete, 1 if S is
+   NULL (the state is merely reset), and (size_t)-1 with errno set to EILSEQ
+   if C8 is not valid at this position.  If PS is NULL, the state object
+   obtained via _REENT_MBRTOWC_STATE is used.
+
+   State layout while a sequence is pending: bits 8-15 of ps->__count hold
+   the marker 0xc8, the low two bits hold the number of code units collected
+   so far, and the code units themselves are kept in ps->__value.__wchb in
+   the order they arrived.  */
+extern "C" size_t
+c8rtomb (char *s, char8_t c8, mbstate_t *ps)
+{
+  struct _reent *reent = _REENT;
+  char32_t wc;
+
+  if (ps == NULL)
+    {
+      _REENT_CHECK_MISC(reent);
+      ps = &(_REENT_MBRTOWC_STATE(reent));
+    }
+
+  if (s == NULL)
+    {
+      ps->__count = 0;
+      return 1;
+    }
+
+  if ((ps->__count & 0xff00) == 0xc800)
+    {
+      /* Continue a sequence started by an earlier call.  */
+      char8_t &lead = ps->__value.__wchb[0];
+      char8_t &second = ps->__value.__wchb[1];
+      char8_t &third = ps->__value.__wchb[2];
+      const int collected = ps->__count & 0x3;
+
+      /* Everything after the lead octet must be of the form 10xxxxxx.
+         A pending state with nothing collected shouldn't happen and is
+         rejected as well.  */
+      if (collected == 0 || c8 <= 0x7f || c8 >= 0xc0)
+        goto ilseq;
+
+      if (collected == 1)
+        {
+          /* The second octet has a narrower valid range for some leads:
+             E0 and F0 must not start an overlong encoding, ED must not
+             encode a surrogate, F4 must not exceed U+10FFFF.  */
+          const bool out_of_range = (lead == 0xe0 && c8 <= 0x9f)
+                                    || (lead == 0xed && c8 >= 0xa0)
+                                    || (lead == 0xf0 && c8 <= 0x8f)
+                                    || (lead == 0xf4 && c8 >= 0x90);
+
+          if (out_of_range)
+            goto ilseq;
+          if (lead >= 0xe0)
+            {
+              /* Three or four octet sequence, more to come.  */
+              ps->__count = 0xc802;
+              second = c8;
+              return 0;
+            }
+          wc = ((lead & 0x1f) << 6) | (c8 & 0x3f);
+        }
+      else if (collected == 2)
+        {
+          if (lead >= 0xf0)
+            {
+              /* Four octet sequence, one more to come.  */
+              ps->__count = 0xc803;
+              third = c8;
+              return 0;
+            }
+          wc = ((lead & 0x0f) << 12) | ((second & 0x3f) << 6) | (c8 & 0x3f);
+        }
+      else
+        {
+          /* collected == 3: final octet of a four octet sequence.  */
+          wc = ((lead & 0x07) << 18) | ((second & 0x3f) << 12)
+               | ((third & 0x3f) << 6) | (c8 & 0x3f);
+        }
+    }
+  else if (c8 <= 0x7f)
+    {
+      /* Single octet, converted right away.  */
+      wc = c8;
+    }
+  else if (c8 >= 0xc2 && c8 <= 0xf4)
+    {
+      /* Valid lead octet: remember it and wait for the rest.  */
+      ps->__count = 0xc801;
+      ps->__value.__wchb[0] = c8;
+      return 0;
+    }
+  else
+    goto ilseq;
+
+  /* Sequence complete: drop our private state before converting.  */
+  ps->__count = 0;
+  return c32rtomb (s, wc, ps);
+
+ilseq:
+  ps->__count = 0;
+  _REENT_ERRNO(reent) = EILSEQ;
+  return (size_t)(-1);
+}
+
+
extern "C" size_t
mbrtoc32 (char32_t *pwc, const char *s, size_t n, mbstate_t *ps)
{
@@ -245,6 +342,75 @@ ilseq:
return (size_t)(-1);
}
+/* Convert the next multibyte character in S (at most N bytes) to UTF-8,
+   handing out one code unit per call.
+
+   The first call for a character decodes it with mbrtoc32, stores the first
+   UTF-8 code unit in *PC8 (if PC8 is not NULL) and returns mbrtoc32's
+   result.  Remaining code units are parked in *PS; each following call
+   stores the next one in *PC8 and returns (size_t)-3 without looking at S.
+   Returns 1 if S is NULL (the state is merely reset), passes through
+   (size_t)-1 and (size_t)-2 from mbrtoc32, and returns (size_t)-1 with
+   errno set to EILSEQ if the decoded value is above 0x10ffff.  If PS is
+   NULL, the state object obtained via _REENT_MBRTOWC_STATE is used.
+
+   State layout while code units are pending: bits 8-15 of ps->__count hold
+   the marker 0xc8 and the low two bits hold the number of code units still
+   to be delivered.  The code units are stored back to front in
+   ps->__value.__wchb, so the next one to deliver is at index count - 1.  */
+extern "C" size_t
+mbrtoc8 (char8_t *pc8, const char *s, size_t n, mbstate_t *ps)
+{
+  struct _reent *reent = _REENT;
+  size_t len;
+  char32_t wc = 0;
+
+  if (ps == NULL)
+    {
+      _REENT_CHECK_MISC(reent);
+      ps = &(_REENT_MBRTOWC_STATE(reent));
+    }
+
+  if (s == NULL)
+    {
+      ps->__count = 0;
+      return 1;
+    }
+
+  if ((ps->__count & 0xff00) == 0xc800)
+    {
+      /* Return next utf-8 octet in line.  The pending counter has to be
+         written back on every call, whether or not the caller wants the
+         octet stored; otherwise sequences longer than two octets would
+         deliver the same octet over and over.  */
+      int idx = (ps->__count & 0x3) - 1;
+
+      if (pc8)
+        *pc8 = ps->__value.__wchb[idx];
+      ps->__count = idx > 0 ? (0xc800 | idx) : 0;
+      return (size_t)(-3);
+    }
+
+  len = mbrtoc32 (&wc, s, n, ps);
+  /* On error or an incomplete sequence mbrtoc32 delivered no character;
+     wc must not be interpreted and the state it left in *PS must not be
+     touched.  */
+  if (len == (size_t)(-1) || len == (size_t)(-2))
+    return len;
+
+  /* This also covers len == 0, so the null character is stored in *PC8.  */
+  /* octets stored back to front for easier indexing */
+  switch (wc)
+    {
+    case 0 ... 0x7f:
+      ps->__value.__wchb[0] = wc;
+      ps->__count = 0;
+      break;
+    case 0x80 ... 0x7ff:
+      ps->__value.__wchb[1] = 0xc0 | ((wc & 0x7c0) >> 6);
+      ps->__value.__wchb[0] = 0x80 | (wc & 0x3f);
+      ps->__count = 0xc800 | 1;
+      break;
+    case 0x800 ... 0xffff:
+      ps->__value.__wchb[2] = 0xe0 | ((wc & 0xf000) >> 12);
+      ps->__value.__wchb[1] = 0x80 | ((wc & 0xfc0) >> 6);
+      ps->__value.__wchb[0] = 0x80 | (wc & 0x3f);
+      ps->__count = 0xc800 | 2;
+      break;
+    case 0x10000 ... 0x10ffff:
+      ps->__value.__wchb[3] = 0xf0 | ((wc & 0x1c0000) >> 18);
+      ps->__value.__wchb[2] = 0x80 | ((wc & 0x3f000) >> 12);
+      ps->__value.__wchb[1] = 0x80 | ((wc & 0xfc0) >> 6);
+      ps->__value.__wchb[0] = 0x80 | (wc & 0x3f);
+      ps->__count = 0xc800 | 3;
+      break;
+    default:
+      ps->__count = 0;
+      _REENT_ERRNO(reent) = EILSEQ;
+      return (size_t)(-1);
+    }
+  /* The lead octet sits at the highest used index, which equals the number
+     of octets left pending.  */
+  if (pc8)
+    *pc8 = ps->__value.__wchb[ps->__count & 0x3];
+  return len;
+}
+
+
extern "C" size_t
mbsnrtowci(wint_t *dst, const char **src, size_t nms, size_t len, mbstate_t *ps)
{
diff --git a/winsup/doc/new-features.xml b/winsup/doc/new-features.xml
index b6d3e3a30a36..14644aa85ebe 100644
--- a/winsup/doc/new-features.xml
+++ b/winsup/doc/new-features.xml
@@ -46,12 +46,12 @@ Add support for GB18030 codeset.
</para></listitem>
<listitem><para>
-- New API calls: posix_spawn_file_actions_addchdir_np,
- posix_spawn_file_actions_addfchdir_np.
+New API calls: posix_spawn_file_actions_addchdir_np,
+posix_spawn_file_actions_addfchdir_np.
</para></listitem>
<listitem><para>
-- New API calls: c16rtomb, c32rtomb, mbrtoc16, mbrtoc32.
+New API calls: c8rtomb, c16rtomb, c32rtomb, mbrtoc8, mbrtoc16, mbrtoc32.
</para></listitem>
</itemizedlist>
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2023-08-02 15:04 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-08-02 15:04 [newlib-cygwin/main] Cygwin: Add ISO C2X functions c8rtomb, mbrtoc8 Corinna Vinschen
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).