[newlib-cygwin] use generated character data

public inbox for newlib-cvs@sourceware.org
help / color / mirror / Atom feed

* [newlib-cygwin] use generated character data
@ 2018-03-12 10:40 Corinna Vinschen
  0 siblings, 0 replies; only message in thread
From: Corinna Vinschen @ 2018-03-12 10:40 UTC (permalink / raw)
  To: newlib-cvs

https://sourceware.org/git/gitweb.cgi?p=newlib-cygwin.git;h=41f72ab4d7c404b8ac2a5e1187d79164992e4483

commit 41f72ab4d7c404b8ac2a5e1187d79164992e4483
Author: Thomas Wolff <mintty@users.noreply.github.com>
Date:   Fri Mar 9 13:30:33 2018 +0100

    use generated character data
    
    The tow* functions use an included case conversion table which can be
    generated from Unicode data.
    The isw* functions use a character categories table (provided by
    categories.c) which can be generated from Unicode data.
    The delegation between the current-locale functions and the
    locale-specific functions (*_l.c) was reversed: the current-locale
    functions now call the generic locale-dependent *_l functions.
    This is however only relevant on systems with non-Unicode wide character
    locales, thus not on Cygwin.

Diff:
---
 newlib/libc/ctype/Makefile.am   |   1 +
 newlib/libc/ctype/Makefile.in   |  12 +-
 newlib/libc/ctype/categories.c  |  39 +++
 newlib/libc/ctype/categories.h  |   7 +
 newlib/libc/ctype/iswalnum.c    |   2 +-
 newlib/libc/ctype/iswalnum_l.c  |  19 +-
 newlib/libc/ctype/iswalpha.c    | 370 +----------------------------
 newlib/libc/ctype/iswalpha_l.c  |  17 +-
 newlib/libc/ctype/iswblank.c    |  19 +-
 newlib/libc/ctype/iswblank_l.c  |  16 +-
 newlib/libc/ctype/iswcntrl.c    |  17 +-
 newlib/libc/ctype/iswcntrl_l.c  |  16 +-
 newlib/libc/ctype/iswctype_l.c  |  37 ++-
 newlib/libc/ctype/iswdigit.c    |   3 +-
 newlib/libc/ctype/iswdigit_l.c  |   2 +-
 newlib/libc/ctype/iswgraph.c    |   3 +-
 newlib/libc/ctype/iswgraph_l.c  |  19 +-
 newlib/libc/ctype/iswlower.c    |   4 +-
 newlib/libc/ctype/iswlower_l.c  |  16 +-
 newlib/libc/ctype/iswprint.c    | 433 +--------------------------------
 newlib/libc/ctype/iswprint_l.c  |  17 +-
 newlib/libc/ctype/iswpunct.c    |   7 +-
 newlib/libc/ctype/iswpunct_l.c  |  22 +-
 newlib/libc/ctype/iswspace.c    |  20 +-
 newlib/libc/ctype/iswspace_l.c  |  17 +-
 newlib/libc/ctype/iswupper.c    |   6 +-
 newlib/libc/ctype/iswupper_l.c  |  16 +-
 newlib/libc/ctype/iswxdigit.c   |   6 +-
 newlib/libc/ctype/jp2uc.c       |  51 +++-
 newlib/libc/ctype/local.h       |  19 +-
 newlib/libc/ctype/towctrans.c   |  16 +-
 newlib/libc/ctype/towctrans_l.c |  97 +++++++-
 newlib/libc/ctype/towlower.c    | 500 +-------------------------------------
 newlib/libc/ctype/towlower_l.c  |   7 +-
 newlib/libc/ctype/towupper.c    | 515 +---------------------------------------
 newlib/libc/ctype/towupper_l.c  |   8 +-
 newlib/libc/ctype/utf8alpha.h   | 355 ---------------------------
 newlib/libc/ctype/utf8print.h   | 389 ------------------------------
 38 files changed, 439 insertions(+), 2681 deletions(-)

diff --git a/newlib/libc/ctype/Makefile.am b/newlib/libc/ctype/Makefile.am
index 8986935..fa6a70d 100644
--- a/newlib/libc/ctype/Makefile.am
+++ b/newlib/libc/ctype/Makefile.am
@@ -24,6 +24,7 @@ if ELIX_LEVEL_1
 ELIX_SOURCES =
 else
 ELIX_SOURCES = \
+	categories.c	\
 	isalnum_l.c	\
 	isalpha_l.c	\
 	isascii.c 	\
diff --git a/newlib/libc/ctype/Makefile.in b/newlib/libc/ctype/Makefile.in
index 2b23317..9932a94 100644
--- a/newlib/libc/ctype/Makefile.in
+++ b/newlib/libc/ctype/Makefile.in
@@ -79,7 +79,8 @@ am__objects_1 = lib_a-ctype_.$(OBJEXT) lib_a-isalnum.$(OBJEXT) \
 	lib_a-ispunct.$(OBJEXT) lib_a-isspace.$(OBJEXT) \
 	lib_a-isxdigit.$(OBJEXT) lib_a-tolower.$(OBJEXT) \
 	lib_a-toupper.$(OBJEXT)
-@ELIX_LEVEL_1_FALSE@am__objects_2 = lib_a-isalnum_l.$(OBJEXT) \
+@ELIX_LEVEL_1_FALSE@am__objects_2 = lib_a-categories.$(OBJEXT) \
+@ELIX_LEVEL_1_FALSE@	lib_a-isalnum_l.$(OBJEXT) \
 @ELIX_LEVEL_1_FALSE@	lib_a-isalpha_l.$(OBJEXT) \
 @ELIX_LEVEL_1_FALSE@	lib_a-isascii.$(OBJEXT) \
 @ELIX_LEVEL_1_FALSE@	lib_a-isascii_l.$(OBJEXT) \
@@ -142,7 +143,7 @@ libctype_la_LIBADD =
 am__objects_3 = ctype_.lo isalnum.lo isalpha.lo iscntrl.lo isdigit.lo \
 	islower.lo isupper.lo isprint.lo ispunct.lo isspace.lo \
 	isxdigit.lo tolower.lo toupper.lo
-@ELIX_LEVEL_1_FALSE@am__objects_4 = isalnum_l.lo isalpha_l.lo \
+@ELIX_LEVEL_1_FALSE@am__objects_4 = categories.lo isalnum_l.lo isalpha_l.lo \
 @ELIX_LEVEL_1_FALSE@	isascii.lo isascii_l.lo isblank.lo \
 @ELIX_LEVEL_1_FALSE@	isblank_l.lo iscntrl_l.lo isdigit_l.lo \
 @ELIX_LEVEL_1_FALSE@	islower_l.lo isupper_l.lo isprint_l.lo \
@@ -351,6 +352,7 @@ GENERAL_SOURCES = \
 	toupper.c
 
 @ELIX_LEVEL_1_FALSE@ELIX_SOURCES = \
+@ELIX_LEVEL_1_FALSE@	categories.c	\
 @ELIX_LEVEL_1_FALSE@	isalnum_l.c	\
 @ELIX_LEVEL_1_FALSE@	isalpha_l.c	\
 @ELIX_LEVEL_1_FALSE@	isascii.c 	\
@@ -609,6 +611,12 @@ lib_a-toupper.o: toupper.c
 lib_a-toupper.obj: toupper.c
 	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-toupper.obj `if test -f 'toupper.c'; then $(CYGPATH_W) 'toupper.c'; else $(CYGPATH_W) '$(srcdir)/toupper.c'; fi`
 
+lib_a-categories.o: categories.c
+	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-categories.o `test -f 'categories.c' || echo '$(srcdir)/'`categories.c
+
+lib_a-categories.obj: categories.c
+	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-categories.obj `if test -f 'categories.c'; then $(CYGPATH_W) 'categories.c'; else $(CYGPATH_W) '$(srcdir)/categories.c'; fi`
+
 lib_a-isalnum_l.o: isalnum_l.c
 	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-isalnum_l.o `test -f 'isalnum_l.c' || echo '$(srcdir)/'`isalnum_l.c
 
diff --git a/newlib/libc/ctype/categories.c b/newlib/libc/ctype/categories.c
new file mode 100644
index 0000000..db285d7
--- /dev/null
+++ b/newlib/libc/ctype/categories.c
@@ -0,0 +1,39 @@
+#include <wctype.h>
+#include "categories.h"
+
+struct _category {	/* one table entry: a run of code points sharing a category */
+  enum category cat: 11;	/* category returned for any code point in the run */
+  unsigned int first: 21;	/* first code point of the run */
+  unsigned short delta;	/* run covers first .. first + delta, inclusive */
+} __attribute__((packed));
+
+static const struct _category categories[] = {
+#include "categories.t"
+};
+
+static enum category	/* category of ucs, or -1 if no entry of table contains it */
+bisearch_cat(wint_t ucs, const struct _category *table, int max)
+{
+  int min = 0;	/* search window is table[min..max], both ends inclusive */
+  int mid;
+
+  if (ucs < table[0].first || ucs > table[max].first + table[max].delta)
+    return -1;	/* outside the whole table: "not found", same as below (was 0) */
+  while (max >= min)
+    {
+      mid = (min + max) / 2;
+      if (ucs > table[mid].first + table[mid].delta)
+	min = mid + 1;
+      else if (ucs < table[mid].first)
+	max = mid - 1;
+      else
+	return table[mid].cat;	/* first <= ucs <= first + delta */
+    }
+  return -1;
+}
+
+enum category category(wint_t ucs)	/* look ucs up in the categories[] table above */
+{
+  return bisearch_cat(ucs, categories,
+		      sizeof(categories) / sizeof(*categories) - 1);	/* index of the last entry */
+}
diff --git a/newlib/libc/ctype/categories.h b/newlib/libc/ctype/categories.h
new file mode 100644
index 0000000..271038e
--- /dev/null
+++ b/newlib/libc/ctype/categories.h
@@ -0,0 +1,7 @@
+/* category data */
+
+enum category {
+#include "categories.cat"
+};
+
+extern enum category category(wint_t ucs);
diff --git a/newlib/libc/ctype/iswalnum.c b/newlib/libc/ctype/iswalnum.c
index 45273a8..7b2cac7 100644
--- a/newlib/libc/ctype/iswalnum.c
+++ b/newlib/libc/ctype/iswalnum.c
@@ -39,5 +39,5 @@ No supporting OS subroutines are required.
 int
 iswalnum (wint_t c)
 {
-  return (iswalpha (c) || iswdigit (c));
+  return iswalnum_l (c, 0);
 }
diff --git a/newlib/libc/ctype/iswalnum_l.c b/newlib/libc/ctype/iswalnum_l.c
index e4ab3dd..8802273 100644
--- a/newlib/libc/ctype/iswalnum_l.c
+++ b/newlib/libc/ctype/iswalnum_l.c
@@ -1,10 +1,23 @@
+/* Modified (m) 2017 Thomas Wolff: revise Unicode and locale/wchar handling */
 #include <_ansi.h>
+#include <ctype.h>
 #include <wctype.h>
+#include "local.h"
+#include "categories.h"
 
 int
 iswalnum_l (wint_t c, struct __locale_t *locale)
 {
-  /* We're using a locale-independent representation of upper/lower case
-     based on Unicode data.  Thus, the locale doesn't matter. */
-  return iswalpha (c) || iswdigit (c);
+#ifdef _MB_CAPABLE
+  //return iswalpha (c) || iswdigit (c);
+  c = _jp2uc_l (c, locale);
+  enum category cat = category (c);
+  return cat == CAT_LC || cat == CAT_Lu || cat == CAT_Ll || cat == CAT_Lt
+      || cat == CAT_Lm || cat == CAT_Lo
+      || cat == CAT_Nl // Letter_Number
+      || cat == CAT_Nd // Decimal_Number
+      ;
+#else
+  return c < (wint_t)0x100 ? isalnum (c) : 0;
+#endif /* _MB_CAPABLE */
 }
diff --git a/newlib/libc/ctype/iswalpha.c b/newlib/libc/ctype/iswalpha.c
index 2906cd1..3928772 100644
--- a/newlib/libc/ctype/iswalpha.c
+++ b/newlib/libc/ctype/iswalpha.c
@@ -1,5 +1,6 @@
 /* Copyright (c) 2002 Red Hat Incorporated.
    All rights reserved.
+   Modified (m) 2017 Thomas Wolff to refer to generated Unicode data tables.
 
    Redistribution and use in source and binary forms, with or without
    modification, are permitted provided that the following conditions are met:
@@ -63,377 +64,10 @@ PORTABILITY
 No supporting OS subroutines are required.
 */
 #include <_ansi.h>
-#include <newlib.h>
 #include <wctype.h>
-#include <string.h>
-#include <ctype.h>
-#include "local.h"
-
-#ifdef _MB_CAPABLE
-#include "utf8alpha.h"
-#endif /* _MB_CAPABLE */
 
 int
 iswalpha (wint_t c)
 {
-#ifdef _MB_CAPABLE
-  unsigned const char *table;
-  unsigned char *ptr;
-  unsigned char ctmp;
-  int size;
-  wint_t x;
-
-  c = _jp2uc (c);
-
-  /* Based on and tested against Unicode 5.2
-     See utf8alpha.h for a description how to fetch the data. */
-  x = (c >> 8);
-  /* for some large sections, all characters are alphabetic so handle them here */
-  if ((x >= 0x34 && x <= 0x4c) ||
-      (x >= 0x4e && x <= 0x9e) ||
-      (x >= 0xac && x <= 0xd6) ||
-      (x >= 0x120 && x <= 0x122) ||
-      (x >= 0x130 && x <= 0x133) ||
-      (x >= 0x200 && x <= 0x2a5) ||
-      (x >= 0x2a7 && x <= 0x2b6))
-    return 1;
-  
-  switch (x)
-    {
-    case 0x00:
-      table = u0;
-      size = sizeof(u0);
-      break;
-    case 0x01:
-    case 0x11:
-    case 0x15:
-    case 0x1e:
-    case 0xa0:
-    case 0xa1:
-    case 0xa2:
-    case 0xa3:
-    case 0xa5:
-    case 0xf9:
-    case 0xfc:
-    case 0x2f8:
-    case 0x2f9:
-      return 1;
-    case 0x02:
-      table = u2;
-      size = sizeof(u2);
-      break;
-    case 0x03:
-      table = u3;
-      size = sizeof(u3);
-      break;
-    case 0x04:
-      table = u4;
-      size = sizeof(u4);
-      break;
-    case 0x05:
-      table = u5;
-      size = sizeof(u5);
-      break;
-    case 0x06:
-      table = u6;
-      size = sizeof(u6);
-      break;
-    case 0x07:
-      table = u7;
-      size = sizeof(u7);
-      break;
-    case 0x08:
-      table = u8;
-      size = sizeof(u8);
-      break;
-    case 0x09:
-      table = u9;
-      size = sizeof(u9);
-      break;
-    case 0x0a:
-      table = ua;
-      size = sizeof(ua);
-      break;
-    case 0x0b:
-      table = ub;
-      size = sizeof(ub);
-      break;
-    case 0x0c:
-      table = uc;
-      size = sizeof(uc);
-      break;
-    case 0x0d:
-      table = ud;
-      size = sizeof(ud);
-      break;
-    case 0x0e:
-      table = ue;
-      size = sizeof(ue);
-      break;
-    case 0x0f:
-      table = uf;
-      size = sizeof(uf);
-      break;
-    case 0x10:
-      table = u10;
-      size = sizeof(u10);
-      break;
-    case 0x12:
-      table = u12;
-      size = sizeof(u12);
-      break;
-    case 0x13:
-      table = u13;
-      size = sizeof(u13);
-      break;
-    case 0x14:
-      table = u14;
-      size = sizeof(u14);
-      break;
-    case 0x16:
-      table = u16;
-      size = sizeof(u16);
-      break;
-    case 0x17:
-      table = u17;
-      size = sizeof(u17);
-      break;
-    case 0x18:
-      table = u18;
-      size = sizeof(u18);
-      break;
-    case 0x19:
-      table = u19;
-      size = sizeof(u19);
-      break;
-    case 0x1a:
-      table = u1a;
-      size = sizeof(u1a);
-      break;
-    case 0x1b:
-      table = u1b;
-      size = sizeof(u1b);
-      break;
-    case 0x1c:
-      table = u1c;
-      size = sizeof(u1c);
-      break;
-    case 0x1d:
-      table = u1d;
-      size = sizeof(u1d);
-      break;
-    case 0x1f:
-      table = u1f;
-      size = sizeof(u1f);
-      break;
-    case 0x20:
-      table = u20;
-      size = sizeof(u20);
-      break;
-    case 0x21:
-      table = u21;
-      size = sizeof(u21);
-      break;
-    case 0x24:
-      table = u24;
-      size = sizeof(u24);
-      break;
-    case 0x2c:
-      table = u2c;
-      size = sizeof(u2c);
-      break;
-    case 0x2d:
-      table = u2d;
-      size = sizeof(u2d);
-      break;
-    case 0x2e:
-      table = u2e;
-      size = sizeof(u2e);
-      break;
-    case 0x30:
-      table = u30;
-      size = sizeof(u30);
-      break;
-    case 0x31:
-      table = u31;
-      size = sizeof(u31);
-      break;
-    case 0x4d:
-      table = u4d;
-      size = sizeof(u4d);
-      break;
-    case 0x9f:
-      table = u9f;
-      size = sizeof(u9f);
-      break;
-    case 0xa4:
-      table = ua4;
-      size = sizeof(ua4);
-      break;
-    case 0xa6:
-      table = ua6;
-      size = sizeof(ua6);
-      break;
-    case 0xa7:
-      table = ua7;
-      size = sizeof(ua7);
-      break;
-    case 0xa8:
-      table = ua8;
-      size = sizeof(ua8);
-      break;
-    case 0xa9:
-      table = ua9;
-      size = sizeof(ua9);
-      break;
-    case 0xaa:
-      table = uaa;
-      size = sizeof(uaa);
-      break;
-    case 0xab:
-      table = uab;
-      size = sizeof(uab);
-      break;
-    case 0xd7:
-      table = ud7;
-      size = sizeof(ud7);
-      break;
-    case 0xfa:
-      table = ufa;
-      size = sizeof(ufa);
-      break;
-    case 0xfb:
-      table = ufb;
-      size = sizeof(ufb);
-      break;
-    case 0xfd:
-      table = ufd;
-      size = sizeof(ufd);
-      break;
-    case 0xfe:
-      table = ufe;
-      size = sizeof(ufe);
-      break;
-    case 0xff:
-      table = uff;
-      size = sizeof(uff);
-      break;
-    case 0x100:
-      table = u100;
-      size = sizeof(u100);
-      break;
-    case 0x101:
-      table = u101;
-      size = sizeof(u101);
-      break;
-    case 0x102:
-      table = u102;
-      size = sizeof(u102);
-      break;
-    case 0x103:
-      table = u103;
-      size = sizeof(u103);
-      break;
-    case 0x104:
-      table = u104;
-      size = sizeof(u104);
-      break;
-    case 0x108:
-      table = u108;
-      size = sizeof(u108);
-      break;
-    case 0x109:
-      table = u109;
-      size = sizeof(u109);
-      break;
-    case 0x10a:
-      table = u10a;
-      size = sizeof(u10a);
-      break;
-    case 0x10b:
-      table = u10b;
-      size = sizeof(u10b);
-      break;
-    case 0x10c:
-      table = u10c;
-      size = sizeof(u10c);
-      break;
-    case 0x110:
-      table = u110;
-      size = sizeof(u110);
-      break;
-    case 0x123:
-      table = u123;
-      size = sizeof(u123);
-      break;
-    case 0x124:
-      table = u124;
-      size = sizeof(u124);
-      break;
-    case 0x134:
-      table = u134;
-      size = sizeof(u134);
-      break;
-    case 0x1d4:
-      table = u1d4;
-      size = sizeof(u1d4);
-      break;
-    case 0x1d5:
-      table = u1d5;
-      size = sizeof(u1d5);
-      break;
-    case 0x1d6:
-      table = u1d6;
-      size = sizeof(u1d6);
-      break;
-    case 0x1d7:
-      table = u1d7;
-      size = sizeof(u1d7);
-      break;
-    case 0x1f1:
-      table = u1f1;
-      size = sizeof(u1f1);
-      break;
-    case 0x2a6:
-      table = u2a6;
-      size = sizeof(u2a6);
-      break;
-    case 0x2b7:
-      table = u2b7;
-      size = sizeof(u2b7);
-      break;
-    case 0x2fa:
-      table = u2fa;
-      size = sizeof(u2fa);
-      break;
-    default:
-      return 0;
-    }
-  /* we have narrowed down to a section of 256 characters to check */
-  /* now check if c matches the alphabetic wide-chars within that section */
-  ptr = (unsigned char *)table;
-  ctmp = (unsigned char)c;
-  while (ptr < table + size)
-    {
-      if (ctmp == *ptr)
-	return 1;
-      if (ctmp < *ptr)
-	return 0;
-      /* otherwise c > *ptr */
-      /* look for 0x0 as next element which indicates a range */
-      ++ptr;
-      if (ptr < table + size - 1 && *ptr == 0x0)
-	{
-	  /* we have a range..see if c falls within range */
-	  ++ptr;
-	  if (ctmp <= *ptr)
-	    return 1;
-	  ++ptr;
-	}
-    }
-  /* not in table */
-  return 0;
-#else
-  return (c < (wint_t)0x100 ? isalpha (c) : 0);
-#endif /* _MB_CAPABLE */
+  return iswalpha_l (c, 0);
 }
diff --git a/newlib/libc/ctype/iswalpha_l.c b/newlib/libc/ctype/iswalpha_l.c
index efcb95a..922983e 100644
--- a/newlib/libc/ctype/iswalpha_l.c
+++ b/newlib/libc/ctype/iswalpha_l.c
@@ -1,10 +1,21 @@
+/* Modified (m) 2017 Thomas Wolff: revise Unicode and locale/wchar handling */
 #include <_ansi.h>
+#include <ctype.h>
 #include <wctype.h>
+#include "local.h"
+#include "categories.h"
 
 int
 iswalpha_l (wint_t c, struct __locale_t *locale)
 {
-  /* We're using a locale-independent representation of upper/lower case
-     based on Unicode data.  Thus, the locale doesn't matter. */
-  return iswalpha (c);
+#ifdef _MB_CAPABLE
+  c = _jp2uc_l (c, locale);
+  enum category cat = category (c);
+  return cat == CAT_LC || cat == CAT_Lu || cat == CAT_Ll || cat == CAT_Lt
+      || cat == CAT_Lm || cat == CAT_Lo
+      || cat == CAT_Nl // Letter_Number
+      ;
+#else
+  return c < (wint_t)0x100 ? isalpha (c) : 0;
+#endif /* _MB_CAPABLE */
 }
diff --git a/newlib/libc/ctype/iswblank.c b/newlib/libc/ctype/iswblank.c
index ef91572..31779d2 100644
--- a/newlib/libc/ctype/iswblank.c
+++ b/newlib/libc/ctype/iswblank.c
@@ -1,5 +1,6 @@
 /* Copyright (c) 2002 Red Hat Incorporated.
    All rights reserved.
+   Modified (m) 2017 Thomas Wolff to refer to generated Unicode data tables.
 
    Redistribution and use in source and binary forms, with or without
    modification, are permitted provided that the following conditions are met:
@@ -62,26 +63,10 @@ PORTABILITY
 No supporting OS subroutines are required.
 */
 #include <_ansi.h>
-#include <newlib.h>
 #include <wctype.h>
-#include <ctype.h>
-#include <string.h>
-#include "local.h"
 
 int
 iswblank (wint_t c)
 {
-#ifdef _MB_CAPABLE
-  c = _jp2uc (c);
-  /* Based on Unicode 5.2.  Control char 09, plus all characters
-     from general category "Zs", which are not marked as decomposition
-     type "noBreak". */
-  return (c == 0x0009 || c == 0x0020 ||
-	  c == 0x1680 || c == 0x180e ||
-	  (c >= 0x2000 && c <= 0x2006) ||
-	  (c >= 0x2008 && c <= 0x200a) ||
-	  c == 0x205f || c == 0x3000);
-#else
-  return (c < 0x100 ? isblank (c) : 0);
-#endif /* _MB_CAPABLE */
+  return iswblank_l (c, 0);
 }
diff --git a/newlib/libc/ctype/iswblank_l.c b/newlib/libc/ctype/iswblank_l.c
index 6960693..b27ed82 100644
--- a/newlib/libc/ctype/iswblank_l.c
+++ b/newlib/libc/ctype/iswblank_l.c
@@ -1,10 +1,20 @@
+/* Modified (m) 2017 Thomas Wolff: revise Unicode and locale/wchar handling */
 #include <_ansi.h>
+#include <ctype.h>
 #include <wctype.h>
+#include "local.h"
+#include "categories.h"
 
 int
 iswblank_l (wint_t c, struct __locale_t *locale)
 {
-  /* We're using a locale-independent representation of upper/lower case
-     based on Unicode data.  Thus, the locale doesn't matter. */
-  return iswblank (c);
+#ifdef _MB_CAPABLE
+  c = _jp2uc_l (c, locale);
+  enum category cat = category (c);
+  // exclude "<noBreak>"?
+  return cat == CAT_Zs
+      || c == '\t';
+#else
+  return c < 0x100 ? isblank (c) : 0;
+#endif /* _MB_CAPABLE */
 }
diff --git a/newlib/libc/ctype/iswcntrl.c b/newlib/libc/ctype/iswcntrl.c
index 249a0a8..d4b0147 100644
--- a/newlib/libc/ctype/iswcntrl.c
+++ b/newlib/libc/ctype/iswcntrl.c
@@ -1,5 +1,6 @@
 /* Copyright (c) 2002 Red Hat Incorporated.
    All rights reserved.
+   Modified (m) 2017 Thomas Wolff to refer to generated Unicode data tables.
 
    Redistribution and use in source and binary forms, with or without
    modification, are permitted provided that the following conditions are met:
@@ -62,24 +63,10 @@ PORTABILITY
 No supporting OS subroutines are required.
 */
 #include <_ansi.h>
-#include <newlib.h>
 #include <wctype.h>
-#include <ctype.h>
-#include <string.h>
-#include "local.h"
 
 int
 iswcntrl (wint_t c)
 {
-#ifdef _MB_CAPABLE
-  c = _jp2uc (c);
-
-  /* Based on Unicode 5.2.  All characters from general category "Cc", "Zl",
-     and "Zp".  */
-  return ((c >= 0x0000 && c <= 0x001f) || 
-	  (c >= 0x007f && c <= 0x009f) ||
-	  c == 0x2028 || c == 0x2029);
-#else
-  return (c < 0x100 ? iscntrl (c) : 0);
-#endif /* _MB_CAPABLE */
+  return iswcntrl_l (c, 0);
 }
diff --git a/newlib/libc/ctype/iswcntrl_l.c b/newlib/libc/ctype/iswcntrl_l.c
index 37caba8..6a900a7 100644
--- a/newlib/libc/ctype/iswcntrl_l.c
+++ b/newlib/libc/ctype/iswcntrl_l.c
@@ -1,10 +1,20 @@
+/* Modified (m) 2017 Thomas Wolff: revise Unicode and locale/wchar handling */
 #include <_ansi.h>
+#include <ctype.h>
 #include <wctype.h>
+#include "local.h"
+#include "categories.h"
 
 int
 iswcntrl_l (wint_t c, struct __locale_t *locale)
 {
-  /* We're using a locale-independent representation of upper/lower case
-     based on Unicode data.  Thus, the locale doesn't matter. */
-  return iswcntrl (c);
+#ifdef _MB_CAPABLE
+  c = _jp2uc_l (c, locale);
+  enum category cat = category (c);
+  return cat == CAT_Cc
+      || cat == CAT_Zl || cat == CAT_Zp // Line/Paragraph Separator
+      ;
+#else
+  return c < 0x100 ? iscntrl (c) : 0;
+#endif /* _MB_CAPABLE */
 }
diff --git a/newlib/libc/ctype/iswctype_l.c b/newlib/libc/ctype/iswctype_l.c
index d9e7b2e..506972d 100644
--- a/newlib/libc/ctype/iswctype_l.c
+++ b/newlib/libc/ctype/iswctype_l.c
@@ -1,10 +1,41 @@
+/* Modified (m) 2017 Thomas Wolff: revise Unicode and locale/wchar handling */
 #include <_ansi.h>
 #include <wctype.h>
+#include "local.h"
 
 int
 iswctype_l (wint_t c, wctype_t desc, struct __locale_t *locale)
 {
-  /* We're using a locale-independent representation of upper/lower case
-     based on Unicode data.  Thus, the locale doesn't matter. */
-  return iswctype (c, desc);
+  switch (desc)
+    {
+    case WC_ALNUM:
+      return iswalnum_l (c, locale);
+    case WC_ALPHA:
+      return iswalpha_l (c, locale);
+    case WC_BLANK:
+      return iswblank_l (c, locale);
+    case WC_CNTRL:
+      return iswcntrl_l (c, locale);
+    case WC_DIGIT:
+      return iswdigit_l (c, locale);
+    case WC_GRAPH:
+      return iswgraph_l (c, locale);
+    case WC_LOWER:
+      return iswlower_l (c, locale);
+    case WC_PRINT:
+      return iswprint_l (c, locale);
+    case WC_PUNCT:
+      return iswpunct_l (c, locale);
+    case WC_SPACE:
+      return iswspace_l (c, locale);
+    case WC_UPPER:
+      return iswupper_l (c, locale);
+    case WC_XDIGIT:
+      return iswxdigit_l (c, locale);
+    default:
+      return 0; /* eliminate warning */
+    }
+
+  /* otherwise unknown */
+  return 0;
 }
diff --git a/newlib/libc/ctype/iswdigit.c b/newlib/libc/ctype/iswdigit.c
index 2b26141..d3562f8 100644
--- a/newlib/libc/ctype/iswdigit.c
+++ b/newlib/libc/ctype/iswdigit.c
@@ -38,5 +38,6 @@ No supporting OS subroutines are required.
 int
 iswdigit (wint_t c)
 {
-  return (c >= (wint_t)'0' && c <= (wint_t)'9');
+  return c >= (wint_t)'0' && c <= (wint_t)'9';
+  // category (c) == CAT_Nd not to be included as of C-99
 }
diff --git a/newlib/libc/ctype/iswdigit_l.c b/newlib/libc/ctype/iswdigit_l.c
index 98dd94e..29de9d3 100644
--- a/newlib/libc/ctype/iswdigit_l.c
+++ b/newlib/libc/ctype/iswdigit_l.c
@@ -4,5 +4,5 @@
 int
 iswdigit_l (wint_t c, struct __locale_t *locale)
 {
-  return (c >= (wint_t)'0' && c <= (wint_t)'9');
+  return c >= (wint_t)'0' && c <= (wint_t)'9';
 }
diff --git a/newlib/libc/ctype/iswgraph.c b/newlib/libc/ctype/iswgraph.c
index e0df4aa..bb21c21 100644
--- a/newlib/libc/ctype/iswgraph.c
+++ b/newlib/libc/ctype/iswgraph.c
@@ -1,5 +1,6 @@
 /* Copyright (c) 2002 Red Hat Incorporated.
    All rights reserved.
+   Modified (m) 2017 Thomas Wolff to refer to generated Unicode data tables.
 
    Redistribution and use in source and binary forms, with or without
    modification, are permitted provided that the following conditions are met:
@@ -67,5 +68,5 @@ No supporting OS subroutines are required.
 int
 iswgraph (wint_t c)
 {
-  return (iswprint (c) && !iswspace (c));
+  return iswgraph_l (c, 0);
 }
diff --git a/newlib/libc/ctype/iswgraph_l.c b/newlib/libc/ctype/iswgraph_l.c
index 9803c18..b8a5866 100644
--- a/newlib/libc/ctype/iswgraph_l.c
+++ b/newlib/libc/ctype/iswgraph_l.c
@@ -1,10 +1,23 @@
+/* Modified (m) 2017 Thomas Wolff: revise Unicode and locale/wchar handling */
 #include <_ansi.h>
 #include <wctype.h>
+#include "local.h"
+#include "categories.h"
 
 int
 iswgraph_l (wint_t c, struct __locale_t *locale)
 {
-  /* We're using a locale-independent representation of upper/lower case
-     based on Unicode data.  Thus, the locale doesn't matter. */
-  return iswprint (c) && !iswspace (c);
+#ifdef _MB_CAPABLE
+  //return iswprint (c, locale) && !iswspace (c, locale);
+  c = _jp2uc_l (c, locale);
+  enum category cat = category (c);
+  return cat != -1
+      && cat != CAT_Cc && cat != CAT_Cf
+      && cat != CAT_Cs // Surrogate
+      && cat != CAT_Zs
+      && cat != CAT_Zl && cat != CAT_Zp // Line/Paragraph Separator
+      ;
+#else
+  return iswprint_l (c, locale) && !iswspace_l (c, locale);
+#endif /* _MB_CAPABLE */
 }
diff --git a/newlib/libc/ctype/iswlower.c b/newlib/libc/ctype/iswlower.c
index 8b38835..e1d926b 100644
--- a/newlib/libc/ctype/iswlower.c
+++ b/newlib/libc/ctype/iswlower.c
@@ -17,7 +17,7 @@ SYNOPSIS
 
 DESCRIPTION
 <<iswlower>> is a function which classifies wide-character values that
-have uppercase translations.
+are categorized as lowercase.
 
 <<iswlower_l>> is like <<iswlower>> but performs the check based on the
 locale specified by the locale object locale.  If <[locale]> is
@@ -38,5 +38,5 @@ No supporting OS subroutines are required.
 int
 iswlower (wint_t c)
 {
-	return (towupper (c) != c);
+  return iswlower_l (c, 0);
 }
diff --git a/newlib/libc/ctype/iswlower_l.c b/newlib/libc/ctype/iswlower_l.c
index d69615b..6446133 100644
--- a/newlib/libc/ctype/iswlower_l.c
+++ b/newlib/libc/ctype/iswlower_l.c
@@ -1,10 +1,20 @@
+/* Modified (m) 2017 Thomas Wolff: revise Unicode and locale/wchar handling */
 #include <_ansi.h>
+#include <ctype.h>
 #include <wctype.h>
+#include "local.h"
+#include "categories.h"
 
 int
 iswlower_l (wint_t c, struct __locale_t *locale)
 {
-  /* We're using a locale-independent representation of upper/lower case
-     based on Unicode data.  Thus, the locale doesn't matter. */
-  return (towupper (c) != c);
+#ifdef _MB_CAPABLE
+  c = _jp2uc_l (c, locale);
+  // The wide-character class "lower" contains at least those characters wc
+  // which are equal to towlower(wc) and different from towupper(wc).
+  enum category cat = category (c);
+  return cat == CAT_Ll || (cat == CAT_LC && towlower (c) == c);
+#else
+  return c < 0x100 ? islower (c) : 0;
+#endif /* _MB_CAPABLE */
 }
diff --git a/newlib/libc/ctype/iswprint.c b/newlib/libc/ctype/iswprint.c
index c6050b5..5e468fe 100644
--- a/newlib/libc/ctype/iswprint.c
+++ b/newlib/libc/ctype/iswprint.c
@@ -1,5 +1,6 @@
 /* Copyright (c) 2002 Red Hat Incorporated.
    All rights reserved.
+   Modified (m) 2017 Thomas Wolff to refer to generated Unicode data tables.
 
    Redistribution and use in source and binary forms, with or without
    modification, are permitted provided that the following conditions are met:
@@ -62,440 +63,10 @@ PORTABILITY
 No supporting OS subroutines are required.
 */
 #include <_ansi.h>
-#include <newlib.h>
 #include <wctype.h>
-#include <string.h>
-#include <ctype.h>
-#include "local.h"
-
-#ifdef _MB_CAPABLE
-#include "utf8print.h"
-#endif /* _MB_CAPABLE */
 
 int
 iswprint (wint_t c)
 {
-#ifdef _MB_CAPABLE
-  unsigned const char *table;
-  unsigned char *ptr;
-  unsigned char ctmp;
-  int size;
-  wint_t x;
-  
-  c = _jp2uc (c);
-
-  /* Based on and tested against Unicode 5.2
-     See utf8print.h for a description how to fetch the data. */
-  x = (c >> 8);
-  /* for some large sections, all characters are printuation so handle them here */
-  if ((x >= 0x33 && x <= 0x4c) ||
-      (x >= 0x4e && x <= 0x9e) ||
-      (x >= 0xa0 && x <= 0xa3) ||
-      (x >= 0xac && x <= 0xd6) ||
-      (x >= 0xe0 && x <= 0xf9) ||
-      (x >= 0x120 && x <= 0x122) ||
-      (x >= 0x130 && x <= 0x133) ||
-      (x >= 0x200 && x <= 0x2a5) ||
-      (x >= 0x2a7 && x <= 0x2b6) ||
-      (x >= 0xf00 && x <= 0xffe) ||
-      (x >= 0x1000 && x <= 0x10fe))
-    return 1;
-  
-  switch (x)
-    {
-    case 0x01:
-    case 0x02:
-    case 0x04:
-    case 0x11:
-    case 0x14:
-    case 0x15:
-    case 0x1e:
-    case 0x22:
-    case 0x25:
-    case 0x28:
-    case 0x29:
-    case 0x2a:
-    case 0xa5:
-    case 0xfc:
-    case 0x2f8:
-    case 0x2f9:
-      return 1;
-    case 0x00:
-      table = u0;
-      size = sizeof(u0);
-      break;
-    case 0x03:
-      table = u3;
-      size = sizeof(u3);
-      break;
-    case 0x05:
-      table = u5;
-      size = sizeof(u5);
-      break;
-    case 0x06:
-      table = u6;
-      size = sizeof(u6);
-      break;
-    case 0x07:
-      table = u7;
-      size = sizeof(u7);
-      break;
-    case 0x08:
-      table = u8;
-      size = sizeof(u8);
-      break;
-    case 0x09:
-      table = u9;
-      size = sizeof(u9);
-      break;
-    case 0x0a:
-      table = ua;
-      size = sizeof(ua);
-      break;
-    case 0x0b:
-      table = ub;
-      size = sizeof(ub);
-      break;
-    case 0x0c:
-      table = uc;
-      size = sizeof(uc);
-      break;
-    case 0x0d:
-      table = ud;
-      size = sizeof(ud);
-      break;
-    case 0x0e:
-      table = ue;
-      size = sizeof(ue);
-      break;
-    case 0x0f:
-      table = uf;
-      size = sizeof(uf);
-      break;
-    case 0x10:
-      table = u10;
-      size = sizeof(u10);
-      break;
-    case 0x12:
-      table = u12;
-      size = sizeof(u12);
-      break;
-    case 0x13:
-      table = u13;
-      size = sizeof(u13);
-      break;
-    case 0x16:
-      table = u16;
-      size = sizeof(u16);
-      break;
-    case 0x17:
-      table = u17;
-      size = sizeof(u17);
-      break;
-    case 0x18:
-      table = u18;
-      size = sizeof(u18);
-      break;
-    case 0x19:
-      table = u19;
-      size = sizeof(u19);
-      break;
-    case 0x1a:
-      table = u1a;
-      size = sizeof(u1a);
-      break;
-    case 0x1b:
-      table = u1b;
-      size = sizeof(u1b);
-      break;
-    case 0x1c:
-      table = u1c;
-      size = sizeof(u1c);
-      break;
-    case 0x1d:
-      table = u1d;
-      size = sizeof(u1d);
-      break;
-    case 0x1f:
-      table = u1f;
-      size = sizeof(u1f);
-      break;
-    case 0x20:
-      table = u20;
-      size = sizeof(u20);
-      break;
-    case 0x21:
-      table = u21;
-      size = sizeof(u21);
-      break;
-    case 0x23:
-      table = u23;
-      size = sizeof(u23);
-      break;
-    case 0x24:
-      table = u24;
-      size = sizeof(u24);
-      break;
-    case 0x26:
-      table = u26;
-      size = sizeof(u26);
-      break;
-    case 0x27:
-      table = u27;
-      size = sizeof(u27);
-      break;
-    case 0x2b:
-      table = u2b;
-      size = sizeof(u2b);
-      break;
-    case 0x2c:
-      table = u2c;
-      size = sizeof(u2c);
-      break;
-    case 0x2d:
-      table = u2d;
-      size = sizeof(u2d);
-      break;
-    case 0x2e:
-      table = u2e;
-      size = sizeof(u2e);
-      break;
-    case 0x2f:
-      table = u2f;
-      size = sizeof(u2f);
-      break;
-    case 0x30:
-      table = u30;
-      size = sizeof(u30);
-      break;
-    case 0x31:
-      table = u31;
-      size = sizeof(u31);
-      break;
-    case 0x32:
-      table = u32;
-      size = sizeof(u32);
-      break;
-    case 0x4d:
-      table = u4d;
-      size = sizeof(u4d);
-      break;
-    case 0x9f:
-      table = u9f;
-      size = sizeof(u9f);
-      break;
-    case 0xa4:
-      table = ua4;
-      size = sizeof(ua4);
-      break;
-    case 0xa6:
-      table = ua6;
-      size = sizeof(ua6);
-      break;
-    case 0xa7:
-      table = ua7;
-      size = sizeof(ua7);
-      break;
-    case 0xa8:
-      table = ua8;
-      size = sizeof(ua8);
-      break;
-    case 0xa9:
-      table = ua9;
-      size = sizeof(ua9);
-      break;
-    case 0xaa:
-      table = uaa;
-      size = sizeof(uaa);
-      break;
-    case 0xab:
-      table = uab;
-      size = sizeof(uab);
-      break;
-    case 0xd7:
-      table = ud7;
-      size = sizeof(ud7);
-      break;
-    case 0xfa:
-      table = ufa;
-      size = sizeof(ufa);
-      break;
-    case 0xfb:
-      table = ufb;
-      size = sizeof(ufb);
-      break;
-    case 0xfd:
-      table = ufd;
-      size = sizeof(ufd);
-      break;
-    case 0xfe:
-      table = ufe;
-      size = sizeof(ufe);
-      break;
-    case 0xff:
-      table = uff;
-      size = sizeof(uff);
-      break;
-    case 0x100:
-      table = u100;
-      size = sizeof(u100);
-      break;
-    case 0x101:
-      table = u101;
-      size = sizeof(u101);
-      break;
-    case 0x102:
-      table = u102;
-      size = sizeof(u102);
-      break;
-    case 0x103:
-      table = u103;
-      size = sizeof(u103);
-      break;
-    case 0x104:
-      table = u104;
-      size = sizeof(u104);
-      break;
-    case 0x108:
-      table = u108;
-      size = sizeof(u108);
-      break;
-    case 0x109:
-      table = u109;
-      size = sizeof(u109);
-      break;
-    case 0x10a:
-      table = u10a;
-      size = sizeof(u10a);
-      break;
-    case 0x10b:
-      table = u10b;
-      size = sizeof(u10b);
-      break;
-    case 0x10c:
-      table = u10c;
-      size = sizeof(u10c);
-      break;
-    case 0x10e:
-      table = u10e;
-      size = sizeof(u10e);
-      break;
-    case 0x110:
-      table = u110;
-      size = sizeof(u110);
-      break;
-    case 0x123:
-      table = u123;
-      size = sizeof(u123);
-      break;
-    case 0x124:
-      table = u124;
-      size = sizeof(u124);
-      break;
-    case 0x134:
-      table = u134;
-      size = sizeof(u134);
-      break;
-    case 0x1d0:
-      table = u1d0;
-      size = sizeof(u1d0);
-      break;
-    case 0x1d1:
-      table = u1d1;
-      size = sizeof(u1d1);
-      break;
-    case 0x1d2:
-      table = u1d2;
-      size = sizeof(u1d2);
-      break;
-    case 0x1d3:
-      table = u1d3;
-      size = sizeof(u1d3);
-      break;
-    case 0x1d4:
-      table = u1d4;
-      size = sizeof(u1d4);
-      break;
-    case 0x1d5:
-      table = u1d5;
-      size = sizeof(u1d5);
-      break;
-    case 0x1d6:
-      table = u1d6;
-      size = sizeof(u1d6);
-      break;
-    case 0x1d7:
-      table = u1d7;
-      size = sizeof(u1d7);
-      break;
-    case 0x1f0:
-      table = u1f0;
-      size = sizeof(u1f0);
-      break;
-    case 0x1f1:
-      table = u1f1;
-      size = sizeof(u1f1);
-      break;
-    case 0x1f2:
-      table = u1f2;
-      size = sizeof(u1f2);
-      break;
-    case 0x2a6:
-      table = u2a6;
-      size = sizeof(u2a6);
-      break;
-    case 0x2b7:
-      table = u2b7;
-      size = sizeof(u2b7);
-      break;
-    case 0x2fa:
-      table = u2fa;
-      size = sizeof(u2fa);
-      break;
-    case 0xe00:
-      table = ue00;
-      size = sizeof(ue00);
-      break;
-    case 0xe01:
-      table = ue01;
-      size = sizeof(ue01);
-      break;
-    case 0xfff:
-      table = ufff;
-      size = sizeof(ufff);
-      break;
-    case 0x10ff:
-      table = u10ff;
-      size = sizeof(u10ff);
-      break;
-    default:
-      return 0;
-    }
-  /* we have narrowed down to a section of 256 characters to check */
-  /* now check if c matches the printuation wide-chars within that section */
-  ptr = (unsigned char *)table;
-  ctmp = (unsigned char)c;
-  while (ptr < table + size)
-    {
-      if (ctmp == *ptr)
-	return 1;
-      if (ctmp < *ptr)
-	return 0;
-      /* otherwise c > *ptr */
-      /* look for 0x0 as next element which indicates a range */
-      ++ptr;
-      if (*ptr == 0x0)
-	{
-	  /* we have a range..see if c falls within range */
-	  ++ptr;
-	  if (ctmp <= *ptr)
-	    return 1;
-	  ++ptr;
-	}
-    }
-  /* not in table */
-  return 0;
-#else
-  return (c < (wint_t)0x100 ? isprint (c) : 0);
-#endif /* _MB_CAPABLE */
+  return iswprint_l (c, 0);
 }
diff --git a/newlib/libc/ctype/iswprint_l.c b/newlib/libc/ctype/iswprint_l.c
index a8d8686..cdf027b 100644
--- a/newlib/libc/ctype/iswprint_l.c
+++ b/newlib/libc/ctype/iswprint_l.c
@@ -1,10 +1,21 @@
+/* Modified (m) 2017 Thomas Wolff: revise Unicode and locale/wchar handling */
 #include <_ansi.h>
+#include <ctype.h>
 #include <wctype.h>
+#include "local.h"
+#include "categories.h"
 
 int
 iswprint_l (wint_t c, struct __locale_t *locale)
 {
-  /* We're using a locale-independent representation of upper/lower case
-     based on Unicode data.  Thus, the locale doesn't matter. */
-  return iswprint (c);
+#ifdef _MB_CAPABLE
+  c = _jp2uc_l (c, locale);	// classify the Unicode value of c
+  enum category cat = category (c);
+  return cat != -1 // presumably "no category assigned"; confirm in categories.c
+      && cat != CAT_Cc && cat != CAT_Cf // presumably control and format chars
+      && cat != CAT_Cs // Surrogate
+      ;
+#else
+  return c < (wint_t)0x100 ? isprint (c) : 0;	// only 8-bit values are classified
+#endif /* _MB_CAPABLE */
 }
diff --git a/newlib/libc/ctype/iswpunct.c b/newlib/libc/ctype/iswpunct.c
index 8ab7038..27a6d65 100644
--- a/newlib/libc/ctype/iswpunct.c
+++ b/newlib/libc/ctype/iswpunct.c
@@ -1,5 +1,6 @@
 /* Copyright (c) 2002 Red Hat Incorporated.
    All rights reserved.
+   Modified (m) 2017 Thomas Wolff to refer to generated Unicode data tables.
 
    Redistribution and use in source and binary forms, with or without
    modification, are permitted provided that the following conditions are met:
@@ -62,14 +63,10 @@ PORTABILITY
 No supporting OS subroutines are required.
 */
 #include <_ansi.h>
-#include <newlib.h>
 #include <wctype.h>
-#include <string.h>
-#include <ctype.h>
-#include "local.h"
 
 int
 iswpunct (wint_t c)
 {
-  return (!iswalnum (c) && iswgraph (c));
+  return iswpunct_l (c, 0);
 }
diff --git a/newlib/libc/ctype/iswpunct_l.c b/newlib/libc/ctype/iswpunct_l.c
index c7acc4e..4adc1ed 100644
--- a/newlib/libc/ctype/iswpunct_l.c
+++ b/newlib/libc/ctype/iswpunct_l.c
@@ -1,10 +1,26 @@
+/* Modified (m) 2017 Thomas Wolff: revise Unicode and locale/wchar handling */
 #include <_ansi.h>
+#include <ctype.h>
 #include <wctype.h>
+#include "local.h"
+#include "categories.h"
 
 int
 iswpunct_l (wint_t c, struct __locale_t *locale)
 {
-  /* We're using a locale-independent representation of upper/lower case
-     based on Unicode data.  Thus, the locale doesn't matter. */
-  return !iswalnum (c) && iswgraph (c);
+#ifdef _MB_CAPABLE
+  // formerly !iswalnum (c) && iswgraph (c); now decided by Unicode category
+  c = _jp2uc_l (c, locale);
+  enum category cat = category (c);
+  return cat == CAT_Pc || cat == CAT_Pd || cat == CAT_Pe || cat == CAT_Pf || cat == CAT_Pi || cat == CAT_Po || cat == CAT_Ps
+      || cat == CAT_Sm // Math Symbols
+      // the following are included for backwards consistency:
+      || cat == CAT_Sc // Currency Symbols
+      || cat == CAT_Sk // Modifier_Symbol
+      || cat == CAT_So // Other_Symbol
+      || cat == CAT_No // Other_Number
+      ;
+#else
+  return c < (wint_t)0x100 ? ispunct (c) : 0;	// only 8-bit values are classified
+#endif /* _MB_CAPABLE */
 }
diff --git a/newlib/libc/ctype/iswspace.c b/newlib/libc/ctype/iswspace.c
index ae3841a..ca6a887 100644
--- a/newlib/libc/ctype/iswspace.c
+++ b/newlib/libc/ctype/iswspace.c
@@ -1,5 +1,6 @@
 /* Copyright (c) 2002 Red Hat Incorporated.
    All rights reserved.
+   Modified (m) 2017 Thomas Wolff to refer to generated Unicode data tables.
 
    Redistribution and use in source and binary forms, with or without
    modification, are permitted provided that the following conditions are met:
@@ -62,27 +63,10 @@ PORTABILITY
 No supporting OS subroutines are required.
 */
 #include <_ansi.h>
-#include <newlib.h>
 #include <wctype.h>
-#include <ctype.h>
-#include <string.h>
-#include "local.h"
 
 int
 iswspace (wint_t c)
 {
-#ifdef _MB_CAPABLE
-  c = _jp2uc (c);
-  /* Based on Unicode 5.2.  Control chars 09-0D, plus all characters
-     from general category "Zs", which are not marked as decomposition
-     type "noBreak". */
-  return ((c >= 0x0009 && c <= 0x000d) || c == 0x0020 ||
-	  c == 0x1680 || c == 0x180e ||
-	  (c >= 0x2000 && c <= 0x2006) ||
-	  (c >= 0x2008 && c <= 0x200a) ||
-	  c == 0x2028 || c == 0x2029 ||
-	  c == 0x205f || c == 0x3000);
-#else
-  return (c < 0x100 ? isspace (c) : 0);
-#endif /* _MB_CAPABLE */
+  return iswspace_l (c, 0);
 }
diff --git a/newlib/libc/ctype/iswspace_l.c b/newlib/libc/ctype/iswspace_l.c
index 0c30242..e7f47ee 100644
--- a/newlib/libc/ctype/iswspace_l.c
+++ b/newlib/libc/ctype/iswspace_l.c
@@ -1,10 +1,21 @@
+/* Modified (m) 2017 Thomas Wolff: revise Unicode and locale/wchar handling */
 #include <_ansi.h>
+#include <ctype.h>
 #include <wctype.h>
+#include "local.h"
+#include "categories.h"
 
 int
 iswspace_l (wint_t c, struct __locale_t *locale)
 {
-  /* We're using a locale-independent representation of upper/lower case
-     based on Unicode data.  Thus, the locale doesn't matter. */
-  return iswspace (c);
+#ifdef _MB_CAPABLE
+  c = _jp2uc_l (c, locale);
+  enum category cat = category (c);
+  // TODO: exclude "<noBreak>" spaces? The former hard-coded list left them out.
+  return cat == CAT_Zs
+      || cat == CAT_Zl || cat == CAT_Zp // Line/Paragraph Separator
+      || (c >= 0x9 && c <= 0xD);	// control characters 09-0D
+#else
+  return c < 0x100 ? isspace (c) : 0;	// only 8-bit values are classified
+#endif /* _MB_CAPABLE */
 }
diff --git a/newlib/libc/ctype/iswupper.c b/newlib/libc/ctype/iswupper.c
index c4969a3..96b5a0c 100644
--- a/newlib/libc/ctype/iswupper.c
+++ b/newlib/libc/ctype/iswupper.c
@@ -17,14 +17,14 @@ SYNOPSIS
 
 DESCRIPTION
 <<iswupper>> is a function which classifies wide-character values that
-have uppercase translations.
+are categorized as uppercase.
 
 <<iswupper_l>> is like <<iswupper>> but performs the check based on the
 locale specified by the locale object locale.  If <[locale]> is
 LC_GLOBAL_LOCALE or not a valid locale object, the behaviour is undefined.
 
 RETURNS
-<<iswupper>>, <<iswupper_l>> return non-zero if <[c]> is a uppercase wide character.
+<<iswupper>>, <<iswupper_l>> return non-zero if <[c]> is an uppercase wide character.
 
 PORTABILITY
 <<iswupper>> is C99.
@@ -38,5 +38,5 @@ No supporting OS subroutines are required.
 int
 iswupper (wint_t c)
 {
-  return (towlower (c) != c);
+  return iswupper_l (c, 0);
 }
diff --git a/newlib/libc/ctype/iswupper_l.c b/newlib/libc/ctype/iswupper_l.c
index 2555cd0..091b6e6 100644
--- a/newlib/libc/ctype/iswupper_l.c
+++ b/newlib/libc/ctype/iswupper_l.c
@@ -1,10 +1,20 @@
+/* Modified (m) 2017 Thomas Wolff: revise Unicode and locale/wchar handling */
 #include <_ansi.h>
+#include <ctype.h>
 #include <wctype.h>
+#include "local.h"
+#include "categories.h"
 
 int
 iswupper_l (wint_t c, struct __locale_t *locale)
 {
-  /* We're using a locale-independent representation of upper/lower case
-     based on Unicode data.  Thus, the locale doesn't matter. */
-  return towlower (c) != c;
+#ifdef _MB_CAPABLE
+  c = _jp2uc_l (c, locale);	// classify the Unicode value of c
+  // The wide-character class "upper" contains at least those characters wc
+  // which are equal to towupper(wc) and different from towlower(wc).
+  enum category cat = category (c);
+  return cat == CAT_Lu || (cat == CAT_LC && towupper (c) == c);
+#else
+  return c < 0x100 ? isupper (c) : 0;	// 8-bit fallback must test *upper* case
+#endif /* _MB_CAPABLE */
 }
diff --git a/newlib/libc/ctype/iswxdigit.c b/newlib/libc/ctype/iswxdigit.c
index 4367186..fce2a4d 100644
--- a/newlib/libc/ctype/iswxdigit.c
+++ b/newlib/libc/ctype/iswxdigit.c
@@ -38,7 +38,7 @@ No supporting OS subroutines are required.
 int
 iswxdigit (wint_t c)
 {
-  return ((c >= (wint_t)'0' && c <= (wint_t)'9') ||
-	  (c >= (wint_t)'a' && c <= (wint_t)'f') ||
-	  (c >= (wint_t)'A' && c <= (wint_t)'F'));
+  return (c >= (wint_t)'0' && c <= (wint_t)'9')
+      || (c >= (wint_t)'a' && c <= (wint_t)'f')
+      || (c >= (wint_t)'A' && c <= (wint_t)'F');
 }
diff --git a/newlib/libc/ctype/jp2uc.c b/newlib/libc/ctype/jp2uc.c
index 29eec0f..b89b5ea 100644
--- a/newlib/libc/ctype/jp2uc.c
+++ b/newlib/libc/ctype/jp2uc.c
@@ -1,7 +1,8 @@
-/* Routine to translate from Japanese characters to Unicode */
+/* Routine to translate between Japanese characters and Unicode */
 
 /* Copyright (c) 2002 Red Hat Incorporated.
    All rights reserved.
+   Modified (m) 2017 Thomas Wolff: consider locale, add dummy uc2jp
 
    Redistribution and use in source and binary forms, with or without
    modification, are permitted provided that the following conditions are met:
@@ -25,7 +26,7 @@
    (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
    ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS   
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
 
@@ -40,13 +41,15 @@
 #include <string.h>
 #include <wctype.h>
 #include "local.h"
-#include "jp2uc.h"
 
 /* Japanese encoding types supported */
 #define JP_JIS		1
 #define JP_SJIS		2
 #define JP_EUCJP	3
 
+/* Japanese to Unicode conversion routine */
+#include "jp2uc.h"
+
 static wint_t
 __jp2uc (wint_t c, int type)
 {
@@ -104,7 +107,7 @@ __jp2uc (wint_t c, int type)
       return d02f4[index];
     }
 
-  /* handle smaller ranges here */    
+  /* handle smaller ranges here */
   switch (byte1)
     {
     case 0xA1:
@@ -148,20 +151,50 @@ __jp2uc (wint_t c, int type)
       return WEOF;
     }
 
-  return WEOF; 
+  return WEOF;
+}
+
+/* Unicode to Japanese conversion routine */
+static wint_t
+__uc2jp (wint_t c, int type)
+{
+#warning back-conversion Unicode to Japanese not implemented; needed for towupper/towlower
+  return c;
 }
 
+/* Japanese to Unicode conversion; a null locale selects the current one */
 wint_t
-_jp2uc (wint_t c)
+_jp2uc_l (wint_t c, struct __locale_t * l)
 {
-  if (!strcmp (__current_locale_charset (), "JIS"))
+  const char * cs = l ? __locale_charset(l) : __current_locale_charset();
+  if (0 == strcmp (cs, "JIS"))
     c = __jp2uc (c, JP_JIS);
-  else if (!strcmp (__current_locale_charset (), "SJIS"))
+  else if (0 == strcmp (cs, "SJIS"))
     c = __jp2uc (c, JP_SJIS);
-  else if (!strcmp (__current_locale_charset (), "EUCJP"))
+  else if (0 == strcmp (cs, "EUCJP"))
     c = __jp2uc (c, JP_EUCJP);
   return c;
 }
 
+wint_t
+_jp2uc (wint_t c)
+{
+  return _jp2uc_l (c, 0);
+}
+
+/* Unicode to Japanese interface (null l: current locale); __uc2jp is a stub */
+wint_t
+_uc2jp_l (wint_t c, struct __locale_t * l)
+{
+  const char * cs = l ? __locale_charset(l) : __current_locale_charset();
+  if (0 == strcmp (cs, "JIS"))
+    c = __uc2jp (c, JP_JIS);
+  else if (0 == strcmp (cs, "SJIS"))
+    c = __uc2jp (c, JP_SJIS);
+  else if (0 == strcmp (cs, "EUCJP"))
+    c = __uc2jp (c, JP_EUCJP);
+  return c;	/* any other charset: c is passed through unchanged */
+}
+
 #endif /* !__CYGWIN__ */
 #endif /* _MB_CAPABLE */
diff --git a/newlib/libc/ctype/local.h b/newlib/libc/ctype/local.h
index 62d2b15..5c293c8 100644
--- a/newlib/libc/ctype/local.h
+++ b/newlib/libc/ctype/local.h
@@ -1,3 +1,5 @@
+/* Modified (m) 2017 Thomas Wolff: fixed locale/wchar handling */
+
 /* wctrans constants */
 
 #include <_ansi.h>
@@ -21,11 +23,22 @@
 #define WC_UPPER	11
 #define WC_XDIGIT	12
 
-/* internal function to translate JP to Unicode */
+/* internal functions to translate between JP and Unicode */
+/* note this is not applicable to Cygwin, where wchar_t is always Unicode,
+   and should not be applicable to most other platforms either;
+   * platforms for which wchar_t is not Unicode should be explicitly listed
+   * the transformation should be applied to all non-Unicode locales
+     (also Chinese, Korean, and even 8-bit locales such as *.CP1252)
+   * for towupper and towlower, the result must be back-transformed
+     into the respective locale encoding; currently NOT IMPLEMENTED
+*/
 #ifdef __CYGWIN__
-/* Under Cygwin, the incoming wide character is already given in UTF due
-   to the requirements of the underlying OS. */
+/* Under Cygwin, wchar_t (or its extension wint_t) is Unicode */
 #define _jp2uc(c) (c)
+#define _jp2uc_l(c, l) (c)
+#define _uc2jp_l(c, l) (c)
 #else
 wint_t _jp2uc (wint_t);
+wint_t _jp2uc_l (wint_t, struct __locale_t *);
+wint_t _uc2jp_l (wint_t, struct __locale_t *);
 #endif
diff --git a/newlib/libc/ctype/towctrans.c b/newlib/libc/ctype/towctrans.c
index edbdfce..176aa3d 100644
--- a/newlib/libc/ctype/towctrans.c
+++ b/newlib/libc/ctype/towctrans.c
@@ -1,5 +1,6 @@
 /* Copyright (c) 2002 Red Hat Incorporated.
    All rights reserved.
+   Modified (m) 2017 Thomas Wolff to refer to generated Unicode data tables.
 
    Redistribution and use in source and binary forms, with or without
    modification, are permitted provided that the following conditions are met:
@@ -69,10 +70,9 @@ No supporting OS subroutines are required.
 */
 
 #include <_ansi.h>
-#include <string.h>
 #include <reent.h>
 #include <wctype.h>
-#include <errno.h>
+//#include <errno.h>
 #include "local.h"
 
 wint_t
@@ -80,13 +80,13 @@ _towctrans_r (struct _reent *r,
 	wint_t c,
 	wctrans_t w)
 {
-  if (w == WCT_TOLOWER)
-    return towlower (c);
-  else if (w == WCT_TOUPPER)
-    return towupper (c);
+  if (w == WCT_TOLOWER || w == WCT_TOUPPER)
+    return towctrans_l (c, w, 0);
   else
     {
-      r->_errno = EINVAL;
+      // skipping this because it was causing trouble (cygwin crash)
+      // and there is no errno specified for towctrans
+      //r->_errno = EINVAL;
       return c;
     }
 }
@@ -94,7 +94,7 @@ _towctrans_r (struct _reent *r,
 #ifndef _REENT_ONLY
 wint_t
 towctrans (wint_t c,
-        wctrans_t w)
+	wctrans_t w)
 {
   return _towctrans_r (_REENT, c, w);
 }
diff --git a/newlib/libc/ctype/towctrans_l.c b/newlib/libc/ctype/towctrans_l.c
index d7369e1..8da372f 100644
--- a/newlib/libc/ctype/towctrans_l.c
+++ b/newlib/libc/ctype/towctrans_l.c
@@ -1,10 +1,101 @@
+/* Modified (m) 2017 Thomas Wolff: revise Unicode and locale/wchar handling */
 #include <_ansi.h>
 #include <wctype.h>
+//#include <errno.h>
+#include "local.h"
+
+enum {EVENCAP, ODDCAP};		/* TO1 selector: which parity is the capital */
+enum {TO1, TOLO, TOUP, TOBOTH};	/* conversion mode of a table entry */
+static const struct caseconv_entry {
+  unsigned int first: 21;	/* first code point of the range */
+  unsigned short diff: 8;	/* last code point minus first */
+  unsigned char mode: 2;	/* one of TO1, TOLO, TOUP, TOBOTH */
+  int delta: 17;		/* offset to add, or EVENCAP/ODDCAP for TO1 */
+} __attribute__ ((packed))
+caseconv_table [] = {
+#include "caseconv.t"
+};
+#define first(ce)	((ce).first)
+#define last(ce)	((ce).first + (ce).diff)
+
+/* Binary search for the entry whose [first, last] range holds ucs; 0 if none.  max is the last valid index. */
+static const struct caseconv_entry *
+bisearch(wint_t ucs, const struct caseconv_entry *table, int max)
+{
+  int min = 0;
+  int mid;
+
+  if (ucs < first(table[0]) || ucs > last(table[max]))	/* outside the whole table */
+    return 0;
+  while (max >= min)	/* assumes entries are sorted ascending and do not overlap */
+    {
+      mid = (min + max) / 2;
+      if (ucs > last(table[mid]))
+	min = mid + 1;
+      else if (ucs < first(table[mid]))
+	max = mid - 1;
+      else
+	return &table[mid];
+    }
+  return 0;	/* ucs falls into a gap between two ranges */
+}
+
+/* Lowercase mapping of Unicode value c from caseconv_table; c itself if none. */
+static wint_t
+toulower (wint_t c)
+{
+  const struct caseconv_entry * cce =
+    bisearch(c, caseconv_table,
+             sizeof(caseconv_table) / sizeof(*caseconv_table) - 1);
+  if (cce)
+    switch (cce->mode) {
+      case TOLO: return c + cce->delta;
+      case TOBOTH: return c + 1;
+      case TO1: switch (cce->delta) {
+        case EVENCAP: if (!(c & 1)) return c + 1; break;
+        case ODDCAP: if (c & 1) return c + 1; break;
+      }
+    }
+  return c; /* no entry, or the entry has no lowercase mapping for c */
+}
+
+/* Uppercase mapping of Unicode value c from caseconv_table; c itself if none. */
+static wint_t
+touupper (wint_t c)
+{
+  const struct caseconv_entry * cce =
+    bisearch(c, caseconv_table,
+             sizeof(caseconv_table) / sizeof(*caseconv_table) - 1);
+  if (cce)
+    switch (cce->mode) {
+      case TOUP: return c + cce->delta;
+      case TOBOTH: return c - 1;
+      case TO1: switch (cce->delta) {
+        case EVENCAP: if (c & 1) return c - 1; break;
+        case ODDCAP: if (!(c & 1)) return c - 1; break;
+      }
+    }
+  return c; /* no entry, or the entry has no uppercase mapping for c */
+}
 
 wint_t
 towctrans_l (wint_t c, wctrans_t w, struct __locale_t *locale)
 {
-  /* We're using a locale-independent representation of upper/lower case
-     based on Unicode data.  Thus, the locale doesn't matter. */
-  return towctrans (c, w);
+  wint_t u = _jp2uc_l (c, locale);	/* Unicode value of c (identity on Cygwin) */
+  wint_t res;
+  if (w == WCT_TOLOWER)
+    res = toulower (u);
+  else if (w == WCT_TOUPPER)
+    res = touupper (u);
+  else
+    {
+      // skipping the errno setting that was previously involved
+      // by delegating to towctrans; it was causing trouble (cygwin crash)
+      // and there is no errno specified for towctrans
+      return c;
+    }
+  if (res != u)
+    return _uc2jp_l (res, locale);	/* converted: map back via _uc2jp_l */
+  else
+    return c;	/* unchanged: hand back the original, unconverted value */
 }
diff --git a/newlib/libc/ctype/towlower.c b/newlib/libc/ctype/towlower.c
index db390db..01de1bd 100644
--- a/newlib/libc/ctype/towlower.c
+++ b/newlib/libc/ctype/towlower.c
@@ -1,5 +1,6 @@
 /* Copyright (c) 2002 Red Hat Incorporated.
    All rights reserved.
+   Modified (m) 2017 Thomas Wolff to refer to generated Unicode data tables.
 
    Redistribution and use in source and binary forms, with or without
    modification, are permitted provided that the following conditions are met:
@@ -65,9 +66,6 @@ No supporting OS subroutines are required.
 */
 
 #include <_ansi.h>
-#include <newlib.h>
-#include <string.h>
-#include <reent.h>
 #include <ctype.h>
 #include <wctype.h>
 #include "local.h"
@@ -76,500 +74,8 @@ wint_t
 towlower (wint_t c)
 {
 #ifdef _MB_CAPABLE
-  c = _jp2uc (c);
-  /* Based on and tested against Unicode 5.2 */
-
-  /* Expression used to filter out the characters for the below code:
-
-     awk -F\; '{ if ( $14 != "" ) print $1; }' UnicodeData.txt
-  */
-  if (c < 0x100)
-    {
-      if ((c >= 0x0041 && c <= 0x005a) ||
-	  (c >= 0x00c0 && c <= 0x00d6) ||
-	  (c >= 0x00d8 && c <= 0x00de))
-	return (c + 0x20);
-
-      return c;
-    }
-  else if (c < 0x300)
-    {
-      if ((c >= 0x0100 && c <= 0x012e) ||
-	  (c >= 0x0132 && c <= 0x0136) ||
-	  (c >= 0x014a && c <= 0x0176) ||
-	  (c >= 0x01de && c <= 0x01ee) ||
-	  (c >= 0x01f8 && c <= 0x021e) ||
-	  (c >= 0x0222 && c <= 0x0232))
-	{
-	  if (!(c & 0x01))
-	    return (c + 1);
-	  return c;
-	}
-
-      if (c == 0x0130)
-	return 0x0069;
-
-      if ((c >= 0x0139 && c <= 0x0147) ||
-	  (c >= 0x01cd && c <= 0x01db))
-	{
-	  if (c & 0x01)
-	    return (c + 1);
-	  return c;
-	}
-      
-      if (c >= 0x178 && c <= 0x01f7)
-	{
-	  wint_t k;
-	  switch (c)
-	    {
-	    case 0x0178:
-	      k = 0x00ff;
-	      break;
-	    case 0x0179:
-	    case 0x017b:
-	    case 0x017d:
-	    case 0x0182:
-	    case 0x0184:
-	    case 0x0187:
-	    case 0x018b:
-	    case 0x0191:
-	    case 0x0198:
-	    case 0x01a0:
-	    case 0x01a2:
-	    case 0x01a4:
-	    case 0x01a7:
-	    case 0x01ac:
-	    case 0x01af:
-	    case 0x01b3:
-	    case 0x01b5:
-	    case 0x01b8:
-	    case 0x01bc:
-	    case 0x01c5:
-	    case 0x01c8:
-	    case 0x01cb:
-	    case 0x01cd:
-	    case 0x01cf:
-	    case 0x01d1:
-	    case 0x01d3:
-	    case 0x01d5:
-	    case 0x01d7:
-	    case 0x01d9:
-	    case 0x01db:
-	    case 0x01f2:
-	    case 0x01f4:
-	      k = c + 1;
-	      break;
-	    case 0x0181:
-	      k = 0x0253;
-	      break;
-	    case 0x0186:
-	      k = 0x0254;
-	      break;
-	    case 0x0189:
-	      k = 0x0256;
-	      break;
-	    case 0x018a:
-	      k = 0x0257;
-	      break;
-	    case 0x018e:
-	      k = 0x01dd;
-	      break;
-	    case 0x018f:
-	      k = 0x0259;
-	      break;
-	    case 0x0190:
-	      k = 0x025b;
-	      break;
-	    case 0x0193:
-	      k = 0x0260;
-	      break;
-	    case 0x0194:
-	      k = 0x0263;
-	      break;
-	    case 0x0196:
-	      k = 0x0269;
-	      break;
-	    case 0x0197:
-	      k = 0x0268;
-	      break;
-	    case 0x019c:
-	      k = 0x026f;
-	      break;
-	    case 0x019d:
-	      k = 0x0272;
-	      break;
-	    case 0x019f:
-	      k = 0x0275;
-	      break;
-	    case 0x01a6:
-	      k = 0x0280;
-	      break;
-	    case 0x01a9:
-	      k = 0x0283;
-	      break;
-	    case 0x01ae:
-	      k = 0x0288;
-	      break;
-	    case 0x01b1:
-	      k = 0x028a;
-	      break;
-	    case 0x01b2:
-	      k = 0x028b;
-	      break;
-	    case 0x01b7:
-	      k = 0x0292;
-	      break;
-	    case 0x01c4:
-	    case 0x01c7:
-	    case 0x01ca:
-	    case 0x01f1:
-	      k = c + 2;
-	      break;
-	    case 0x01f6:
-	      k = 0x0195;
-	      break;
-	    case 0x01f7:
-	      k = 0x01bf;
-	      break;
-	    default:
-	      k = 0;
-	    }
-	  if (k != 0)
-	    return k;
-	}
-      else if (c == 0x0220)
-      	return 0x019e;
-      else if (c >= 0x023a && c <= 0x024e)
-      	{
-	  wint_t k;
-	  switch (c)
-	    {
-	    case 0x023a:
-	      k = 0x2c65;
-	      break;
-	    case 0x023b:
-	    case 0x0241:
-	    case 0x0246:
-	    case 0x0248:
-	    case 0x024a:
-	    case 0x024c:
-	    case 0x024e:
-	      k = c + 1;
-	      break;
-	    case 0x023d:
-	      k = 0x019a;
-	      break;
-	    case 0x023e:
-	      k = 0x2c66;
-	      break;
-	    case 0x0243:
-	      k = 0x0180;
-	      break;
-	    case 0x0244:
-	      k = 0x0289;
-	      break;
-	    case 0x0245:
-	      k = 0x028c;
-	      break;
-	    default:
-	      k = 0;
-	    }
-	  if (k != 0)
-	    return k;
-	}
-    }
-  else if (c < 0x0400)
-    {
-      if (c == 0x0370 || c == 0x0372 || c == 0x0376)
-      	return (c + 1);
-      if (c >= 0x0391 && c <= 0x03ab && c != 0x03a2)
-	return (c + 0x20);
-      if (c >= 0x03d8 && c <= 0x03ee && !(c & 0x01))
-	return (c + 1);
-      if (c >= 0x0386 && c <= 0x03ff)
-	{
-	  wint_t k;
-	  switch (c)
-	    {
-	    case 0x0386:
-	      k = 0x03ac;
-	      break;
-	    case 0x0388:
-	      k = 0x03ad;
-	      break;
-	    case 0x0389:
-	      k = 0x03ae;
-	      break;
-	    case 0x038a:
-	      k = 0x03af;
-	      break;
-	    case 0x038c:
-	      k = 0x03cc;
-	      break;
-	    case 0x038e:
-	      k = 0x03cd;
-	      break;
-	    case 0x038f:
-	      k = 0x03ce;
-	      break;
-	    case 0x03cf:
-	      k = 0x03d7;
-	      break;
-	    case 0x03f4:
-	      k = 0x03b8;
-	      break;
-	    case 0x03f7:
-	      k = 0x03f8;
-	      break;
-	    case 0x03f9:
-	      k = 0x03f2;
-	      break;
-	    case 0x03fa:
-	      k = 0x03fb;
-	      break;
-	    case 0x03fd:
-	      k = 0x037b;
-	      break;
-	    case 0x03fe:
-	      k = 0x037c;
-	      break;
-	    case 0x03ff:
-	      k = 0x037d;
-	      break;
-	    default:
-	      k = 0;
-	    }
-	  if (k != 0)
-	    return k;
-	}
-    }
-  else if (c < 0x500)
-    {
-      if (c >= 0x0400 && c <= 0x040f)
-	return (c + 0x50);
-      
-      if (c >= 0x0410 && c <= 0x042f)
-	return (c + 0x20);
-      
-      if ((c >= 0x0460 && c <= 0x0480) ||
-	  (c >= 0x048a && c <= 0x04be) ||
-	  (c >= 0x04d0 && c <= 0x04fe))
-	{
-	  if (!(c & 0x01))
-	    return (c + 1);
-	  return c;
-	}
-      
-      if (c == 0x04c0)
-	return 0x04cf;
-
-      if (c >= 0x04c1 && c <= 0x04cd)
-	{
-	  if (c & 0x01)
-	    return (c + 1);
-	  return c;
-	}
-    }
-  else if (c < 0x1f00)
-    {
-      if ((c >= 0x0500 && c <= 0x050e) ||
-	  (c >= 0x0510 && c <= 0x0524) ||
-	  (c >= 0x1e00 && c <= 0x1e94) ||
-	  (c >= 0x1ea0 && c <= 0x1ef8))
-	{
-	  if (!(c & 0x01))
-	    return (c + 1);
-	  return c;
-	}
-      
-      if (c >= 0x0531 && c <= 0x0556)
-	return (c + 0x30);
-
-      if (c >= 0x10a0 && c <= 0x10c5)
-	return (c + 0x1c60);
-
-      if (c == 0x1e9e)
-	return 0x00df;
-
-      if (c >= 0x1efa && c <= 0x1efe && !(c & 0x01))
-	return (c + 1);
-    }
-  else if (c < 0x2000)
-    {
-      if ((c >= 0x1f08 && c <= 0x1f0f) ||
-	  (c >= 0x1f18 && c <= 0x1f1d) ||
-	  (c >= 0x1f28 && c <= 0x1f2f) ||
-	  (c >= 0x1f38 && c <= 0x1f3f) ||
-	  (c >= 0x1f48 && c <= 0x1f4d) ||
-	  (c >= 0x1f68 && c <= 0x1f6f) ||
-	  (c >= 0x1f88 && c <= 0x1f8f) ||
-	  (c >= 0x1f98 && c <= 0x1f9f) ||
-	  (c >= 0x1fa8 && c <= 0x1faf))
-	return (c - 0x08);
-
-      if (c >= 0x1f59 && c <= 0x1f5f)
-	{
-	  if (c & 0x01)
-	    return (c - 0x08);
-	  return c;
-	}
-    
-      if (c >= 0x1fb8 && c <= 0x1ffc)
-	{
-	  wint_t k;
-	  switch (c)
-	    {
-	    case 0x1fb8:
-	    case 0x1fb9:
-	    case 0x1fd8:
-	    case 0x1fd9:
-	    case 0x1fe8:
-	    case 0x1fe9:
-	      k = c - 0x08;
-	      break;
-	    case 0x1fba:
-	    case 0x1fbb:
-	      k = c - 0x4a;
-	      break;
-	    case 0x1fbc:
-	      k = 0x1fb3;
-	      break;
-	    case 0x1fc8:
-	    case 0x1fc9:
-	    case 0x1fca:
-	    case 0x1fcb:
-	      k = c - 0x56;
-	      break;
-	    case 0x1fcc:
-	      k = 0x1fc3;
-	      break;
-	    case 0x1fda:
-	    case 0x1fdb:
-	      k = c - 0x64;
-	      break;
-	    case 0x1fea:
-	    case 0x1feb:
-	      k = c - 0x70;
-	      break;
-	    case 0x1fec:
-	      k = 0x1fe5;
-	      break;
-	    case 0x1ff8:
-	    case 0x1ff9:
-	      k = c - 0x80;
-	      break;
-	    case 0x1ffa:
-	    case 0x1ffb:
-	      k = c - 0x7e;
-	      break;
-	    case 0x1ffc:
-	      k = 0x1ff3;
-	      break;
-	    default:
-	      k = 0;
-	    }
-	  if (k != 0)
-	    return k;
-	}
-    }
-  else if (c < 0x2c00)
-    {
-      if (c >= 0x2160 && c <= 0x216f)
-	return (c + 0x10);
-
-      if (c >= 0x24b6 && c <= 0x24cf)
-	return (c + 0x1a);
-      
-      switch (c)
-      	{
-	case 0x2126:
-	  return 0x03c9;
-	case 0x212a:
-	  return 0x006b;
-	case 0x212b:
-	  return 0x00e5;
-	case 0x2132:
-	  return 0x214e;
-	case 0x2183:
-	  return 0x2184;
-	}
-    }
-  else if (c < 0x2d00)
-    {
-      if (c >= 0x2c00 && c <= 0x2c2e)
-	return (c + 0x30);
-
-      if (c >= 0x2c80 && c <= 0x2ce2 && !(c & 0x01))
-	return (c + 1);
-
-      switch (c)
-      	{
-	case 0x2c60:
-	  return 0x2c61;
-	case 0x2c62:
-	  return 0x026b;
-	case 0x2c63:
-	  return 0x1d7d;
-	case 0x2c64:
-	  return 0x027d;
-	case 0x2c67:
-	case 0x2c69:
-	case 0x2c6b:
-	case 0x2c72:
-	case 0x2c75:
-	case 0x2ceb:
-	case 0x2ced:
-	  return c + 1;
-	case 0x2c6d:
-	  return 0x0251;
-	case 0x2c6e:
-	  return 0x0271;
-	case 0x2c6f:
-	  return 0x0250;
-	case 0x2c70:
-	  return 0x0252;
-	case 0x2c7e:
-	  return 0x023f;
-	case 0x2c7f:
-	  return 0x0240;
-	}
-    }
-  else if (c >= 0xa600 && c < 0xa800)
-    {
-      if ((c >= 0xa640 && c <= 0xa65e) ||
-	  (c >= 0xa662 && c <= 0xa66c) ||
-	  (c >= 0xa680 && c <= 0xa696) ||
-	  (c >= 0xa722 && c <= 0xa72e) ||
-	  (c >= 0xa732 && c <= 0xa76e) ||
-	  (c >= 0xa77f && c <= 0xa786))
-	{
-	  if (!(c & 1))
-	    return (c + 1);
-	  return c;
-	}
-
-      switch (c)
-      	{
-	case 0xa779:
-	case 0xa77b:
-	case 0xa77e:
-	case 0xa78b:
-	  return (c + 1);
-	case 0xa77d:
-	  return 0x1d79;
-	}
-    }
-  else
-    {
-      if (c >= 0xff21 && c <= 0xff3a)
-	return (c + 0x20);
-      
-      if (c >= 0x10400 && c <= 0x10427)
-	return (c + 0x28);
-    }
-  return c;
+  return towctrans (c, WCT_TOLOWER);
 #else
-  return (c < 0x00ff ? (wint_t)(tolower ((int)c)) : c);
+  return c < 0x00ff ? (wint_t)(tolower ((int)c)) : c;
 #endif /* _MB_CAPABLE */
 }
-
diff --git a/newlib/libc/ctype/towlower_l.c b/newlib/libc/ctype/towlower_l.c
index 2e89ec9..46e024d 100644
--- a/newlib/libc/ctype/towlower_l.c
+++ b/newlib/libc/ctype/towlower_l.c
@@ -1,3 +1,4 @@
+/* Modified (m) 2017 Thomas Wolff: revise Unicode and locale/wchar handling */
 #include <_ansi.h>
 #include <newlib.h>
 #include <wctype.h>
@@ -6,7 +7,9 @@
 wint_t
 towlower_l (wint_t c, struct __locale_t *locale)
 {
-  /* We're using a locale-independent representation of upper/lower case
-     based on Unicode data.  Thus, the locale doesn't matter. */
+#ifdef _MB_CAPABLE
+  return towctrans_l (c, WCT_TOLOWER, locale);
+#else
   return towlower (c);
+#endif /* _MB_CAPABLE */
 }
diff --git a/newlib/libc/ctype/towupper.c b/newlib/libc/ctype/towupper.c
index 306f72b..a60e62b 100644
--- a/newlib/libc/ctype/towupper.c
+++ b/newlib/libc/ctype/towupper.c
@@ -1,5 +1,6 @@
 /* Copyright (c) 2002 Red Hat Incorporated.
    All rights reserved.
+   Modified (m) 2017 Thomas Wolff to refer to generated Unicode data tables.
 
    Redistribution and use in source and binary forms, with or without
    modification, are permitted provided that the following conditions are met:
@@ -76,518 +77,8 @@ wint_t
 towupper (wint_t c)
 {
 #ifdef _MB_CAPABLE
-  c = _jp2uc (c);
-  /* Based on and tested against Unicode 5.2 */
-
-  /* Expression used to filter out the characters for the below code:
-
-     awk -F\; '{ if ( $13 != "" ) print $1; }' UnicodeData.txt
-  */
-  if (c < 0x100)
-    {
-      if (c == 0x00b5)
-	return 0x039c;
-      
-      if ((c >= 0x00e0 && c <= 0x00fe && c != 0x00f7) ||
-	  (c >= 0x0061 && c <= 0x007a))
-	return (c - 0x20);
-      
-      if (c == 0xff)
-	return 0x0178;
-      
-      return c;
-    }
-  else if (c < 0x300)
-    {
-      if ((c >= 0x0101 && c <= 0x012f) ||
-	  (c >= 0x0133 && c <= 0x0137) ||
-	  (c >= 0x014b && c <= 0x0177) ||
-	  (c >= 0x01df && c <= 0x01ef) ||
-	  (c >= 0x01f9 && c <= 0x021f) ||
-	  (c >= 0x0223 && c <= 0x0233) ||
-	  (c >= 0x0247 && c <= 0x024f))
-	{
-	  if (c & 0x01)
-	    return (c - 1);
-	  return c;
-	}
-
-      if ((c >= 0x013a && c <= 0x0148) ||
-	  (c >= 0x01ce && c <= 0x01dc) ||
-	  c == 0x023c || c == 0x0242)
-	{
-	  if (!(c & 0x01))
-	    return (c - 1);
-	  return c;
-	}
-      
-      if (c == 0x0131)
-	return 0x0049;
-      
-      if (c == 0x017a || c == 0x017c || c == 0x017e)
-	return (c - 1);
-      
-      if (c >= 0x017f && c <= 0x0292)
-	{
-	  wint_t k;
-	  switch (c)
-	    {
-	    case 0x017f:
-	      k = 0x0053;
-	      break;
-	    case 0x0180:
-	      k = 0x0243;
-	      break;
-	    case 0x0183:
-	      k = 0x0182;
-	      break;
-	    case 0x0185:
-	      k = 0x0184;
-	      break;
-	    case 0x0188:
-	      k = 0x0187;
-	      break;
-	    case 0x018c:
-	      k = 0x018b;
-	      break;
-	    case 0x0192:
-	      k = 0x0191;
-	      break;
-	    case 0x0195:
-	      k = 0x01f6;
-	      break;
-	    case 0x0199:
-	      k = 0x0198;
-	      break;
-	    case 0x019a:
-	      k = 0x023d;
-	      break;
-	    case 0x019e:
-	      k = 0x0220;
-	      break;
-	    case 0x01a1:
-	    case 0x01a3:
-	    case 0x01a5:
-	    case 0x01a8:
-	    case 0x01ad:
-	    case 0x01b0:
-	    case 0x01b4:
-	    case 0x01b6:
-	    case 0x01b9:
-	    case 0x01bd:
-	    case 0x01c5:
-	    case 0x01c8:
-	    case 0x01cb:
-	    case 0x01f2:
-	    case 0x01f5:
-	      k = c - 1;
-	      break;
-	    case 0x01bf:
-	      k = 0x01f7;
-	      break;
-	    case 0x01c6:
-	    case 0x01c9:
-	    case 0x01cc:
-	      k = c - 2;
-	      break;
-	    case 0x01dd:
-	      k = 0x018e;
-	      break;
-	    case 0x01f3:
-	      k = 0x01f1;
-	      break;
-	    case 0x023f:
-	      k = 0x2c7e;
-	      break;
-	    case 0x0240:
-	      k = 0x2c7f;
-	      break;
-	    case 0x0250:
-	      k = 0x2c6f;
-	      break;
-	    case 0x0251:
-	      k = 0x2c6d;
-	      break;
-	    case 0x0252:
-	      k = 0x2c70;
-	      break;
-	    case 0x0253:
-	      k = 0x0181;
-	      break;
-	    case 0x0254:
-	      k = 0x0186;
-	      break;
-	    case 0x0256:
-	      k = 0x0189;
-	      break;
-	    case 0x0257:
-	      k = 0x018a;
-	      break;
-	    case 0x0259:
-	      k = 0x018f;
-	      break;
-	    case 0x025b:
-	      k = 0x0190;
-	      break;
-	    case 0x0260:
-	      k = 0x0193;
-	      break;
-	    case 0x0263:
-	      k = 0x0194;
-	      break;
-	    case 0x0268:
-	      k = 0x0197;
-	      break;
-	    case 0x0269:
-	      k = 0x0196;
-	      break;
-	    case 0x026b:
-	      k = 0x2c62;
-	      break;
-	    case 0x026f:
-	      k = 0x019c;
-	      break;
-	    case 0x0271:
-	      k = 0x2c6e;
-	      break;
-	    case 0x0272:
-	      k = 0x019d;
-	      break;
-	    case 0x0275:
-	      k = 0x019f;
-	      break;
-	    case 0x027d:
-	      k = 0x2c64;
-	      break;
-	    case 0x0280:
-	      k = 0x01a6;
-	      break;
-	    case 0x0283:
-	      k = 0x01a9;
-	      break;
-	    case 0x0288:
-	      k = 0x01ae;
-	      break;
-	    case 0x0289:
-	      k = 0x0244;
-	      break;
-	    case 0x028a:
-	      k = 0x01b1;
-	      break;
-	    case 0x028b:
-	      k = 0x01b2;
-	      break;
-	    case 0x028c:
-	      k = 0x0245;
-	      break;
-	    case 0x0292:
-	      k = 0x01b7;
-	      break;
-	    default:
-	      k = 0;
-	    }
-	  if (k != 0)
-	    return k;
-	}
-    }
-  else if (c < 0x0400)
-    {
-      wint_t k;
-
-      if (c >= 0x03ad && c <= 0x03af)
-      	return (c - 0x25);
-
-      if (c >= 0x03b1 && c <= 0x03cb && c != 0x03c2)
-	return (c - 0x20);
-      
-      if (c >= 0x03d9 && c <= 0x03ef && (c & 1))
-	return (c - 1);
-
-      switch (c)
-	{
-	case 0x0345:
-	  k = 0x0399;
-	  break;
-	case 0x0371:
-	case 0x0373:
-	case 0x0377:
-	case 0x03f8:
-	case 0x03fb:
-	  k = c - 1;
-	  break;
-	case 0x037b:
-	case 0x037c:
-	case 0x037d:
-	  k = c + 0x82;
-	  break;
-	case 0x03ac:
-	  k = 0x0386;
-	  break;
-	case 0x03c2:
-	  k = 0x03a3;
-	  break;
-	case 0x03cc:
-	  k = 0x038c;
-	  break;
-	case 0x03cd:
-	case 0x03ce:
-	  k = c - 0x3f;
-	  break;
-	case 0x03d0:
-	  k = 0x0392;
-	  break;
-	case 0x03d1:
-	  k = 0x0398;
-	  break;
-	case 0x03d5:
-	  k = 0x03a6;
-	  break;
-	case 0x03d6:
-	  k = 0x03a0;
-	  break;
-	case 0x03d7:
-	  k = 0x03cf;
-	  break;
-	case 0x03f0:
-	  k = 0x039a;
-	  break;
-	case 0x03f1:
-	  k = 0x03a1;
-	  break;
-	case 0x03f2:
-	  k = 0x03f9;
-	  break;
-	case 0x03f5:
-	  k = 0x0395;
-	  break;
-	default:
-	  k = 0;
-	}
-      if (k != 0)
-	return k;
-    }
-  else if (c < 0x500)
-    {
-      if (c >= 0x0430 && c <= 0x044f)
-	return (c - 0x20);
-      
-      if (c >= 0x0450 && c <= 0x045f)
-	return (c - 0x50);
-      
-      if ((c >= 0x0461 && c <= 0x0481) ||
-	  (c >= 0x048b && c <= 0x04bf) ||
-	  (c >= 0x04d1 && c <= 0x04ff))
-	{
-	  if (c & 0x01)
-	    return (c - 1);
-	  return c;
-	}
-      
-      if (c >= 0x04c2 && c <= 0x04ce)
-	{
-	  if (!(c & 0x01))
-	    return (c - 1);
-	  return c;
-	}
-      
-      if (c == 0x04cf)
-      	return 0x04c0;
-    }
-  else if (c < 0x0600)
-    {
-      if (c >= 0x0501 && c <= 0x0525 && (c & 1))
-      	return c - 1;
-
-      if (c >= 0x0561 && c <= 0x0586)
-	return (c - 0x30);
-    }
-  else if (c < 0x1f00)
-    {
-      if (c == 0x1d79)
-      	return 0xa77d;
-
-      if (c == 0x1d7d)
-      	return 0x2c63;
-
-      if ((c >= 0x1e01 && c <= 0x1e95) ||
-	  (c >= 0x1ea1 && c <= 0x1eff))
-	{
-	  if (c & 0x01)
-	    return (c - 1);
-	  return c;
-	}
-      
-      if (c == 0x1e9b)
-	return 0x1e60;
-    }
-  else if (c < 0x2000)
-    {
-      
-      if ((c >= 0x1f00 && c <= 0x1f07) ||
-	  (c >= 0x1f10 && c <= 0x1f15) ||
-	  (c >= 0x1f20 && c <= 0x1f27) ||
-	  (c >= 0x1f30 && c <= 0x1f37) ||
-	  (c >= 0x1f40 && c <= 0x1f45) ||
-	  (c >= 0x1f60 && c <= 0x1f67) ||
-	  (c >= 0x1f80 && c <= 0x1f87) ||
-	  (c >= 0x1f90 && c <= 0x1f97) ||
-	  (c >= 0x1fa0 && c <= 0x1fa7))
-	return (c + 0x08);
-
-      if (c >= 0x1f51 && c <= 0x1f57 && (c & 0x01))
-	return (c + 0x08);
-      
-      if (c >= 0x1f70 && c <= 0x1ff3)
-	{
-	  wint_t k;
-	  switch (c)
-	    {
-	    case 0x1fb0:
-	      k = 0x1fb8;
-	      break;
-	    case 0x1fb1:
-	      k = 0x1fb9;
-	      break;
-	    case 0x1f70:
-	      k = 0x1fba;
-	      break;
-	    case 0x1f71:
-	      k = 0x1fbb;
-	      break;
-	    case 0x1fb3:
-	      k = 0x1fbc;
-	      break;
-	    case 0x1fbe:
-	      k = 0x0399;
-	      break;
-	    case 0x1f72:
-	      k = 0x1fc8;
-	      break;
-	    case 0x1f73:
-	      k = 0x1fc9;
-	      break;
-	    case 0x1f74:
-	      k = 0x1fca;
-	      break;
-	    case 0x1f75:
-	      k = 0x1fcb;
-	      break;
-	    case 0x1fc3:
-	      k = 0x1fcc;
-	      break;
-	    case 0x1fd0:
-	      k = 0x1fd8;
-	      break;
-	    case 0x1fd1:
-	      k = 0x1fd9;
-	      break;
-	    case 0x1f76:
-	      k = 0x1fda;
-	      break;
-	    case 0x1f77:
-	      k = 0x1fdb;
-	      break;
-	    case 0x1fe0:
-	      k = 0x1fe8;
-	      break;
-	    case 0x1fe1:
-	      k = 0x1fe9;
-	      break;
-	    case 0x1f7a:
-	      k = 0x1fea;
-	      break;
-	    case 0x1f7b:
-	      k = 0x1feb;
-	      break;
-	    case 0x1fe5:
-	      k = 0x1fec;
-	      break;
-	    case 0x1f78:
-	      k = 0x1ff8;
-	      break;
-	    case 0x1f79:
-	      k = 0x1ff9;
-	      break;
-	    case 0x1f7c:
-	      k = 0x1ffa;
-	      break;
-	    case 0x1f7d:
-	      k = 0x1ffb;
-	      break;
-	    case 0x1ff3:
-	      k = 0x1ffc;
-	      break;
-	    default:
-	      k = 0;
-	    }
-	  if (k != 0)
-	    return k;
-	}
-    }
-  else if (c < 0x3000)
-    {
-      if (c == 0x214e)
-      	return 0x2132;
-
-      if (c == 0x2184)
-      	return 0x2183;
-
-      if (c >= 0x2170 && c <= 0x217f)
-	return (c - 0x10);
-      
-      if (c >= 0x24d0 && c <= 0x24e9)
-	return (c - 0x1a);
-      
-      if (c >= 0x2c30 && c <= 0x2c5e)
-	return (c - 0x30);
-
-      if ((c >= 0x2c68 && c <= 0x2c6c && !(c & 1)) ||
-	  (c >= 0x2c81 && c <= 0x2ce3 &&  (c & 1)) ||
-	  c == 0x2c73 || c == 0x2c76 ||
-	  c == 0x2cec || c == 0x2cee)
-      	return (c - 1);
-
-      if (c >= 0x2c81 && c <= 0x2ce3 && (c & 1))
-	return (c - 1);
-
-      if (c >= 0x2d00 && c <= 0x2d25)
-      	return (c - 0x1c60);
-
-      switch (c)
-      	{
-	case 0x2c61:
-	  return 0x2c60;
-	case 0x2c65:
-	  return 0x023a;
-	case 0x2c66:
-	  return 0x023e;
-	}
-    }
-  else if (c >= 0xa000 && c < 0xb000)
-    {
-      if (((c >= 0xa641 && c <= 0xa65f) ||
-           (c >= 0xa663 && c <= 0xa66d) ||
-           (c >= 0xa681 && c <= 0xa697) ||
-           (c >= 0xa723 && c <= 0xa72f) ||
-           (c >= 0xa733 && c <= 0xa76f) ||
-           (c >= 0xa77f && c <= 0xa787)) &&
-	  (c & 1))
-	return (c - 1);
-      	
-      if (c == 0xa77a || c == 0xa77c || c == 0xa78c)
-	return (c - 1);
-    }
-  else
-    {
-      if (c >= 0xff41 && c <= 0xff5a)
-	return (c - 0x20);
-      
-      if (c >= 0x10428 && c <= 0x1044f)
-	return (c - 0x28);
-    }
-  return c;
+  return towctrans (c, WCT_TOUPPER);
 #else
-  return (c < 0x00ff ? (wint_t)(toupper ((int)c)) : c);
+  return c < 0x00ff ? (wint_t)(toupper ((int)c)) : c;
 #endif /* _MB_CAPABLE */
 }
-
diff --git a/newlib/libc/ctype/towupper_l.c b/newlib/libc/ctype/towupper_l.c
index 5a8384c..d7c1adb 100644
--- a/newlib/libc/ctype/towupper_l.c
+++ b/newlib/libc/ctype/towupper_l.c
@@ -1,10 +1,14 @@
+/* Modified (m) 2017 Thomas Wolff: revise Unicode and locale/wchar handling */
 #include <_ansi.h>
 #include <wctype.h>
+#include "local.h"
 
 wint_t
 towupper_l (wint_t c, struct __locale_t *locale)
 {
-  /* We're using a locale-independent representation of upper/lower case
-     based on Unicode data.  Thus, the locale doesn't matter. */
+#ifdef _MB_CAPABLE
+  return towctrans_l (c, WCT_TOUPPER, locale);
+#else
   return towupper (c);
+#endif /* _MB_CAPABLE */
 }
diff --git a/newlib/libc/ctype/utf8alpha.h b/newlib/libc/ctype/utf8alpha.h
deleted file mode 100644
index d9306b7..0000000
--- a/newlib/libc/ctype/utf8alpha.h
+++ /dev/null
@@ -1,355 +0,0 @@
-/* Copyright (c) 2002 Red Hat Incorporated.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms, with or without
-   modification, are permitted provided that the following conditions are met:
-
-     Redistributions of source code must retain the above copyright
-     notice, this list of conditions and the following disclaimer.
-
-     Redistributions in binary form must reproduce the above copyright
-     notice, this list of conditions and the following disclaimer in the
-     documentation and/or other materials provided with the distribution.
-
-     The name of Red Hat Incorporated may not be used to endorse
-     or promote products derived from this software without specific
-     prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED.  IN NO EVENT SHALL RED HAT INCORPORATED BE LIABLE FOR ANY
-   DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-   (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-   LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-   ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS   
-   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-
-/* Generated using UnicodeData.txt 5.2 */
-
-/* Expression used to filter out the characters for the below tables:
-
-  awk -F\; \
-  '{ \
-    VAL = strtonum (sprintf("0x%s", $1)); \
-    # All of general category "L", except for two Thai characters which \
-    # are actually punctuation characters.  Old Unicode weirdness. \
-    # The character "COMBINING GREEK YPOGEGRAMMENI", as well as all Thai \
-    # characters which are in "Mn" category.  Old Unicode weirdness. \
-    # All numerical digit or letter characters, except the ASCII variants. \
-    # This is necessary due to the unfortunate ISO C definition for the \
-    # iswdigit class, otherwise these characters are missing in iswalnum. \
-    # All "Other Symbols" which are named as "LETTER" characters. \
-    # \
-    # Before running this test, make sure to expand all Unicode blocks \
-    # which are just marked by their first and last character! \
-    # \
-    if (   (match($3, "^L") && VAL != 0x0e2f && VAL != 0x0e46) \
-	|| (match($3, "^Mn") && (VAL == 0x0345 || match($2, "\\<CHARACTER\\>"))) \
-	|| (match($3, "^N[dl]") && VAL >= 0x100) \
-	|| (match($3, "^So") && match($2, "\\<LETTER\\>"))) \
-      print $1; \
-  }' UnicodeData.txt
-*/
-
-static const unsigned char u0[] = {
-  0x41, 0x0, 0x5a, 0x61, 0x0, 0x7a, 0xaa, 0xb5, 
-  0xba, 0xc0, 0x0, 0xd6, 0xd8, 0x0, 0xf6, 0xf8, 
-  0x0, 0xff };
-/* u1 all alphabetic */
-static const unsigned char u2[] = {
-  0x00, 0x0, 0xc1, 0xc6, 0x0, 0xd1,
-  0xe0, 0x0, 0xe4, 0xec, 0xee };
-static const unsigned char u3[] = {
-  0x45, 0x70, 0x0, 0x74, 0x76, 0x77,
-  0x7a, 0x0, 0x7d, 0x86, 0x88, 0x0, 0x8a, 0x8c,
-  0x8e, 0x0, 0xa1, 0xa3, 0x0, 0xf5,
-  0xf7, 0x0, 0xff };
-static const unsigned char u4[] = { 
-  0x00, 0x0, 0x81, 0x8a, 0x0, 0xff };
-static const unsigned char u5[] = { 
-  0x00, 0x0, 0x25, 0x31, 0x0, 0x56, 0x59, 
-  0x61, 0x0, 0x87, 0xd0, 0x0, 0xea,
-  0xf0, 0x0, 0xf2 };
-static const unsigned char u6[] = { 
-  0x21, 0x0, 0x4a, 0x60, 0x0, 0x69,
-  0x6e, 0x0, 0x6f, 0x71, 0x0, 0xd3, 
-  0xd5, 0xe5, 0x0, 0xe6, 0xee, 0x0, 0xfc, 0xff };
-static const unsigned char u7[] = { 
-  0x10, 0x12, 0x0, 0x2f, 0x4d, 0x0, 0xa5, 0xb1,
-  0xc0, 0x0, 0xea, 0xf4, 0xf5, 0xfa };
-static const unsigned char u8[] = { 
-  0x00, 0x0, 0x15, 0x1a, 0x24, 0x28 };
-static const unsigned char u9[] = { 
-  0x04, 0x0, 0x39, 0x3d, 0x50, 0x58, 0x0, 0x61,
-  0x66, 0x0, 0x6f, 0x71, 0x72, 0x79, 0x0, 0x7f,
-  0x85, 0x0, 0x8c, 0x8f, 0x0, 0x90, 
-  0x93, 0x0, 0xa8, 0xaa, 0x0, 0xb0, 0xb2,
-  0xb6, 0x0, 0xb9, 0xbd, 0xce, 0xdc, 0x0, 0xdd,
-  0xdf, 0x0, 0xe1, 0xe6, 0x0, 0xf1 };
-static const unsigned char ua[] = { 
-  0x05, 0x0, 0x0a, 0x0f, 0x0, 0x10,
-  0x13, 0x0, 0x28, 0x2a, 0x0, 0x30,
-  0x32, 0x0, 0x33, 0x35, 0x0, 0x36,
-  0x38, 0x0, 0x39, 0x59, 0x0, 0x5c,
-  0x5e, 0x66, 0x0, 0x6f, 0x72, 0x0, 0x74,
-  0x85, 0x0, 0x8d, 0x8f, 0x0, 0x91,
-  0x93, 0x0, 0xa8, 0xaa, 0x0, 0xb0,
-  0xb2, 0x0, 0xb3, 0xb5, 0x0, 0xb9,
-  0xbd, 0xd0, 0xe0, 0xe1, 0xe6, 0x0, 0xef };
-static const unsigned char ub[] = { 
-  0x05, 0x0, 0x0c, 0x0f, 0x0, 0x10,
-  0x13, 0x0, 0x28, 0x2a, 0x0, 0x30,
-  0x32, 0x0, 0x33, 0x35, 0x0, 0x39, 0x3d,
-  0x5c, 0x0, 0x5d, 0x5f, 0x0, 0x61,
-  0x66, 0x0, 0x6f, 0x71, 0x83, 0x85, 0x0, 0x8a,
-  0x8e, 0x0, 0x90, 0x92, 0x0, 0x95,
-  0x99, 0x0, 0x9a, 0x9c, 0x9e, 0x0, 0x9f,
-  0xa3, 0x0, 0xa4, 0xa8, 0x0, 0xaa,
-  0xae, 0x0, 0xb9, 0xd0, 0xe6, 0x0, 0xef };
-static const unsigned char uc[] = { 
-  0x05, 0x0, 0x0c, 0x0e, 0x0, 0x10,
-  0x12, 0x0, 0x28, 0x2a, 0x0, 0x33,
-  0x35, 0x0, 0x39, 0x3d, 0x58, 0x59,
-  0x60, 0x0, 0x61, 0x66, 0x0, 0x6f,
-  0x85, 0x0, 0x8c, 0x8e, 0x0, 0x90,
-  0x92, 0x0, 0xa8, 0xaa, 0x0, 0xb3,
-  0xb5, 0x0, 0xb9, 0xbd, 0xde, 0xe0, 0x0, 0xe1,
-  0xe6, 0x0, 0xef };
-static const unsigned char ud[] = { 
-  0x05, 0x0, 0x0c, 0x0e, 0x0, 0x10, 
-  0x12, 0x0, 0x28, 0x2a, 0x0, 0x39, 0x3d,
-  0x60, 0x0, 0x61, 0x66, 0x0, 0x6f,
-  0x7a, 0x0, 0x7f, 0x85, 0x0, 0x96, 0x9a, 
-  0x0, 0xb1, 0xb3, 0x0, 0xbb, 0xbd,
-  0xc0, 0x0, 0xc6 };
-static const unsigned char ue[] = { 
-  0x01, 0x0, 0x2e, 0x30, 0x0, 0x3a, 0x40, 
-  0x0, 0x45, 0x47, 0x0, 0x4e, 0x50, 0x0, 0x59, 
-  0x81, 0x0, 0x82, 0x84, 0x87, 0x0, 0x88, 0x8a, 
-  0x8d, 0x94, 0x0, 0x97, 0x99, 0x0, 0x9f, 0xa1, 
-  0x0, 0xa3, 0xa5, 0xa7, 0xaa, 0x0, 0xab, 0xad, 
-  0x0, 0xb0, 0xb2, 0x0, 0xb3, 0xbd, 0xc0, 0x0, 
-  0xc4, 0xc6, 0xd0, 0x0, 0xd9, 0xdc, 0x0, 0xdd }; 
-static const unsigned char uf[] = {
-  0x00, 0x20, 0x0, 0x29, 0x40, 0x0, 0x47, 0x49, 
-  0x0, 0x6c, 0x88, 0x0, 0x8b };
-static const unsigned char u10[] = { 
-  0x00, 0x0, 0x2a, 0x3f, 0x0, 0x49,
-  0x50, 0x0, 0x55, 0x5a, 0x0, 0x5d,
-  0x61, 0x65, 0x66, 0x6e, 0x0, 0x70,
-  0x75, 0x0, 0x81, 0x8e, 0x90, 0x0, 0x99,
-  0xa0, 0x0, 0xc5, 0xd0, 0x0, 0xfa, 0xfc };
-/* u11 all alphabetic */
-static const unsigned char u12[] = { 
-  0x00, 0x0, 0x48, 0x4a, 0x0, 0x4d,
-  0x50, 0x0, 0x56, 0x58, 0x5a, 0x0, 0x5d,
-  0x60, 0x0, 0x88, 0x8a, 0x0, 0x8d,
-  0x90, 0x0, 0xb0, 0xb2, 0x0, 0xb5,
-  0xb8, 0x0, 0xbe, 0xc0, 0xc2, 0x0, 0xc5,
-  0xc8, 0x0, 0xd6, 0xd8, 0x0, 0xff };
-static const unsigned char u13[] = { 
-  0x00, 0x0, 0x10, 0x12, 0x0, 0x15,
-  0x18, 0x0, 0x5a, 0x80, 0x0, 0x8f,
-  0xa0, 0x0, 0xf4 };
-static const unsigned char u14[] = { 
-  0x01, 0x0, 0xff };
-/* u15 all alphabetic */
-static const unsigned char u16[] = { 
-  0x00, 0x0, 0x6c, 0x6f, 0x0, 0x7f, 
-  0x81, 0x0, 0x9a, 0xa0, 0x0, 0xea,
-  0xee, 0x0, 0xf0 };
-static const unsigned char u17[] = { 
-  0x00, 0x0, 0x0c, 0x0e, 0x0, 0x11,
-  0x20, 0x0, 0x31, 0x40, 0x0, 0x51,
-  0x60, 0x0, 0x6c, 0x6e, 0x0, 0x70,
-  0x80, 0x0, 0xb3, 0xd7, 0xdc, 0xe0, 0x0, 0xe9 };
-static const unsigned char u18[] = { 
-  0x10, 0x0, 0x19, 0x20, 0x0, 0x77,
-  0x80, 0x0, 0xa8, 0xaa, 0xb0, 0x0, 0xf5 };
-static const unsigned char u19[] = { 
-  0x00, 0x0, 0x1c, 0x46, 0x0, 0x6d,
-  0x70, 0x0, 0x74, 0x80, 0x0, 0xab,
-  0xc1, 0x0, 0xc7, 0xd0, 0x0, 0xda };
-static const unsigned char u1a[] = { 
-  0x00, 0x0, 0x16, 0x20, 0x0, 0x54,
-  0x80, 0x0, 0x89, 0x90, 0x0, 0x99, 0xa7 };
-static const unsigned char u1b[] = { 
-  0x05, 0x0, 0x33, 0x45, 0x0, 0x4b,
-  0x50, 0x0, 0x59, 0x83, 0x0, 0xa0,
-  0xae, 0x0, 0xb9 };
-static const unsigned char u1c[] = { 
-  0x00, 0x0, 0x23, 0x40, 0x0, 0x49,
-  0x4d, 0x0, 0x7d, 0xe9, 0x0, 0xec,
-  0xee, 0x0, 0xf1 };
-static const unsigned char u1d[] = { 
-  0x00, 0x0, 0xbf };
-/* u1e all alphabetic */
-static const unsigned char u1f[] = { 
-  0x00, 0x0, 0x15, 0x18, 0x0, 0x1d, 
-  0x20, 0x0, 0x45, 0x48, 0x0, 0x4d, 0x50, 0x0, 0x57, 0x59, 
-  0x5b, 0x5d, 0x5f, 0x0, 0x7d, 0x80, 0x0, 0xb4, 
-  0xb6, 0x0, 0xbc, 0xbe, 0xc2, 0x0, 0xc4, 0xc6, 
-  0x0, 0xcc, 0xd0, 0x0, 0xd3, 0xd6, 0x0, 0xdb, 
-  0xe0, 0x0, 0xec, 0xf2, 0x0, 0xf4, 0xf6, 0x0, 
-  0xfc };
-static const unsigned char u20[] = { 
-  0x71, 0x7f, 0x90, 0x0, 0x94 };
-static const unsigned char u21[] = { 
-  0x02, 0x07, 0x0a, 0x0, 0x13, 0x15,
-  0x19, 0x0, 0x1d, 0x24, 0x26, 0x28, 0x0, 0x2d,
-  0x2f, 0x0, 0x39, 0x3c, 0x0, 0x3f,
-  0x45, 0x0, 0x49, 0x4e, 0x60, 0x0, 0x88 }; 
-static const unsigned char u24[] = { 
-  0x9c, 0x0, 0xe9 };
-static const unsigned char u2c[] = { 
-  0x00, 0x0, 0x2e, 0x30, 0x0, 0x5e,
-  0x60, 0x0, 0xe4, 0xeb, 0x0, 0xee };
-static const unsigned char u2d[] = { 
-  0x00, 0x0, 0x25, 0x30, 0x0, 0x65, 0x6f,
-  0x80, 0x0, 0x96, 0xa0, 0x0, 0xa6,
-  0xa8, 0x0, 0xae, 0xb0, 0x0, 0xb6,
-  0xb8, 0x0, 0xbe, 0xc0, 0x0, 0xc6,
-  0xc8, 0x0, 0xce, 0xd0, 0x0, 0xd6,
-  0xd8, 0x0, 0xde };
-static const unsigned char u2e[] = {
-  0x2f };
-static const unsigned char u30[] = { 
-  0x05, 0x0, 0x07, 0x21, 0x0, 
-  0x29, 0x31, 0x0, 0x35, 0x38, 0x0, 0x3c, 0x41, 
-  0x0, 0x96, 0x9d, 0x0, 0x9f, 0xa1, 0x0, 0xfa, 
-  0xfc, 0x0, 0xff };
-static const unsigned char u31[] = { 
-  0x05, 0x0, 0x2d, 0x31, 0x0, 
-  0x8e, 0xa0, 0x0, 0xb7, 0xf0, 0x0, 0xff };
-/* u34 to u4c all alphabetic */
-static const unsigned char u4d[] = { 
-  0x00, 0x0, 0xb5 };
-/* u4e to u9e all alphabetic */
-static const unsigned char u9f[] = { 
-  0x00, 0x0, 0xcb };
-/* ua0 to ua3 all alphabetic */
-static const unsigned char ua4[] = { 
-  0x00, 0x0, 0x8c, 0xd0, 0x0, 0xfd }; 
-/* ua5 all alphabetic */
-static const unsigned char ua6[] = {
-  0x00, 0x0, 0x0c, 0x10, 0x0, 0x2b,
-  0x40, 0x0, 0x5f, 0x62, 0x0, 0x6e,
-  0x7f, 0x0, 0x97, 0xa0, 0x0, 0xef };
-static const unsigned char ua7[] = {
-  0x17, 0x0, 0x1f, 0x22, 0x0, 0x88,
-  0x8b, 0x8c,
-  0xfb, 0x0, 0xff };
-static const unsigned char ua8[] = {
-  0x00, 0x01, 0x03, 0x0, 0x05, 0x07, 0x0, 0x0a,
-  0x0c, 0x0, 0x22, 0x40, 0x0, 0x73,
-  0x82, 0x0, 0xb3, 0xd0, 0x0, 0xd9,
-  0xf2, 0x0, 0xf7, 0xfb };
-static const unsigned char ua9[] = {
-  0x00, 0x0, 0x25, 0x30, 0x0, 0x46,
-  0x60, 0x0, 0x7c, 0x84, 0x0, 0xb2,
-  0xcf, 0x0, 0xd9 };
-static const unsigned char uaa[] = {
-  0x00, 0x0, 0x28, 0x40, 0x0, 0x42,
-  0x44, 0x0, 0x4b, 0x50, 0x0, 0x59,
-  0x60, 0x0, 0x76, 0x7a, 0x80, 0x0, 0xaf,
-  0xb1, 0xb5, 0xb6, 0xb9, 0x0, 0xbd,
-  0xc0, 0xc2, 0xdb, 0x0, 0xdd };
-static const unsigned char uab[] = {
-  0xc0, 0x0, 0xe2, 0xf0, 0x0, 0xf9 };
-/* uac to ud6 all alphabetic */
-static const unsigned char ud7[] = { 
-  0x00, 0x0, 0xa3, 0xb0, 0x0, 0xc6,
-  0xcb, 0x0, 0xfb };
-/* uf9 all alphabetic */
-static const unsigned char ufa[] = { 
-  0x00, 0x0, 0x2d, 0x30, 0x0, 0x6d,
-  0x70, 0x0, 0xd9 };
-static const unsigned char ufb[] = { 
-  0x00, 0x0, 0x06, 0x13, 0x0, 0x17, 0x1d, 
-  0x1f, 0x0, 0x28, 0x2a, 0x0, 0x36, 0x38, 0x0, 
-  0x3c, 0x3e, 0x40, 0x0, 0x41, 0x43, 0x0, 0x44, 
-  0x46, 0x0, 0xb1, 0xd3, 0x0, 0xff };
-/* ufc all alphabetic */
-static const unsigned char ufd[] = { 
-  0x00, 0x0, 0x3d, 0x50, 0x0, 
-  0x8f, 0x92, 0x0, 0xc7, 0xf0, 0x0, 0xfb };
-static const unsigned char ufe[] = { 
-  0x70, 
-  0x0, 0x74, 0x76, 0x0, 0xfc };
-static const unsigned char uff[] = { 
-  0x10, 0x0, 0x19, 
-  0x21, 0x0, 0x3a, 0x41, 0x0, 0x5a, 0x66, 0x0, 
-  0xbe, 0xc2, 0x0, 0xc7, 0xca, 0x0, 0xcf, 0xd2, 
-  0x0, 0xd7, 0xda, 0x0, 0xdc };
-static const unsigned char u100[] = { 
-  0x00, 0x0, 0x0b, 0x0d, 0x0, 0x26,
-  0x28, 0x0, 0x3a, 0x3c, 0x3d, 0x3f, 0x0, 0x4d,
-  0x50, 0x0, 0x5d, 0x80, 0x0, 0xfa };
-static const unsigned char u101[] = { 
-  0x40, 0x0, 0x74 };
-static const unsigned char u102[] = { 
-  0x80, 0x0, 0x9c, 0xa0, 0x0, 0xd0 };
-static const unsigned char u103[] = { 
-  0x00, 0x0, 0x1e, 0x30, 0x0, 0x4a,
-  0x80, 0x0, 0x9d, 0xa0, 0x0, 0xc3,
-  0xc8, 0x0, 0xcf, 0xd1, 0x0, 0xd5 };
-static const unsigned char u104[] = { 
-  0x00, 0x0, 0x9d, 0xa0, 0x0, 0xa9 };
-static const unsigned char u108[] = { 
-  0x00, 0x0, 0x05, 0x08, 0x0a, 0x0, 0x35,
-  0x37, 0x38, 0x3c, 0x3f, 0x0, 0x55 };
-static const unsigned char u109[] = {
-  0x00, 0x0, 0x15, 0x20, 0x0, 0x39 };
-static const unsigned char u10a[] = {
-  0x00, 0x10, 0x0, 0x13, 0x15, 0x0, 0x17,
-  0x19, 0x0, 0x33, 0x60, 0x0, 0x7c };
-static const unsigned char u10b[] = {
-  0x00, 0x0, 0x35, 0x40, 0x0, 0x55,
-  0x60, 0x0, 0x72 };
-static const unsigned char u10c[] = {
-  0x00, 0x0, 0x48 };
-static const unsigned char u110[] = {
-  0x83, 0x0, 0xaf };
-/* u120 to u122 all alphabetic */
-static const unsigned char u123[] = { 
-  0x00, 0x0, 0x6e };
-static const unsigned char u124[] = { 
-  0x00, 0x0, 0x62 };
-/* u130 to u133 all alphabetic */
-static const unsigned char u134[] = {
-  0x00, 0x0, 0x2e };
-static const unsigned char u1d4[] = { 
-  0x00, 0x0, 0x54, 0x56, 0x0, 0x9c,
-  0x9e, 0x0, 0x9f, 0xa2, 0xa5, 0x0, 0xa6,
-  0xa9, 0x0, 0xac, 0xae, 0x0, 0xb9, 0xbb,
-  0xbd, 0x0, 0xc3, 0xc5, 0x0, 0xff };
-static const unsigned char u1d5[] = { 
-  0x00, 0x0, 0x05, 0x07, 0x0, 
-  0x0a, 0x0d, 0x0, 0x14, 0x16, 0x0, 0x1c, 0x1e, 
-  0x0, 0x39, 0x3b, 0x0, 0x3e, 0x40, 0x0, 0x44, 
-  0x46, 0x4a, 0x0, 0x50, 0x52, 0x0, 0xff }; 
-static const unsigned char u1d6[] = { 
-  0x00, 0x0, 0xa5, 0xa8, 0x0, 0xc0,
-  0xc2, 0x0, 0xda, 0xdc, 0x0, 0xfa, 
-  0xfc, 0x0, 0xff };
-static const unsigned char u1d7[] = { 
-  0x00, 0x0, 0x14, 0x16, 0x0, 0x34,
-  0x36, 0x0, 0x4e, 0x50, 0x0, 0x6e, 
-  0x70, 0x0, 0x88, 0x8a, 0x0, 0xa8,
-  0xaa, 0x0, 0xc2, 0xc4, 0x0, 0xcb,
-  0xce, 0x0, 0xff };
-static const unsigned char u1f1[] = {
-  0x10, 0x0, 0x2c, 0x31, 0x3d, 0x3f, 0x42, 0x46,
-  0x57, 0x5f, 0x79, 0x7b, 0x7c, 0x7f, 0x8a };
-/* u200 to u2a5 all alphabetic */
-static const unsigned char u2a6[] = { 
-  0x00, 0x0, 0xd6 };
-/* u2a7 to u2b6 all alphabetic */
-static const unsigned char u2b7[] = {
-  0x00, 0x0, 0x34 };
-/* u2f8 to u2f9 all alphabetic */
-static const unsigned char u2fa[] = { 
-  0x00, 0x0, 0x1d };
diff --git a/newlib/libc/ctype/utf8print.h b/newlib/libc/ctype/utf8print.h
deleted file mode 100644
index abeb81c..0000000
--- a/newlib/libc/ctype/utf8print.h
+++ /dev/null
@@ -1,389 +0,0 @@
-/* Copyright (c) 2002 Red Hat Incorporated.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms, with or without
-   modification, are permitted provided that the following conditions are met:
-
-     Redistributions of source code must retain the above copyright
-     notice, this list of conditions and the following disclaimer.
-
-     Redistributions in binary form must reproduce the above copyright
-     notice, this list of conditions and the following disclaimer in the
-     documentation and/or other materials provided with the distribution.
-
-     The name of Red Hat Incorporated may not be used to endorse
-     or promote products derived from this software without specific
-     prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED.  IN NO EVENT SHALL RED HAT INCORPORATED BE LIABLE FOR ANY
-   DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-   (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-   LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-   ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS   
-   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-
-/* Generated using UnicodeData.txt 5.2 */
-
-/* Expression used to filter out the characters for the below tables:
-
-   awk -F\; \
-   '{ \
-     VAL = strtonum (sprintf("0x%s", $1)); \
-     # All valid characters except from categories Cc (C0 or C1 control code), \
-     # Cs (Surrogates), Zl (Line separator), and Zp (Paragraph separator).\
-     # \
-     # Before running this test, make sure to expand all Unicode blocks \
-     # which are just marked by their first and last character! \
-     # \
-     if (!match($3, "^C[cs]") && !match($3, "^Z[lp]")) \
-       print $1; \
-   }' UnicodeData.txt
-*/
-static const unsigned char u0[] = {
-  0x20, 0x0, 0x7e, 0xa0, 0x0, 0xff };
-/* u1 is all-print */
-/* u2 is all-print */
-static const unsigned char u3[] = { 
-  0x00, 0x0, 0x77, 0x7a, 0x0, 0x7e,
-  0x84, 0x0, 0x8a, 0x8c, 0x8e, 0x0, 
-  0xa1, 0xa3, 0x0, 0xff };
-/* u4 is all-print */
-static const unsigned char u5[] = {
-  0x00, 0x0, 0x25, 0x31, 0x0, 
-  0x56, 0x59, 0x0, 0x5f, 0x61, 0x0, 0x87, 0x89, 
-  0x0, 0x8a, 0x91, 0x0, 0xc7, 0xd0, 0x0, 0xea,
-  0xf0, 0x0, 0xf4 };
-static const unsigned char u6[] = {
-  0x00, 0x0, 0x03, 0x06, 0x0, 0x1b, 0x1e, 0x1f,
-  0x21, 0x0, 0x5e, 0x60, 0x0, 0xff };
-static const unsigned char u7[] = { 
-  0x00, 0x0, 0x0d, 0x0f, 0x0, 0x4a, 0x4d, 0x0, 0xb1,
-  0xc0, 0x0, 0xfa };
-static const unsigned char u8[] = { 
-  0x00, 0x0, 0x2d, 0x30, 0x0, 0x3e, };
-static const unsigned char u9[] = {
-  0x00, 0x0, 0x39, 0x3c, 0x0, 0x4e, 0x50, 0x0, 0x55, 
-  0x58, 0x0, 0x72, 0x79, 0x0, 0x7f, 0x81, 0x0, 0x83,
-  0x85, 0x0, 0x8c, 0x8f, 0x0, 0x90, 0x93, 0x0, 0xa8,
-  0xaa, 0x0, 0xb0, 0xb2, 0xb6, 0x0, 0xb9, 0xbc, 
-  0x0, 0xc4, 0xc7, 0xc8, 0xcb, 0x0, 0xce, 
-  0xd7, 0xdc, 0x0, 0xdd, 0xdf, 0x0, 0xe3, 0xe6, 
-  0x0, 0xfb };
-static const unsigned char ua[] = { 
-  0x01, 0x0, 0x03, 0x05, 0x0, 0x0a, 0x0f, 0x0, 
-  0x10, 0x13, 0x0, 0x28, 0x2a, 0x0, 0x30, 0x32, 
-  0x0, 0x33, 0x35, 0x0, 0x36, 0x38, 0x0, 0x39, 
-  0x3c, 0x3e, 0x0, 0x42, 0x47, 0x0, 0x48, 0x4b, 
-  0x0, 0x4d, 0x51, 0x59, 0x0, 0x5c, 0x5e, 0x66, 0x0, 
-  0x75, 0x81, 0x0, 0x83, 0x85, 0x0, 0x8d,
-  0x8f, 0x0, 0x91, 0x93, 0x0, 0xa8, 0xaa, 0x0, 
-  0xb0, 0xb2, 0x0, 0xb3, 0xb5, 0x0, 0xb9, 0xbc, 
-  0x0, 0xc5, 0xc7, 0x0, 0xc9, 0xcb, 0x0, 0xcd, 
-  0xd0, 0xe0, 0x0, 0xe3, 0xe6, 0x0, 0xef, 0xf1 };
-static const unsigned char ub[] = {
-  0x01, 0x0, 0x03, 
-  0x05, 0x0, 0x0c, 0x0f, 0x0, 0x10, 0x13, 0x0, 
-  0x28, 0x2a, 0x0, 0x30, 0x32, 0x0, 0x33, 0x35, 
-  0x0, 0x39, 0x3c, 0x0, 0x44, 0x47, 0x0, 0x48, 
-  0x4b, 0x0, 0x4d, 0x56, 0x0, 0x57, 0x5c, 0x0, 
-  0x5d, 0x5f, 0x0, 0x63, 0x66, 0x0, 0x71, 0x82, 
-  0x0, 0x83, 0x85, 0x0, 0x8a, 0x8e, 0x0, 0x90, 
-  0x92, 0x0, 0x95, 0x99, 0x0, 0x9a, 0x9c, 0x9e, 
-  0x0, 0x9f, 0xa3, 0x0, 0xa4, 0xa8, 0x0, 0xaa, 
-  0xae, 0x0, 0xb9, 0xbe, 0x0, 
-  0xc2, 0xc6, 0x0, 0xc8, 0xca, 0x0, 0xcd, 0xd0,
-  0xd7, 0xe6, 0xe7, 0x0, 0xfa };
-static const unsigned char uc[] = {
-  0x01, 0x0, 0x03, 0x05, 0x0, 
-  0x0c, 0x0e, 0x0, 0x10, 0x12, 0x0, 0x28, 0x2a, 
-  0x0, 0x33, 0x35, 0x0, 0x39, 0x3d, 0x0, 0x44, 
-  0x46, 0x0, 0x48, 0x4a, 0x0, 0x4d, 0x55, 0x0, 
-  0x56, 0x58, 0x59, 0x60, 0x0, 0x63, 0x66, 0x0, 0x6f,
-  0x78, 0x0, 0x7f, 0x82, 0x83, 0x85, 0x0, 0x8c,
-  0x8e, 0x0, 0x90, 0x92, 0x0, 0xa8, 0xaa, 0x0, 0xb3,
-  0xb5, 0x0, 0xb9, 0xbc, 0x0, 0xc4, 0xc6, 0x0, 0xc8,
-  0xca, 0x0, 0xcd, 0xd5, 0x0, 0xd6, 0xde, 0xe0, 0x0, 
-  0xe3, 0xe6, 0x0, 0xef, 0xf1, 0xf2 };
-static const unsigned char ud[] = {
-  0x02, 0x0, 0x03, 0x05, 
-  0x0, 0x0c, 0x0e, 0x0, 0x10, 0x12, 0x0, 0x28, 
-  0x2a, 0x0, 0x39, 0x3d, 0x0, 0x44, 0x46, 0x0, 
-  0x48, 0x4a, 0x0, 0x4d, 0x57, 0x60, 0x0, 0x63, 
-  0x66, 0x0, 0x75, 0x79, 0x0, 0x7f, 0x82, 0x0, 0x83,
-  0x85, 0x0, 0x96, 0x9a, 0x0, 0xb1, 0xb3, 0x0, 0xbb,
-  0xbd, 0xc0, 0x0, 0xc6, 0xca, 0xcf, 0x0, 0xd4, 0xd6, 
-  0xd8, 0x0, 0xdf, 0xf2, 0x0, 0xf4 };
-static const unsigned char ue[] = {
-  0x01, 0x0, 
-  0x3a, 0x3f, 0x0, 0x5b, 0x81, 0x0, 0x82, 0x84, 
-  0x87, 0x0, 0x88, 0x8a, 0x8d, 0x94, 0x0, 0x97, 
-  0x99, 0x0, 0x9f, 0xa1, 0x0, 0xa3, 0xa5, 0xa7, 
-  0xaa, 0x0, 0xab, 0xad, 0x0, 0xb9, 0xbb, 0x0, 
-  0xbd, 0xc0, 0x0, 0xc4, 0xc6, 0xc8, 0x0, 0xcd, 
-  0xd0, 0x0, 0xd9, 0xdc, 0x0, 0xdd };
-static const unsigned char uf[] = {
-  0x00, 0x0, 0x47, 0x49, 0x0, 0x6c,
-  0x71, 0x0, 0x8b, 0x90, 0x0, 0x97,
-  0x99, 0x0, 0xbc, 0xbe, 0x0, 0xcc, 
-  0xce, 0x0, 0xd8 };
-static const unsigned char u10[] = {
-  0x00, 0x0, 0xc5, 0xd0, 0x0, 0xfc };
-/* u11 is all-print */
-static const unsigned char u12[] = {
-  0x00, 0x0, 0x48, 0x4a, 0x0, 0x4d, 0x50, 0x0, 0x56, 
-  0x58, 0x5a, 0x0, 0x5d, 0x60, 0x0, 0x88, 
-  0x8a, 0x0, 0x8d, 0x90, 0x0, 0xb0, 0xb2, 
-  0x0, 0xb5, 0xb8, 0x0, 0xbe, 0xc0, 0xc2, 0x0, 
-  0xc5, 0xc8, 0x0, 0xd6, 0xd8, 0x0, 0xff };
-static const unsigned char u13[] = {
-  0x00, 0x0, 0x10, 0x12, 0x0, 0x15,
-  0x18, 0x0, 0x5a, 0x5f, 0x0, 0x7c,
-  0x80, 0x0, 0x99, 0xa0, 0x0, 0xf4 };
-/* u14 is all-print */
-/* u15 is all-print */
-static const unsigned char u16[] = {
-  0x00, 0x0, 0x9c, 0xa0, 0x0, 0xf0 };
-static const unsigned char u17[] = {
-  0x00, 0x0, 0x0c, 0x0e, 0x0, 0x14, 0x20, 
-  0x0, 0x36, 0x40, 0x0, 0x53, 0x60, 0x0, 0x6c, 
-  0x6e, 0x0, 0x70, 0x72, 0x0, 0x73, 0x80, 0x0, 
-  0xdd, 0xe0, 0x0, 0xe9, 0xf0, 0x0, 0xf9 };
-static const unsigned char u18[] = {
-  0x00, 0x0, 0x0e, 0x10, 
-  0x0, 0x19, 0x20, 0x0, 0x77, 0x80, 0x0, 0xaa,
-  0xb0, 0x0, 0xf5 };
-static const unsigned char u19[] = {
-  0x00, 0x0, 0x1c, 0x20, 0x0, 0x2b,
-  0x30, 0x0, 0x3b, 0x40, 0x44, 0x0, 0x6d,
-  0x70, 0x0, 0x74, 0x80, 0x0, 0xab,
-  0xb0, 0x0, 0xc9, 0xd0, 0x0, 0xda,
-  0xde, 0x0, 0xff };
-static const unsigned char u1a[] = {
-  0x00, 0x0, 0x1b, 0x1e, 0x0, 0x5e,
-  0x60, 0x0, 0x7c, 0x7f, 0x0, 0x89,
-  0x90, 0x0, 0x99, 0xa0, 0x0, 0xad };
-static const unsigned char u1b[] = {
-  0x00, 0x0, 0x4b, 0x50, 0x0, 0x7c,
-  0x80, 0x0, 0xaa, 0xae, 0x0, 0xb9 };
-static const unsigned char u1c[] = {
-  0x00, 0x0, 0x37, 0x3b, 0x0, 0x49,
-  0x4d, 0x0, 0x7f, 0xd0, 0x0, 0xf2 };
-static const unsigned char u1d[] = { 
-  0x00, 0x0, 0xe6, 0xfd, 0x0, 0xff };
-/* u1e is all-print */
-static const unsigned char u1f[] = {
-  0x00, 0x0, 
-  0x15, 0x18, 0x0, 0x1d, 0x20, 0x0, 0x45, 0x48, 
-  0x0, 0x4d, 0x50, 0x0, 0x57, 0x59, 0x5b, 0x5d, 
-  0x5f, 0x0, 0x7d, 0x80, 0x0, 0xb4, 0xb6, 0x0, 
-  0xc4, 0xc6, 0x0, 0xd3, 0xd6, 0x0, 0xdb, 0xdd, 
-  0x0, 0xef, 0xf2, 0x0, 0xf4, 0xf6, 0x0, 0xfe };
-static const unsigned char u20[] = { 
-  0x00, 0x0, 0x27, 0x2a, 0x0, 0x64,
-  0x6a, 0x0, 0x71, 0x74, 0x0, 0x8e, 
-  0x90, 0x0, 0x94, 0xa0, 0x0, 0xb8,
-  0xd0, 0x0, 0xf0 };
-static const unsigned char u21[] = {
-  0x00, 0x0, 0x89, 0x90, 0x0, 0xff };
-/* u22 is all-print */
-static const unsigned char u23[] = {
-  0x00, 0x0, 0xe8 };
-static const unsigned char u24[] = {
-  0x00, 0x0, 0x26, 0x40, 0x0, 0x4a, 
-  0x60, 0x0, 0xff };
-/* u25 is all-print */
-static const unsigned char u26[] = {
-  0x00, 0x0, 0xcd, 0xcf, 0x0, 0xe1,
-  0xe3, 0xe8, 0x0, 0xff };
-static const unsigned char u27[] = {
-  0x01, 0x0, 0x04, 0x06, 0x0, 0x09,
-  0x0c, 0x0, 0x27, 0x29, 0x0, 0x4b, 0x4d,
-  0x4f, 0x0, 0x52, 0x56, 0x0, 0x5e,
-  0x61, 0x0, 0x94, 0x98, 0x0, 0xaf,
-  0xb1, 0x0, 0xbe, 0xc0, 0x0, 0xca, 0xcc,
-  0xd0, 0x0, 0xff };
-/* u28 to u2a are all-print */
-static const unsigned char u2b[] = {
-  0x00, 0x0, 0x4c, 0x50, 0x0, 0x59 };
-static const unsigned char u2c[] = {
-  0x00, 0x0, 0x2e, 0x30, 0x0, 0x5e,
-  0x60, 0x0, 0xf1, 0xf9, 0x0, 0xff };
-static const unsigned char u2d[] = {
-  0x00, 0x0, 0x25, 0x30, 0x0, 0x65, 0x6f,
-  0x80, 0x0, 0x96, 0xa0, 0x0, 0xa6,
-  0xa8, 0x0, 0xae, 0xb0, 0x0, 0xb6,
-  0xb8, 0x0, 0xbe, 0xc0, 0x0, 0xc6,
-  0xc8, 0x0, 0xce, 0xd0, 0x0, 0xd6,
-  0xd8, 0x0, 0xde, 0xe0, 0x0, 0xff };
-static const unsigned char u2e[] = {
-  0x00, 0x0, 0x31, 0x80, 0x0, 0x99,
-  0x9b, 0x0, 0xf3 };
-static const unsigned char u2f[] = { 
-  0x00, 0x0, 0xd5, 0xf0, 0x0, 0xfb };
-static const unsigned char u30[] = {
-  0x00, 0x0, 
-  0x3f, 0x41, 0x0, 0x96, 0x99, 0x0, 0xff };
-static const unsigned char u31[] = {
-  0x05, 0x0, 0x2d, 0x31, 0x0, 0x8e,
-  0x90, 0x0, 0xb7, 0xc0, 0x0, 0xe3,
-  0xf0, 0x0, 0xff };
-static const unsigned char u32[] = {
-  0x00, 0x0, 0x1e, 0x20, 0x0, 0xfe };
-/* u33 to u4c is all-print */
-static const unsigned char u4d[] = { 
-  0x00, 0x0, 0xb5, 0xc0, 0x0, 0xff };
-/* u4e to u9e is all-print */
-static const unsigned char u9f[] = {
-  0x00, 0x0, 0xcb };
-/* ua0 to ua3 is all-print */
-static const unsigned char ua4[] = {
-  0x00, 0x0, 0x8c, 0x90, 0x0, 0xc6,
-  0xd0, 0x0, 0xff };
-/* ua5 is all-print */
-static const unsigned char ua6[] = {
-  0x00, 0x0, 0x2b, 0x40, 0x0, 0x5f,
-  0x62, 0x0, 0x73, 0x7c, 0x0, 0x97,
-  0xa0, 0x0, 0xf7 };
-static const unsigned char ua7[] = {
-  0x00, 0x0, 0x8c, 0xfb, 0x0, 0xff };
-static const unsigned char ua8[] = {
-  0x00, 0x0, 0x2b, 0x30, 0x0, 0x39,
-  0x40, 0x0, 0x77, 0x80, 0x0, 0xc4,
-  0xce, 0x0, 0xd9, 0xe0, 0x0, 0xfb };
-static const unsigned char ua9[] = {
-  0x00, 0x0, 0x53, 0x5f, 0x0, 0x7c,
-  0x80, 0x0, 0xcd, 0xcf, 0x0, 0xd9,
-  0xde, 0xdf };
-static const unsigned char uaa[] = {
-  0x00, 0x0, 0x36, 0x40, 0x0, 0x4d,
-  0x50, 0x0, 0x59, 0x5c, 0x0, 0x7b,
-  0x80, 0x0, 0xc2, 0xdb, 0x0, 0xdf };
-static const unsigned char uab[] = {
-  0xc0, 0x0, 0xed, 0xf0, 0x0, 0xf9 };
-/* uac to ud6 is all-print */
-static const unsigned char ud7[] = {
-  0x00, 0x0, 0xa3, 0xb0, 0x0, 0xc6,
-  0xcb, 0x0, 0xfb };
-/* ud8 to udf are UTF-16 surrogates, non-printable */
-/* ue0 to uf9 is all-print */
-static const unsigned char ufa[] = {
-  0x00, 0x0, 0x2d, 0x30, 0x0, 0x6d,
-  0x70, 0x0, 0xd9 };
-static const unsigned char ufb[] = {
-  0x00, 0x0, 0x06, 0x13, 0x0, 0x17,
-  0x1d, 0x0, 0x36, 0x38, 0x0, 0x3c,
-  0x3e, 0x40, 0x41, 0x43, 0x44, 
-  0x46, 0x0, 0xb1, 0xd3, 0x0, 0xff };
-/* ufc is all-print */
-static const unsigned char ufd[] = {
-  0x00, 0x0, 0x3f, 0x50, 0x0, 0x8f,
-  0x92, 0x0, 0xc7, 0xf0, 0x0, 0xfd };
-static const unsigned char ufe[] = {
-  0x00, 0x0, 0x19, 0x20, 0x0, 0x26,
-  0x30, 0x0, 0x52, 0x54, 0x0, 0x66,
-  0x68, 0x0, 0x6b, 0x70, 0x0, 0x74,
-  0x76, 0x0, 0xfc, 0xff };
-static const unsigned char uff[] = {
-  0x01, 0x0, 0xbe, 0xc2, 0x0, 0xc7, 0xca, 0x0, 
-  0xcf, 0xd2, 0x0, 0xd7, 0xda, 0x0, 0xdc, 0xe0, 
-  0x0, 0xe6, 0xe8, 0x0, 0xee, 0xf9, 0x0, 0xfd }; 
-static const unsigned char u100[] = {
-  0x00, 0x0, 0x0b, 0x0d, 0x0, 0x26,
-  0x28, 0x0, 0x3a, 0x3c, 0x3d, 0x3f, 0x0, 0x4d,
-  0x50, 0x0, 0x5d, 0x80, 0x0, 0xfa };
-static const unsigned char u101[] = {
-  0x00, 0x0, 0x02, 0x07, 0x0, 0x33,
-  0x37, 0x0, 0x8a, 0x90, 0x0, 0x9b,
-  0xd0, 0x0, 0xfd };
-static const unsigned char u102[] = {
-  0x80, 0x0, 0x9c, 0xa0, 0x0, 0xd0 [...]

[diff truncated at 100000 bytes]


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2018-03-12 10:40 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-03-12 10:40 [newlib-cygwin] use generated character data Corinna Vinschen

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).