[newlib-cygwin/main] Cygwin: replace regex with latest verbatim FreeBSD version

public inbox for cygwin-cvs@sourceware.org
help / color / mirror / Atom feed

* [newlib-cygwin/main] Cygwin: replace regex with latest verbatim FreeBSD version
@ 2023-03-16 12:54 Corinna Vinschen
  0 siblings, 0 replies; only message in thread
From: Corinna Vinschen @ 2023-03-16 12:54 UTC (permalink / raw)
  To: cygwin-cvs

https://sourceware.org/git/gitweb.cgi?p=newlib-cygwin.git;h=24f34edc2a5dc09687c0efe7973b0db4a959dbb1

commit 24f34edc2a5dc09687c0efe7973b0db4a959dbb1
Author:     Corinna Vinschen <corinna@vinschen.de>
AuthorDate: Thu Mar 16 11:12:08 2023 +0100
Commit:     Corinna Vinschen <corinna@vinschen.de>
CommitDate: Thu Mar 16 11:12:08 2023 +0100

    Cygwin: replace regex with latest verbatim FreeBSD version
    
    Signed-off-by: Corinna Vinschen <corinna@vinschen.de>

Diff:
---
 winsup/cygwin/include/regex.h  |   20 +-
 winsup/cygwin/regex/COPYRIGHT  |    4 +
 winsup/cygwin/regex/cname.h    |    8 +-
 winsup/cygwin/regex/engine.c   |  423 ++++++++--------
 winsup/cygwin/regex/regcomp.c  | 1058 +++++++++++++++++++++++++++-------------
 winsup/cygwin/regex/regerror.c |   26 +-
 winsup/cygwin/regex/regex.3    |   91 +++-
 winsup/cygwin/regex/regex.7    |   22 +-
 winsup/cygwin/regex/regex2.h   |   60 ++-
 winsup/cygwin/regex/regexec.c  |   23 +-
 winsup/cygwin/regex/regfree.c  |    8 +-
 winsup/cygwin/regex/utils.h    |   10 +-
 12 files changed, 1093 insertions(+), 660 deletions(-)

diff --git a/winsup/cygwin/include/regex.h b/winsup/cygwin/include/regex.h
index 02c617e506cd..3bea3df4f3d4 100644
--- a/winsup/cygwin/include/regex.h
+++ b/winsup/cygwin/include/regex.h
@@ -1,4 +1,6 @@
 /*-
+ * SPDX-License-Identifier: BSD-3-Clause
+ *
  * Copyright (c) 1992 Henry Spencer.
  * Copyright (c) 1992, 1993
  *	The Regents of the University of California.  All rights reserved.
@@ -14,7 +16,7 @@
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
- * 4. Neither the name of the University nor the names of its contributors
+ * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
@@ -38,32 +40,20 @@
 #define	_REGEX_H_
 
 #include <sys/cdefs.h>
-#include <_ansi.h>
 #include <sys/_types.h>
 
 /* types */
-#ifdef __CYGWIN__
-typedef	_off_t		regoff_t;
-
-#define __need_size_t
-#include <stddef.h>
-#else /* !__CYGWIN__ */
 typedef	__off_t		regoff_t;
 
 #ifndef _SIZE_T_DECLARED
 typedef	__size_t	size_t;
 #define	_SIZE_T_DECLARED
 #endif
-#endif /* !__CYGWIN__ */
 
 typedef struct {
 	int re_magic;
 	size_t re_nsub;		/* number of parenthesized subexpressions */
-#ifdef __CYGWIN__
 	const char *re_endp;	/* end pointer for REG_PEND */
-#else
-	__const char *re_endp;	/* end pointer for REG_PEND */
-#endif
 	struct re_guts *re_g;	/* none of your business :-) */
 } regex_t;
 
@@ -81,12 +71,10 @@ typedef struct {
 #define	REG_NOSPEC	0020
 #define	REG_PEND	0040
 #define	REG_DUMP	0200
+#define	REG_POSIX	0400	/* only POSIX-compliant regex (libregex) */
 
 /* regerror() flags */
 #define	REG_ENOSYS	(-1)
-#ifdef __CYGWIN__
-#define	REG_NOERROR	 0	/* GNU extension */
-#endif
 #define	REG_NOMATCH	 1
 #define	REG_BADPAT	 2
 #define	REG_ECOLLATE	 3
diff --git a/winsup/cygwin/regex/COPYRIGHT b/winsup/cygwin/regex/COPYRIGHT
index dc823b1245a2..574f6bcec6c7 100644
--- a/winsup/cygwin/regex/COPYRIGHT
+++ b/winsup/cygwin/regex/COPYRIGHT
@@ -32,6 +32,10 @@ to the following restrictions:
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
diff --git a/winsup/cygwin/regex/cname.h b/winsup/cygwin/regex/cname.h
index c5c33f02c11d..3419181aac97 100644
--- a/winsup/cygwin/regex/cname.h
+++ b/winsup/cygwin/regex/cname.h
@@ -1,4 +1,6 @@
 /*-
+ * SPDX-License-Identifier: BSD-3-Clause
+ *
  * Copyright (c) 1992, 1993, 1994 Henry Spencer.
  * Copyright (c) 1992, 1993, 1994
  *	The Regents of the University of California.  All rights reserved.
@@ -14,7 +16,7 @@
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
- * 4. Neither the name of the University nor the names of its contributors
+ * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
@@ -31,7 +33,7 @@
  * SUCH DAMAGE.
  *
  *	@(#)cname.h	8.3 (Berkeley) 3/20/94
- * $FreeBSD: src/lib/libc/regex/cname.h,v 1.4 2007/01/09 00:28:04 imp Exp $
+ * $FreeBSD$
  */
 
 /* character-name table */
@@ -108,7 +110,7 @@ static struct cname {
 	{"four",		'4'},
 	{"five",		'5'},
 	{"six",			'6'},
-	{"seven",		'7'},
+	{"seven",      		'7'},
 	{"eight",		'8'},
 	{"nine",		'9'},
 	{"colon",		':'},
diff --git a/winsup/cygwin/regex/engine.c b/winsup/cygwin/regex/engine.c
index 94e418505f6d..bb40018c07e1 100644
--- a/winsup/cygwin/regex/engine.c
+++ b/winsup/cygwin/regex/engine.c
@@ -1,4 +1,6 @@
 /*-
+ * SPDX-License-Identifier: BSD-3-Clause
+ *
  * Copyright (c) 1992, 1993, 1994 Henry Spencer.
  * Copyright (c) 1992, 1993, 1994
  *	The Regents of the University of California.  All rights reserved.
@@ -14,7 +16,7 @@
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
- * 4. Neither the name of the University nor the names of its contributors
+ * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
@@ -34,7 +36,9 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/lib/libc/regex/engine.c,v 1.23 2009/09/16 06:32:23 dds Exp $");
+__FBSDID("$FreeBSD$");
+
+#include <stdbool.h>
 
 /*
  * The matching engine and friends.  This file is #included by regexec.c
@@ -44,9 +48,9 @@ __FBSDID("$FreeBSD: src/lib/libc/regex/engine.c,v 1.23 2009/09/16 06:32:23 dds E
  */
 
 #ifdef SNAMES
+#define	stepback sstepback
 #define	matcher	smatcher
-#define	fast	sfast
-#define	slow	sslow
+#define	walk	swalk
 #define	dissect	sdissect
 #define	backref	sbackref
 #define	step	sstep
@@ -55,9 +59,9 @@ __FBSDID("$FreeBSD: src/lib/libc/regex/engine.c,v 1.23 2009/09/16 06:32:23 dds E
 #define	match	smat
 #endif
 #ifdef LNAMES
+#define	stepback lstepback
 #define	matcher	lmatcher
-#define	fast	lfast
-#define	slow	lslow
+#define	walk	lwalk
 #define	dissect	ldissect
 #define	backref	lbackref
 #define	step	lstep
@@ -66,9 +70,9 @@ __FBSDID("$FreeBSD: src/lib/libc/regex/engine.c,v 1.23 2009/09/16 06:32:23 dds E
 #define	match	lmat
 #endif
 #ifdef MNAMES
+#define	stepback mstepback
 #define	matcher	mmatcher
-#define	fast	mfast
-#define	slow	mslow
+#define	walk	mwalk
 #define	dissect	mdissect
 #define	backref	mbackref
 #define	step	mstep
@@ -104,9 +108,8 @@ extern "C" {
 static int matcher(struct re_guts *g, const char *string, size_t nmatch, regmatch_t pmatch[], int eflags);
 static const char *dissect(struct match *m, const char *start, const char *stop, sopno startst, sopno stopst);
 static const char *backref(struct match *m, const char *start, const char *stop, sopno startst, sopno stopst, sopno lev, int);
-static const char *fast(struct match *m, const char *start, const char *stop, sopno startst, sopno stopst);
-static const char *slow(struct match *m, const char *start, const char *stop, sopno startst, sopno stopst);
-static states step(struct re_guts *g, sopno start, sopno stop, states bef, wint_t ch, states aft);
+static const char *walk(struct match *m, const char *start, const char *stop, sopno startst, sopno stopst, bool fast);
+static states step(struct re_guts *g, sopno start, sopno stop, states bef, wint_t ch, states aft, int sflags);
 #define MAX_RECURSION	100
 #define	BOL	(OUT-1)
 #define	EOL	(BOL-1)
@@ -115,10 +118,12 @@ static states step(struct re_guts *g, sopno start, sopno stop, states bef, wint_
 #define	BOW	(BOL-4)
 #define	EOW	(BOL-5)
 #define	BADCHAR	(BOL-6)
-/* When using wint_t, which is defined as unsigned int on BSD,
-   as well as on Cygwin or Linux, the NONCHAR test is broken without
-   the below cast.  I'm wondering how this is supposed to work at all... */
-#define	NONCHAR(c)	((int)(c) <= OUT)
+#define	NWBND	(BOL-7)
+#define	NONCHAR(c)	((c) <= OUT)
+/* sflags */
+#define	SBOS	0x0001
+#define	SEOS	0x0002
+
 #ifdef REDEBUG
 static void print(struct match *m, const char *caption, states st, int ch, FILE *d);
 #endif
@@ -144,6 +149,39 @@ static const char *pchar(int ch);
 #define	NOTE(s)	/* nothing */
 #endif
 
+/*
+ * Given a multibyte string pointed to by start, step back nchar characters
+ * from current position pointed to by cur.
+ */
+static const char *
+stepback(const char *start, const char *cur, int nchar)
+{
+	const char *ret;
+	int wc, mbc;
+	mbstate_t mbs;
+	size_t clen;
+
+	if (MB_CUR_MAX == 1)
+		return ((cur - nchar) > start ? cur - nchar : NULL);
+
+	ret = cur;
+	for (wc = nchar; wc > 0; wc--) {
+		for (mbc = 1; mbc <= MB_CUR_MAX; mbc++) {
+			if ((ret - mbc) < start)
+				return (NULL);
+			memset(&mbs, 0, sizeof(mbs));
+			clen = mbrtowc(NULL, ret - mbc, mbc, &mbs);
+			if (clen != (size_t)-1 && clen != (size_t)-2)
+				break;
+		}
+		if (mbc > MB_CUR_MAX)
+			return (NULL);
+		ret -= mbc;
+	}
+
+	return (ret);
+}
+
 /*
  - matcher - the actual matching engine
  == static int matcher(struct re_guts *g, const char *string, \
@@ -157,7 +195,7 @@ matcher(struct re_guts *g,
 	int eflags)
 {
 	const char *endp;
-	int i;
+	size_t i;
 	struct match mv;
 	struct match *m = &mv;
 	const char *dp = NULL;
@@ -247,17 +285,19 @@ matcher(struct re_guts *g,
 	ZAPSTATE(&m->mbs);
 
 	/* Adjust start according to moffset, to speed things up */
-#ifndef MNAMES
-	/* The code evaluating moffset doesn't seem to work right
-	   in the multibyte case. */
-	if (g->moffset > -1)
-		start = ((dp - g->moffset) < start) ? start : dp - g->moffset;
-#endif
+	if (dp != NULL && g->moffset > -1) {
+		const char *nstart;
+
+		nstart = stepback(start, dp, g->moffset);
+		if (nstart != NULL)
+			start = nstart;
+	}
+
 	SP("mloop", m->st, *start);
 
 	/* this loop does only one repetition except for backrefs */
 	for (;;) {
-		endp = fast(m, start, stop, gf, gl);
+		endp = walk(m, start, stop, gf, gl, true);
 		if (endp == NULL) {		/* a miss */
 			if (m->pmatch != NULL)
 				free((char *)m->pmatch);
@@ -273,7 +313,7 @@ matcher(struct re_guts *g,
 		assert(m->coldp != NULL);
 		for (;;) {
 			NOTE("finding start");
-			endp = slow(m, m->coldp, stop, gf, gl);
+			endp = walk(m, m->coldp, stop, gf, gl, false);
 			if (endp != NULL)
 				break;
 			assert(m->coldp < m->endp);
@@ -318,7 +358,7 @@ matcher(struct re_guts *g,
 			if (dp != NULL || endp <= m->coldp)
 				break;		/* defeat */
 			NOTE("backoff");
-			endp = slow(m, m->coldp, endp-1, gf, gl);
+			endp = walk(m, m->coldp, endp-1, gf, gl, false);
 			if (endp == NULL)
 				break;		/* defeat */
 			/* try it on a shorter possibility */
@@ -391,7 +431,7 @@ dissect(struct match *m,
 	const char *ssp;	/* start of string matched by subsubRE */
 	const char *sep;	/* end of string matched by subsubRE */
 	const char *oldssp;	/* previous ssp */
-	const char *dp __attribute__ ((unused));
+	const char *dp __unused;
 
 	AT("diss", start, stop, startst, stopst);
 	sp = start;
@@ -404,7 +444,7 @@ dissect(struct match *m,
 			es += OPND(m->g->strip[es]);
 			break;
 		case OCH_:
-			while (OP(m->g->strip[es]) != O_CH)
+			while (OP(m->g->strip[es]) != (sop)O_CH)
 				es += OPND(m->g->strip[es]);
 			break;
 		}
@@ -422,6 +462,10 @@ dissect(struct match *m,
 		case OEOL:
 		case OBOW:
 		case OEOW:
+		case OBOS:
+		case OEOS:
+		case OWBND:
+		case ONWBND:
 			break;
 		case OANY:
 		case OANYOF:
@@ -436,10 +480,10 @@ dissect(struct match *m,
 			stp = stop;
 			for (;;) {
 				/* how long could this one be? */
-				rest = slow(m, sp, stp, ss, es);
+				rest = walk(m, sp, stp, ss, es, false);
 				assert(rest != NULL);	/* it did match */
 				/* could the rest match the rest? */
-				tail = slow(m, rest, stop, es, stopst);
+				tail = walk(m, rest, stop, es, stopst, false);
 				if (tail == stop)
 					break;		/* yes! */
 				/* no -- try a shorter match for this one */
@@ -449,7 +493,7 @@ dissect(struct match *m,
 			ssub = ss + 1;
 			esub = es - 1;
 			/* did innards match? */
-			if (slow(m, sp, rest, ssub, esub) != NULL) {
+			if (walk(m, sp, rest, ssub, esub, false) != NULL) {
 				dp = dissect(m, sp, rest, ssub, esub);
 				assert(dp == rest);
 			} else		/* no */
@@ -460,10 +504,10 @@ dissect(struct match *m,
 			stp = stop;
 			for (;;) {
 				/* how long could this one be? */
-				rest = slow(m, sp, stp, ss, es);
+				rest = walk(m, sp, stp, ss, es, false);
 				assert(rest != NULL);	/* it did match */
 				/* could the rest match the rest? */
-				tail = slow(m, rest, stop, es, stopst);
+				tail = walk(m, rest, stop, es, stopst, false);
 				if (tail == stop)
 					break;		/* yes! */
 				/* no -- try a shorter match for this one */
@@ -475,7 +519,7 @@ dissect(struct match *m,
 			ssp = sp;
 			oldssp = ssp;
 			for (;;) {	/* find last match of innards */
-				sep = slow(m, ssp, rest, ssub, esub);
+				sep = walk(m, ssp, rest, ssub, esub, false);
 				if (sep == NULL || sep == ssp)
 					break;	/* failed or matched null */
 				oldssp = ssp;	/* on to next try */
@@ -487,7 +531,7 @@ dissect(struct match *m,
 				ssp = oldssp;
 			}
 			assert(sep == rest);	/* must exhaust substring */
-			assert(slow(m, ssp, sep, ssub, esub) == rest);
+			assert(walk(m, ssp, sep, ssub, esub, false) == rest);
 			dp = dissect(m, ssp, sep, ssub, esub);
 			assert(dp == sep);
 			sp = rest;
@@ -496,10 +540,10 @@ dissect(struct match *m,
 			stp = stop;
 			for (;;) {
 				/* how long could this one be? */
-				rest = slow(m, sp, stp, ss, es);
+				rest = walk(m, sp, stp, ss, es, false);
 				assert(rest != NULL);	/* it did match */
 				/* could the rest match the rest? */
-				tail = slow(m, rest, stop, es, stopst);
+				tail = walk(m, rest, stop, es, stopst, false);
 				if (tail == stop)
 					break;		/* yes! */
 				/* no -- try a shorter match for this one */
@@ -510,7 +554,7 @@ dissect(struct match *m,
 			esub = ss + OPND(m->g->strip[ss]) - 1;
 			assert(OP(m->g->strip[esub]) == OOR1);
 			for (;;) {	/* find first matching branch */
-				if (slow(m, sp, rest, ssub, esub) == rest)
+				if (walk(m, sp, rest, ssub, esub, false) == rest)
 					break;	/* it matched all of it */
 				/* that one missed, try next one */
 				assert(OP(m->g->strip[esub]) == OOR1);
@@ -518,7 +562,7 @@ dissect(struct match *m,
 				assert(OP(m->g->strip[esub]) == OOR2);
 				ssub = esub + 1;
 				esub += OPND(m->g->strip[esub]);
-				if (OP(m->g->strip[esub]) == OOR2)
+				if (OP(m->g->strip[esub]) == (sop)OOR2)
 					esub--;
 				else
 					assert(OP(m->g->strip[esub]) == O_CH);
@@ -554,6 +598,17 @@ dissect(struct match *m,
 	return(sp);
 }
 
+#define	ISBOW(m, sp)					\
+    (sp < m->endp && ISWORD(*sp) &&			\
+    ((sp == m->beginp && !(m->eflags&REG_NOTBOL)) ||	\
+    (sp > m->offp && !ISWORD(*(sp-1)))))
+#define	ISEOW(m, sp)					\
+    (((sp == m->endp && !(m->eflags&REG_NOTEOL)) ||	\
+    (sp < m->endp && *sp == '\n' &&			\
+    (m->g->cflags&REG_NEWLINE)) ||			\
+    (sp < m->endp && !ISWORD(*sp)) ) &&			\
+    (sp > m->beginp && ISWORD(*(sp-1))))		\
+
 /*
  - backref - figure out what matched what, figuring in back references
  == static const char *backref(struct match *m, const char *start, \
@@ -611,10 +666,22 @@ backref(struct match *m,
 			if (wc == BADCHAR || !CHIN(cs, wc))
 				return(NULL);
 			break;
+		case OBOS:
+			if (sp == m->beginp && (m->eflags & REG_NOTBOL) == 0)
+				{ /* yes */ }
+			else
+				return(NULL);
+			break;
+		case OEOS:
+			if (sp == m->endp && (m->eflags & REG_NOTEOL) == 0)
+				{ /* yes */ }
+			else
+				return(NULL);
+			break;
 		case OBOL:
-			if ( (sp == m->beginp && !(m->eflags&REG_NOTBOL)) ||
-					(sp < m->endp && *(sp-1) == '\n' &&
-						(m->g->cflags&REG_NEWLINE)) )
+			if ((sp == m->beginp && !(m->eflags&REG_NOTBOL)) ||
+			    (sp > m->offp && sp < m->endp &&
+			    *(sp-1) == '\n' && (m->g->cflags&REG_NEWLINE)))
 				{ /* yes */ }
 			else
 				return(NULL);
@@ -627,23 +694,29 @@ backref(struct match *m,
 			else
 				return(NULL);
 			break;
+		case OWBND:
+			if (ISBOW(m, sp) || ISEOW(m, sp))
+				{ /* yes */ }
+			else
+				return(NULL);
+			break;
+		case ONWBND:
+			if (((sp == m->beginp) && !ISWORD(*sp)) ||
+			    (sp == m->endp && !ISWORD(*(sp - 1))))
+				{ /* yes, beginning/end of subject */ }
+			else if (ISWORD(*(sp - 1)) == ISWORD(*sp))
+				{ /* yes, beginning/end of subject */ }
+			else
+				return(NULL);
+			break;
 		case OBOW:
-			if (( (sp == m->beginp && !(m->eflags&REG_NOTBOL)) ||
-					(sp < m->endp && *(sp-1) == '\n' &&
-						(m->g->cflags&REG_NEWLINE)) ||
-					(sp > m->beginp &&
-							!ISWORD(*(sp-1))) ) &&
-					(sp < m->endp && ISWORD(*sp)) )
+			if (ISBOW(m, sp))
 				{ /* yes */ }
 			else
 				return(NULL);
 			break;
 		case OEOW:
-			if (( (sp == m->endp && !(m->eflags&REG_NOTEOL)) ||
-					(sp < m->endp && *sp == '\n' &&
-						(m->g->cflags&REG_NEWLINE)) ||
-					(sp < m->endp && !ISWORD(*sp)) ) &&
-					(sp > m->beginp && ISWORD(*(sp-1))) )
+			if (ISEOW(m, sp))
 				{ /* yes */ }
 			else
 				return(NULL);
@@ -656,7 +729,7 @@ backref(struct match *m,
 			do {
 				assert(OP(s) == OOR2);
 				ss += OPND(s);
-			} while (OP(s = m->g->strip[ss]) != O_CH);
+			} while (OP(s = m->g->strip[ss]) != (sop)O_CH);
 			/* note that the ss++ gets us past the O_CH */
 			break;
 		default:	/* have to make a choice */
@@ -689,22 +762,19 @@ backref(struct match *m,
 		ssp = m->offp + m->pmatch[i].rm_so;
 		if (memcmp(sp, ssp, len) != 0)
 			return(NULL);
-		while (m->g->strip[ss] != SOP(O_BACK, i))
+		while (m->g->strip[ss] != (sop)SOP(O_BACK, i))
 			ss++;
 		return(backref(m, sp+len, stop, ss+1, stopst, lev, rec));
-		break;
 	case OQUEST_:		/* to null or not */
 		dp = backref(m, sp, stop, ss+1, stopst, lev, rec);
 		if (dp != NULL)
 			return(dp);	/* not */
 		return(backref(m, sp, stop, ss+OPND(s)+1, stopst, lev, rec));
-		break;
 	case OPLUS_:
 		assert(m->lastpos != NULL);
 		assert(lev+1 <= m->g->nplus);
 		m->lastpos[lev+1] = sp;
 		return(backref(m, sp, stop, ss+1, stopst, lev+1, rec));
-		break;
 	case O_PLUS:
 		if (sp == m->lastpos[lev])	/* last pass matched null */
 			return(backref(m, sp, stop, ss+1, stopst, lev-1, rec));
@@ -715,7 +785,6 @@ backref(struct match *m,
 			return(backref(m, sp, stop, ss+1, stopst, lev-1, rec));
 		else
 			return(dp);
-		break;
 	case OCH_:		/* find the right one, if any */
 		ssub = ss + 1;
 		esub = ss + OPND(s) - 1;
@@ -725,17 +794,18 @@ backref(struct match *m,
 			if (dp != NULL)
 				return(dp);
 			/* that one missed, try next one */
-			if (OP(m->g->strip[esub]) == O_CH)
+			if (OP(m->g->strip[esub]) == (sop)O_CH)
 				return(NULL);	/* there is none */
 			esub++;
-			assert(OP(m->g->strip[esub]) == OOR2);
+			assert(OP(m->g->strip[esub]) == (sop)OOR2);
 			ssub = esub + 1;
 			esub += OPND(m->g->strip[esub]);
-			if (OP(m->g->strip[esub]) == OOR2)
+			if (OP(m->g->strip[esub]) == (sop)OOR2)
 				esub--;
 			else
 				assert(OP(m->g->strip[esub]) == O_CH);
 		}
+		/* NOTREACHED */
 		break;
 	case OLPAREN:		/* must undo assignment if rest fails */
 		i = OPND(s);
@@ -747,7 +817,6 @@ backref(struct match *m,
 			return(dp);
 		m->pmatch[i].rm_so = offsave;
 		return(NULL);
-		break;
 	case ORPAREN:		/* must undo assignment if rest fails */
 		i = OPND(s);
 		assert(0 < i && i <= m->g->nsub);
@@ -758,7 +827,6 @@ backref(struct match *m,
 			return(dp);
 		m->pmatch[i].rm_eo = offsave;
 		return(NULL);
-		break;
 	default:		/* uh oh */
 		assert(nope);
 		break;
@@ -771,141 +839,36 @@ backref(struct match *m,
 }
 
 /*
- - fast - step through the string at top speed
- == static const char *fast(struct match *m, const char *start, \
- ==	const char *stop, sopno startst, sopno stopst);
+ - walk - step through the string either quickly or slowly
+ == static const char *walk(struct match *m, const char *start, \
+ ==	const char *stop, sopno startst, sopno stopst, bool fast);
  */
-static const char *		/* where tentative match ended, or NULL */
-fast(	struct match *m,
-	const char *start,
-	const char *stop,
-	sopno startst,
-	sopno stopst)
+static const char * /* where it ended, or NULL */
+walk(struct match *m, const char *start, const char *stop, sopno startst,
+	sopno stopst, bool fast)
 {
 	states st = m->st;
 	states fresh = m->fresh;
-	states tmp = m->tmp;
-	const char *p = start;
-	wint_t c;
-	wint_t lastc;		/* previous c */
-	wint_t flagch;
-	int i;
-	const char *coldp;	/* last p after which no match was underway */
-	size_t clen;
-
-	CLEAR(st);
-	SET1(st, startst);
-	SP("fast", st, *p);
-	st = step(m->g, startst, stopst, st, NOTHING, st);
-	ASSIGN(fresh, st);
-	SP("start", st, *p);
-	coldp = NULL;
-	if (start == m->beginp)
-		c = OUT;
-	else {
-		/*
-		 * XXX Wrong if the previous character was multi-byte.
-		 * Newline never is (in encodings supported by FreeBSD),
-		 * so this only breaks the ISWORD tests below.
-		 */
-		c = (uch)*(start - 1);
-	}
-	for (;;) {
-		/* next character */
-		lastc = c;
-		if (p == m->endp) {
-			clen = 0;
-			c = OUT;
-		} else
-			clen = XMBRTOWC(&c, p, m->endp - p, &m->mbs, BADCHAR);
-		if (EQ(st, fresh))
-			coldp = p;
-
-		/* is there an EOL and/or BOL between lastc and c? */
-		flagch = '\0';
-		i = 0;
-		if ( (lastc == '\n' && m->g->cflags&REG_NEWLINE) ||
-				(lastc == OUT && !(m->eflags&REG_NOTBOL)) ) {
-			flagch = BOL;
-			i = m->g->nbol;
-		}
-		if ( (c == '\n' && m->g->cflags&REG_NEWLINE) ||
-				(c == OUT && !(m->eflags&REG_NOTEOL)) ) {
-			flagch = (flagch == BOL) ? BOLEOL : EOL;
-			i += m->g->neol;
-		}
-		if (i != 0) {
-			for (; i > 0; i--)
-				st = step(m->g, startst, stopst, st, flagch, st);
-			SP("boleol", st, c);
-		}
-
-		/* how about a word boundary? */
-		if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) &&
-					(c != OUT && ISWORD(c)) ) {
-			flagch = BOW;
-		}
-		if ( (lastc != OUT && ISWORD(lastc)) &&
-				(flagch == EOL || (c != OUT && !ISWORD(c))) ) {
-			flagch = EOW;
-		}
-		if (flagch == BOW || flagch == EOW) {
-			st = step(m->g, startst, stopst, st, flagch, st);
-			SP("boweow", st, c);
-		}
-
-		/* are we done? */
-		if (ISSET(st, stopst) || p == stop || clen > stop - p)
-			break;		/* NOTE BREAK OUT */
-
-		/* no, we must deal with this character */
-		ASSIGN(tmp, st);
-		ASSIGN(st, fresh);
-		assert(c != OUT);
-		st = step(m->g, startst, stopst, tmp, c, st);
-		SP("aft", st, c);
-		assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
-		p += clen;
-	}
-
-	assert(coldp != NULL);
-	m->coldp = coldp;
-	if (ISSET(st, stopst))
-		return(p+XMBRTOWC(NULL, p, stop - p, &m->mbs, 0));
-	else
-		return(NULL);
-}
-
-/*
- - slow - step through the string more deliberately
- == static const char *slow(struct match *m, const char *start, \
- ==	const char *stop, sopno startst, sopno stopst);
- */
-static const char *		/* where it ended */
-slow(	struct match *m,
-	const char *start,
-	const char *stop,
-	sopno startst,
-	sopno stopst)
-{
-	states st = m->st;
 	states empty = m->empty;
 	states tmp = m->tmp;
 	const char *p = start;
 	wint_t c;
 	wint_t lastc;		/* previous c */
 	wint_t flagch;
-	int i;
+	int i, sflags;
 	const char *matchp;	/* last p at which a match ended */
 	size_t clen;
 
+	sflags = 0;
 	AT("slow", start, stop, startst, stopst);
 	CLEAR(st);
 	SET1(st, startst);
 	SP("sstart", st, *p);
-	st = step(m->g, startst, stopst, st, NOTHING, st);
+	st = step(m->g, startst, stopst, st, NOTHING, st, sflags);
+	if (fast)
+		ASSIGN(fresh, st);
 	matchp = NULL;
-	if (start == m->beginp)
+	if (start == m->offp || (start == m->beginp && !(m->eflags&REG_NOTBOL)))
 		c = OUT;
 	else {
 		/*
@@ -918,12 +881,16 @@ slow(	struct match *m,
 	for (;;) {
 		/* next character */
 		lastc = c;
+		sflags = 0;
 		if (p == m->endp) {
 			c = OUT;
 			clen = 0;
 		} else
 			clen = XMBRTOWC(&c, p, m->endp - p, &m->mbs, BADCHAR);
 
+		if (fast && EQ(st, fresh))
+			matchp = p;
+
 		/* is there an EOL and/or BOL between lastc and c? */
 		flagch = '\0';
 		i = 0;
@@ -937,9 +904,20 @@ slow(	struct match *m,
 			flagch = (flagch == BOL) ? BOLEOL : EOL;
 			i += m->g->neol;
 		}
+		if (lastc == OUT && (m->eflags & REG_NOTBOL) == 0) {
+			sflags |= SBOS;
+			/* Step one more for BOS. */
+			i++;
+		}
+		if (c == OUT && (m->eflags & REG_NOTEOL) == 0) {
+			sflags |= SEOS;
+			/* Step one more for EOS. */
+			i++;
+		}
 		if (i != 0) {
 			for (; i > 0; i--)
-				st = step(m->g, startst, stopst, st, flagch, st);
+				st = step(m->g, startst, stopst, st, flagch, st,
+				    sflags);
 			SP("sboleol", st, c);
 		}
 
@@ -953,30 +931,56 @@ slow(	struct match *m,
 			flagch = EOW;
 		}
 		if (flagch == BOW || flagch == EOW) {
-			st = step(m->g, startst, stopst, st, flagch, st);
+			st = step(m->g, startst, stopst, st, flagch, st, sflags);
 			SP("sboweow", st, c);
 		}
+		if (lastc != OUT && c != OUT &&
+		    ISWORD(lastc) == ISWORD(c)) {
+			flagch = NWBND;
+		} else if ((lastc == OUT && !ISWORD(c)) ||
+		    (c == OUT && !ISWORD(lastc))) {
+			flagch = NWBND;
+		}
+		if (flagch == NWBND) {
+			st = step(m->g, startst, stopst, st, flagch, st, sflags);
+			SP("snwbnd", st, c);
+		}
 
 		/* are we done? */
-		if (ISSET(st, stopst))
-			matchp = p;
-		if (EQ(st, empty) || p == stop || clen > stop - p)
+		if (ISSET(st, stopst)) {
+			if (fast)
+				break;
+			else
+				matchp = p;
+		}
+		if (EQ(st, empty) || p == stop || clen > (size_t)(stop - p))
 			break;		/* NOTE BREAK OUT */
 
 		/* no, we must deal with this character */
 		ASSIGN(tmp, st);
-		ASSIGN(st, empty);
+		if (fast)
+			ASSIGN(st, fresh);
+		else
+			ASSIGN(st, empty);
 		assert(c != OUT);
-		st = step(m->g, startst, stopst, tmp, c, st);
+		st = step(m->g, startst, stopst, tmp, c, st, sflags);
 		SP("saft", st, c);
-		assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
+		assert(EQ(step(m->g, startst, stopst, st, NOTHING, st, sflags),
+		    st));
 		p += clen;
 	}
 
-	return(matchp);
+	if (fast) {
+		assert(matchp != NULL);
+		m->coldp = matchp;
+		if (ISSET(st, stopst))
+			return (p + XMBRTOWC(NULL, p, stop - p, &m->mbs, 0));
+		else
+			return (NULL);
+	} else
+		return (matchp);
 }
 
-
 /*
  - step - map set of states reachable before char to set reachable after
  == static states step(struct re_guts *g, sopno start, sopno stop, \
@@ -996,7 +1000,8 @@ step(struct re_guts *g,
 	sopno stop,		/* state after stop state within strip */
 	states bef,		/* states reachable before */
 	wint_t ch,		/* character or NONCHAR code */
-	states aft)		/* states already known reachable after */
+	states aft,		/* states already known reachable after */
+	int sflags)		/* state flags */
 {
 	cset *cs;
 	sop s;
@@ -1017,6 +1022,14 @@ step(struct re_guts *g,
 			if (ch == OPND(s))
 				FWD(aft, bef, 1);
 			break;
+		case OBOS:
+			if ((ch == BOL || ch == BOLEOL) && (sflags & SBOS) != 0)
+				FWD(aft, bef, 1);
+			break;
+		case OEOS:
+			if ((ch == EOL || ch == BOLEOL) && (sflags & SEOS) != 0)
+				FWD(aft, bef, 1);
+			break;
 		case OBOL:
 			if (ch == BOL || ch == BOLEOL)
 				FWD(aft, bef, 1);
@@ -1033,6 +1046,14 @@ step(struct re_guts *g,
 			if (ch == EOW)
 				FWD(aft, bef, 1);
 			break;
+		case OWBND:
+			if (ch == BOW || ch == EOW)
+				FWD(aft, bef, 1);
+			break;
+		case ONWBND:
+			if (ch == NWBND)
+				FWD(aft, aft, 1);
+			break;
 		case OANY:
 			if (!NONCHAR(ch))
 				FWD(aft, bef, 1);
@@ -1072,22 +1093,22 @@ step(struct re_guts *g,
 			break;
 		case OCH_:		/* mark the first two branches */
 			FWD(aft, aft, 1);
-			assert(OP(g->strip[pc+OPND(s)]) == OOR2);
+			assert(OP(g->strip[pc+OPND(s)]) == (sop)OOR2);
 			FWD(aft, aft, OPND(s));
 			break;
 		case OOR1:		/* done a branch, find the O_CH */
 			if (ISSTATEIN(aft, here)) {
 				for (look = 1;
-						OP(s = g->strip[pc+look]) != O_CH;
-						look += OPND(s))
-					assert(OP(s) == OOR2);
+				    OP(s = g->strip[pc+look]) != (sop)O_CH;
+				    look += OPND(s))
+					assert(OP(s) == (sop)OOR2);
 				FWD(aft, aft, look + 1);
 			}
 			break;
 		case OOR2:		/* propagate OCH_'s marking */
 			FWD(aft, aft, 1);
-			if (OP(g->strip[pc+OPND(s)]) != O_CH) {
-				assert(OP(g->strip[pc+OPND(s)]) == OOR2);
+			if (OP(g->strip[pc+OPND(s)]) != (sop)O_CH) {
+				assert(OP(g->strip[pc+OPND(s)]) == (sop)OOR2);
 				FWD(aft, aft, OPND(s));
 			}
 			break;
@@ -1119,7 +1140,7 @@ print(struct match *m,
 	FILE *d)
 {
 	struct re_guts *g = m->g;
-	int i;
+	sopno i;
 	int first = 1;
 
 	if (!(m->eflags&REG_TRACE))
@@ -1130,7 +1151,7 @@ print(struct match *m,
 		fprintf(d, " %s", pchar(ch));
 	for (i = 0; i < g->nstates; i++)
 		if (ISSET(st, i)) {
-			fprintf(d, "%s%d", (first) ? "\t" : ", ", i);
+			fprintf(d, "%s%lu", (first) ? "\t" : ", ", i);
 			first = 0;
 		}
 	fprintf(d, "\n");
@@ -1186,9 +1207,9 @@ pchar(int ch)
 #endif
 #endif
 
+#undef	stepback
 #undef	matcher
-#undef	fast
-#undef	slow
+#undef	walk
 #undef	dissect
 #undef	backref
 #undef	step
diff --git a/winsup/cygwin/regex/regcomp.c b/winsup/cygwin/regex/regcomp.c
index 9e29257cb3a8..6b7ddf5d0b88 100644
--- a/winsup/cygwin/regex/regcomp.c
+++ b/winsup/cygwin/regex/regcomp.c
@@ -1,8 +1,15 @@
 /*-
+ * SPDX-License-Identifier: BSD-3-Clause
+ *
  * Copyright (c) 1992, 1993, 1994 Henry Spencer.
  * Copyright (c) 1992, 1993, 1994
  *	The Regents of the University of California.  All rights reserved.
  *
+ * Copyright (c) 2011 The FreeBSD Foundation
+ *
+ * Portions of this software were developed by David Chisnall
+ * under sponsorship from the FreeBSD Foundation.
+ *
  * This code is derived from software contributed to Berkeley by
  * Henry Spencer.
  *
@@ -14,7 +21,7 @@
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
- * 4. Neither the name of the University nor the names of its contributors
+ * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
@@ -37,12 +44,8 @@
 static char sccsid[] = "@(#)regcomp.c	8.5 (Berkeley) 3/20/94";
 #endif /* LIBC_SCCS and not lint */
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/lib/libc/regex/regcomp.c,v 1.36 2007/06/11 03:05:54 delphij Exp $");
+__FBSDID("$FreeBSD$");
 
-#ifdef __CYGWIN__
-#include "winsup.h"
-#include "../locale/setlocale.h"
-#endif
 #include <sys/types.h>
 #include <stdio.h>
 #include <string.h>
@@ -50,27 +53,46 @@ __FBSDID("$FreeBSD: src/lib/libc/regex/regcomp.c,v 1.36 2007/06/11 03:05:54 delp
 #include <limits.h>
 #include <stdlib.h>
 #include <regex.h>
-#ifndef __CYGWIN__
-#include <runetype.h>
-#endif
+#include <stdbool.h>
 #include <wchar.h>
 #include <wctype.h>
 
+#ifndef LIBREGEX
 #include "collate.h"
+#endif
 
 #include "utils.h"
 #include "regex2.h"
 
 #include "cname.h"
 
+/*
+ * Branching context, used to keep track of branch state for all of the branch-
+ * aware functions. In addition to keeping track of branch positions for the
+ * p_branch_* functions, we use this to simplify some clumsiness in BREs for
+ * detection of whether ^ is acting as an anchor or being used erroneously and
+ * also for whether we're in a sub-expression or not.
+ */
+struct branchc {
+	sopno start;
+	sopno back;
+	sopno fwd;
+
+	int nbranch;
+	int nchain;
+	bool outer;
+	bool terminate;
+};
+
 /*
  * parse structure, passed up and down to avoid global variables and
  * other clumsinesses
  */
 struct parse {
-	char *next;		/* next character in RE */
-	char *end;		/* end of string (-> NUL normally) */
+	const char *next;	/* next character in RE */
+	const char *end;	/* end of string (-> NUL normally) */
 	int error;		/* has an error been seen? */
+	int gnuext;
 	sop *strip;		/* malloced strip */
 	sopno ssize;		/* malloced strip size (allocated) */
 	sopno slen;		/* malloced strip length (used) */
@@ -79,34 +101,44 @@ struct parse {
 #	define	NPAREN	10	/* we need to remember () 1-9 for back refs */
 	sopno pbegin[NPAREN];	/* -> ( ([0] unused) */
 	sopno pend[NPAREN];	/* -> ) ([0] unused) */
+	bool allowbranch;	/* can this expression branch? */
+	bool bre;		/* convenience; is this a BRE? */
+	int pflags;		/* other parsing flags -- legacy escapes? */
+	bool (*parse_expr)(struct parse *, struct branchc *);
+	void (*pre_parse)(struct parse *, struct branchc *);
+	void (*post_parse)(struct parse *, struct branchc *);
 };
 
+#define PFLAG_LEGACY_ESC	0x00000001
+
 /* ========= begin header generated by ./mkh ========= */
 #ifdef __cplusplus
 extern "C" {
 #endif
 
 /* === regcomp.c === */
-#ifdef __CYGWIN__ /* Defined below `int stop'.  Our gcc chokes on that. */
-static void p_ere(struct parse *p, int stop);
-#else
-static void p_ere(struct parse *p, wint_t stop);
-#endif
-static void p_ere_exp(struct parse *p);
+static bool p_ere_exp(struct parse *p, struct branchc *bc);
 static void p_str(struct parse *p);
-#ifdef __CYGWIN__ /* Defined below `int end1/end2'.  Our gcc chokes on that. */
-static void p_bre(struct parse *p, int end1, int end2);
-#else
-static void p_bre(struct parse *p, wint_t end1, wint_t end2);
-#endif
-static int p_simp_re(struct parse *p, int starordinary);
+static int p_branch_eat_delim(struct parse *p, struct branchc *bc);
+static void p_branch_ins_offset(struct parse *p, struct branchc *bc);
+static void p_branch_fix_tail(struct parse *p, struct branchc *bc);
+static bool p_branch_empty(struct parse *p, struct branchc *bc);
+static bool p_branch_do(struct parse *p, struct branchc *bc);
+static void p_bre_pre_parse(struct parse *p, struct branchc *bc);
+static void p_bre_post_parse(struct parse *p, struct branchc *bc);
+static void p_re(struct parse *p, int end1, int end2);
+static bool p_simp_re(struct parse *p, struct branchc *bc);
 static int p_count(struct parse *p);
 static void p_bracket(struct parse *p);
+static int p_range_cmp(wchar_t c1, wchar_t c2);
 static void p_b_term(struct parse *p, cset *cs);
+static int p_b_pseudoclass(struct parse *p, char c);
 static void p_b_cclass(struct parse *p, cset *cs);
+static void p_b_cclass_named(struct parse *p, cset *cs, const char[]);
 static void p_b_eclass(struct parse *p, cset *cs);
 static wint_t p_b_symbol(struct parse *p);
 static wint_t p_b_coll_elem(struct parse *p, wint_t endc);
+static bool may_escape(struct parse *p, const wint_t ch);
 static wint_t othercase(wint_t ch);
 static void bothcases(struct parse *p, wint_t ch);
 static void ordinary(struct parse *p, wint_t ch);
@@ -123,7 +155,7 @@ static sopno dupl(struct parse *p, sopno start, sopno finish);
 static void doemit(struct parse *p, sop op, size_t opnd);
 static void doinsert(struct parse *p, sop op, size_t opnd, sopno pos);
 static void dofwd(struct parse *p, sopno pos, sop value);
-static void enlarge(struct parse *p, sopno size);
+static int enlarge(struct parse *p, sopno size);
 static void stripsnug(struct parse *p, struct re_guts *g);
 static void findmust(struct parse *p, struct re_guts *g);
 static int altoffset(sop *scan, int offset);
@@ -131,7 +163,6 @@ static void computejumps(struct parse *p, struct re_guts *g);
 static void computematchjumps(struct parse *p, struct re_guts *g);
 static sopno pluscount(struct parse *p, struct re_guts *g);
 static wint_t wgetnext(struct parse *p);
-static size_t xwcrtomb (char *s, wint_t wc, mbstate_t *ps);
 
 #ifdef __cplusplus
 }
@@ -146,12 +177,14 @@ static char nuls[10];		/* place to point scanner in event of error */
  */
 #define	PEEK()	(*p->next)
 #define	PEEK2()	(*(p->next+1))
-#define	MORE()	(p->next < p->end)
-#define	MORE2()	(p->next+1 < p->end)
+#define	MORE()	(p->end - p->next > 0)
+#define	MORE2()	(p->end - p->next > 1)
 #define	SEE(c)	(MORE() && PEEK() == (c))
-#define	SEETWO(a, b)	(MORE() && MORE2() && PEEK() == (a) && PEEK2() == (b))
+#define	SEETWO(a, b)	(MORE2() && PEEK() == (a) && PEEK2() == (b))
+#define	SEESPEC(a)	(p->bre ? SEETWO('\\', a) : SEE(a))
 #define	EAT(c)	((SEE(c)) ? (NEXT(), 1) : 0)
 #define	EATTWO(a, b)	((SEETWO(a, b)) ? (NEXT2(), 1) : 0)
+#define	EATSPEC(a)	(p->bre ? EATTWO('\\', a) : EAT(a))
 #define	NEXT()	(p->next++)
 #define	NEXT2()	(p->next += 2)
 #define	NEXTn(n)	(p->next += (n))
@@ -171,37 +204,20 @@ static char nuls[10];		/* place to point scanner in event of error */
 #define	THERETHERE()	(p->slen - 2)
 #define	DROP(n)	(p->slen -= (n))
 
-#ifndef NDEBUG
-static int never = 0;		/* for use in asserts; shuts lint up */
-#else
-#define	never	0		/* some <assert.h>s have bugs too */
-#endif
-
 /* Macro used by computejump()/computematchjump() */
 #define MIN(a,b)	((a)<(b)?(a):(b))
 
-/*
- - regcomp - interface for parser and compilation
- = extern int regcomp(regex_t *, const char *, int);
- = #define	REG_BASIC	0000
- = #define	REG_EXTENDED	0001
- = #define	REG_ICASE	0002
- = #define	REG_NOSUB	0004
- = #define	REG_NEWLINE	0010
- = #define	REG_NOSPEC	0020
- = #define	REG_PEND	0040
- = #define	REG_DUMP	0200
- */
-int				/* 0 success, otherwise REG_something */
-regcomp(regex_t * __restrict preg,
+static int				/* 0 success, otherwise REG_something */
+regcomp_internal(regex_t * __restrict preg,
 	const char * __restrict pattern,
-	int cflags)
+	int cflags, int pflags)
 {
 	struct parse pa;
 	struct re_guts *g;
 	struct parse *p = &pa;
 	int i;
 	size_t len;
+	size_t maxlen;
 #ifdef REDEBUG
 #	define	GOODFLAGS(f)	(f)
 #else
@@ -217,13 +233,29 @@ regcomp(regex_t * __restrict preg,
 			return(REG_INVARG);
 		len = preg->re_endp - pattern;
 	} else
-		len = strlen((char *)pattern);
+		len = strlen(pattern);
 
 	/* do the mallocs early so failure handling is easy */
 	g = (struct re_guts *)malloc(sizeof(struct re_guts));
 	if (g == NULL)
 		return(REG_ESPACE);
+	/*
+	 * Limit the pattern space to avoid a 32-bit overflow on buffer
+	 * extension.  Also avoid any signed overflow in case of conversion
+	 * so make the real limit based on a 31-bit overflow.
+	 *
+	 * Likely not applicable on 64-bit systems but handle the case
+	 * generically (who are we to stop people from using ~715MB+
+	 * patterns?).
+	 */
+	maxlen = ((size_t)-1 >> 1) / sizeof(sop) * 2 / 3;
+	if (len >= maxlen) {
+		free((char *)g);
+		return(REG_ESPACE);
+	}
 	p->ssize = len/(size_t)2*(size_t)3 + (size_t)1;	/* ugh */
+	assert(p->ssize >= len);
+
 	p->strip = (sop *)malloc(p->ssize * sizeof(sop));
 	p->slen = 0;
 	if (p->strip == NULL) {
@@ -233,14 +265,36 @@ regcomp(regex_t * __restrict preg,
 
 	/* set things up */
 	p->g = g;
-	p->next = (char *)pattern;	/* convenience; we do not modify it */
+	p->next = pattern;	/* convenience; we do not modify it */
 	p->end = p->next + len;
 	p->error = 0;
 	p->ncsalloc = 0;
+	p->pflags = pflags;
 	for (i = 0; i < NPAREN; i++) {
 		p->pbegin[i] = 0;
 		p->pend[i] = 0;
 	}
+#ifdef LIBREGEX
+	if (cflags&REG_POSIX) {
+		p->gnuext = false;
+		p->allowbranch = (cflags & REG_EXTENDED) != 0;
+	} else
+		p->gnuext = p->allowbranch = true;
+#else
+	p->gnuext = false;
+	p->allowbranch = (cflags & REG_EXTENDED) != 0;
+#endif
+	if (cflags & REG_EXTENDED) {
+		p->bre = false;
+		p->parse_expr = p_ere_exp;
+		p->pre_parse = NULL;
+		p->post_parse = NULL;
+	} else {
+		p->bre = true;
+		p->parse_expr = p_simp_re;
+		p->pre_parse = p_bre_pre_parse;
+		p->post_parse = p_bre_post_parse;
+	}
 	g->sets = NULL;
 	g->ncsets = 0;
 	g->cflags = cflags;
@@ -258,12 +312,10 @@ regcomp(regex_t * __restrict preg,
 	/* do it */
 	EMIT(OEND, 0);
 	g->firststate = THERE();
-	if (cflags&REG_EXTENDED)
-		p_ere(p, OUT);
-	else if (cflags&REG_NOSPEC)
+	if (cflags & REG_NOSPEC)
 		p_str(p);
 	else
-		p_bre(p, OUT, OUT);
+		p_re(p, OUT, OUT);
 	EMIT(OEND, 0);
 	g->laststate = THERE();
 
@@ -299,71 +351,69 @@ regcomp(regex_t * __restrict preg,
 }
 
 /*
- - p_ere - ERE parser top level, concatenation and alternation
- == static void p_ere(struct parse *p, int stop);
+ - regcomp - interface for parser and compilation
+ = extern int regcomp(regex_t *, const char *, int);
+ = #define	REG_BASIC	0000
+ = #define	REG_EXTENDED	0001
+ = #define	REG_ICASE	0002
+ = #define	REG_NOSUB	0004
+ = #define	REG_NEWLINE	0010
+ = #define	REG_NOSPEC	0020
+ = #define	REG_PEND	0040
+ = #define	REG_DUMP	0200
  */
-static void
-p_ere(struct parse *p,
-	int stop)		/* character this ERE should end at */
+int				/* 0 success, otherwise REG_something */
+regcomp(regex_t * __restrict preg,
+	const char * __restrict pattern,
+	int cflags)
 {
-	char c;
-	sopno prevback = 0;
-	sopno prevfwd = 0;
-	sopno conc;
-	int first = 1;		/* is this the first alternative? */
-
-	for (;;) {
-		/* do a bunch of concatenated expressions */
-		conc = HERE();
-		while (MORE() && (c = PEEK()) != '|' && c != stop)
-			p_ere_exp(p);
-#ifndef __CYGWIN__
-		/* undefined behaviour according to POSIX; allowed by glibc */
-		(void)REQUIRE(HERE() != conc, REG_EMPTY);	/* require nonempty */
-#endif
-
-		if (!EAT('|'))
-			break;		/* NOTE BREAK OUT */
 
-		if (first) {
-			INSERT(OCH_, conc);	/* offset is wrong */
-			prevfwd = conc;
-			prevback = conc;
-			first = 0;
-		}
-		ASTERN(OOR1, prevback);
-		prevback = THERE();
-		AHEAD(prevfwd);			/* fix previous offset */
-		prevfwd = HERE();
-		EMIT(OOR2, 0);			/* offset is very wrong */
-	}
+	return (regcomp_internal(preg, pattern, cflags, 0));
+}
 
-	if (!first) {		/* tail-end fixups */
-		AHEAD(prevfwd);
-		ASTERN(O_CH, prevback);
-	}
+#ifndef LIBREGEX
+/*
+ * Legacy interface that requires more lax escaping behavior.
+ */
+int
+freebsd12_regcomp(regex_t * __restrict preg,
+	const char * __restrict pattern,
+	int cflags, int pflags)
+{
 
-	assert(!MORE() || SEE(stop));
+	return (regcomp_internal(preg, pattern, cflags, PFLAG_LEGACY_ESC));
 }
 
+__sym_compat(regcomp, freebsd12_regcomp, FBSD_1.0);
+#endif	/* !LIBREGEX */
+
 /*
- - p_ere_exp - parse one subERE, an atom possibly followed by a repetition op
- == static void p_ere_exp(struct parse *p);
+ - p_ere_exp - parse one subERE, an atom possibly followed by a repetition op,
+ - return whether we should terminate or not
+ == static bool p_ere_exp(struct parse *p);
  */
-static void
-p_ere_exp(struct parse *p)
+static bool
+p_ere_exp(struct parse *p, struct branchc *bc)
 {
 	char c;
 	wint_t wc;
 	sopno pos;
 	int count;
 	int count2;
+#ifdef LIBREGEX
+	int i;
+	int handled;
+#endif
 	sopno subno;
 	int wascaret = 0;
 
+	(void)bc;
 	assert(MORE());		/* caller should have ensured this */
 	c = GETNEXT();
 
+#ifdef LIBREGEX
+	handled = 0;
+#endif
 	pos = HERE();
 	switch (c) {
 	case '(':
@@ -374,7 +424,7 @@ p_ere_exp(struct parse *p)
 			p->pbegin[subno] = HERE();
 		EMIT(OLPAREN, subno);
 		if (!SEE(')'))
-			p_ere(p, ')');
+			p_re(p, ')', IGN);
 		if (subno < NPAREN) {
 			p->pend[subno] = HERE();
 			assert(p->pend[subno] != 0);
@@ -411,6 +461,7 @@ p_ere_exp(struct parse *p)
 	case '*':
 	case '+':
 	case '?':
+	case '{':
 		SETERROR(REG_BADRPT);
 		break;
 	case '.':
@@ -425,28 +476,77 @@ p_ere_exp(struct parse *p)
 	case '\\':
 		(void)REQUIRE(MORE(), REG_EESCAPE);
 		wc = WGETNEXT();
-#ifdef __CYGWIN__
-		/* \< and \> are the GNU equivalents to [[:<:]] and [[:>:]] */
-		switch (wc)
-		  {
-		  case L'<':
-		    EMIT(OBOW, 0);
-		    break;
-		  case L'>':
-		    EMIT(OEOW, 0);
-		    break;
-		  default:
-		    ordinary(p, wc);
-		    break;
-		  }
-#else
-		ordinary(p, wc);
+#ifdef LIBREGEX
+		if (p->gnuext) {
+			handled = 1;
+			switch (wc) {
+			case '`':
+				EMIT(OBOS, 0);
+				break;
+			case '\'':
+				EMIT(OEOS, 0);
+				break;
+			case 'B':
+				EMIT(ONWBND, 0);
+				break;
+			case 'b':
+				EMIT(OWBND, 0);
+				break;
+			case 'W':
+			case 'w':
+			case 'S':
+			case 's':
+				p_b_pseudoclass(p, wc);
+				break;
+			case '1':
+			case '2':
+			case '3':
+			case '4':
+			case '5':
+			case '6':
+			case '7':
+			case '8':
+			case '9':
+				i = wc - '0';
+				assert(i < NPAREN);
+				if (p->pend[i] != 0) {
+					assert(i <= p->g->nsub);
+					EMIT(OBACK_, i);
+					assert(p->pbegin[i] != 0);
+					assert(OP(p->strip[p->pbegin[i]]) == OLPAREN);
+					assert(OP(p->strip[p->pend[i]]) == ORPAREN);
+					(void) dupl(p, p->pbegin[i]+1, p->pend[i]);
+					EMIT(O_BACK, i);
+				} else
+					SETERROR(REG_ESUBREG);
+				p->g->backrefs = 1;
+				break;
+			default:
+				handled = 0;
+			}
+			/* Don't proceed to the POSIX bits if we've already handled it */
+			if (handled)
+				break;
+		}
 #endif
+		switch (wc) {
+		case '<':
+			EMIT(OBOW, 0);
+			break;
+		case '>':
+			EMIT(OEOW, 0);
+			break;
+		default:
+			if (may_escape(p, wc))
+				ordinary(p, wc);
+			else
+				SETERROR(REG_EESCAPE);
+			break;
+		}
 		break;
-	case '{':		/* okay as ordinary except if digit follows */
-		(void)REQUIRE(!MORE() || !isdigit((uch)PEEK()), REG_BADRPT);
-		fallthrough;
 	default:
+		if (p->error != 0)
+			return (false);
 		p->next--;
 		wc = WGETNEXT();
 		ordinary(p, wc);
@@ -454,12 +554,14 @@ p_ere_exp(struct parse *p)
 	}
 
 	if (!MORE())
-		return;
+		return (false);
 	c = PEEK();
 	/* we call { a repetition if followed by a digit */
-	if (!( c == '*' || c == '+' || c == '?' ||
-				(c == '{' && MORE2() && isdigit((uch)PEEK2())) ))
-		return;		/* no repetition, we're done */
+	if (!( c == '*' || c == '+' || c == '?' || c == '{'))
+		return (false);		/* no repetition, we're done */
+	else if (c == '{')
+		(void)REQUIRE(MORE2() && \
+		    (isdigit((uch)PEEK2()) || PEEK2() == ','), REG_BADRPT);
 	NEXT();
 
 	(void)REQUIRE(!wascaret, REG_BADRPT);
@@ -505,12 +607,13 @@ p_ere_exp(struct parse *p)
 	}
 
 	if (!MORE())
-		return;
+		return (false);
 	c = PEEK();
 	if (!( c == '*' || c == '+' || c == '?' ||
 				(c == '{' && MORE2() && isdigit((uch)PEEK2())) ) )
-		return;
+		return (false);
 	SETERROR(REG_BADRPT);
+	return (false);
 }
 
 /*
@@ -526,142 +629,321 @@ p_str(struct parse *p)
 }
 
 /*
- - p_bre - BRE parser top level, anchoring and concatenation
- == static void p_bre(struct parse *p, int end1, \
- ==	int end2);
- * Giving end1 as OUT essentially eliminates the end1/end2 check.
- *
- * This implementation is a bit of a kludge, in that a trailing $ is first
- * taken as an ordinary character and then revised to be an anchor.
- * The amount of lookahead needed to avoid this kludge is excessive.
+ * Eat consecutive branch delimiters for the kind of expression that we are
+ * parsing, return the number of delimiters that we ate.
+ */
+static int
+p_branch_eat_delim(struct parse *p, struct branchc *bc)
+{
+	int nskip;
+
+	(void)bc;
+	nskip = 0;
+	while (EATSPEC('|'))
+		++nskip;
+	return (nskip);
+}
+
+/*
+ * Insert necessary branch book-keeping operations. This emits a
+ * bogus 'next' offset, since we still have more to parse
+ */
+static void
+p_branch_ins_offset(struct parse *p, struct branchc *bc)
+{
+
+	if (bc->nbranch == 0) {
+		INSERT(OCH_, bc->start);	/* offset is wrong */
+		bc->fwd = bc->start;
+		bc->back = bc->start;
+	}
+
+	ASTERN(OOR1, bc->back);
+	bc->back = THERE();
+	AHEAD(bc->fwd);			/* fix previous offset */
+	bc->fwd = HERE();
+	EMIT(OOR2, 0);			/* offset is very wrong */
+	++bc->nbranch;
+}
+
+/*
+ * Fix the offset of the tail branch, if we actually had any branches.
+ * This is to correct the bogus placeholder offset that we use.
+ */
+static void
+p_branch_fix_tail(struct parse *p, struct branchc *bc)
+{
+
+	/* Fix bogus offset at the tail if we actually have branches */
+	if (bc->nbranch > 0) {
+		AHEAD(bc->fwd);
+		ASTERN(O_CH, bc->back);
+	}
+}
+
+/*
+ * Signal to the parser that an empty branch has been encountered; this will,
+ * in the future, be used to allow for more permissive behavior with empty
+ * branches. The return value should indicate whether parsing may continue
+ * or not.
  */
+static bool
+p_branch_empty(struct parse *p, struct branchc *bc)
+{
+
+	(void)bc;
+	SETERROR(REG_EMPTY);
+	return (false);
+}
+
+/*
+ * Take care of any branching requirements. This includes inserting the
+ * appropriate branching instructions as well as eating all of the branch
+ * delimiters until we either run out of pattern or need to parse more pattern.
+ */
+static bool
+p_branch_do(struct parse *p, struct branchc *bc)
+{
+	int ate = 0;
+
+	ate = p_branch_eat_delim(p, bc);
+	if (ate == 0)
+		return (false);
+	else if ((ate > 1 || (bc->outer && !MORE())) && !p_branch_empty(p, bc))
+		/*
+		 * Halt parsing only if we have an empty branch and p_branch_empty
+		 * indicates that we must not continue. In the future, this will not
+		 * necessarily be an error.
+		 */
+		return (false);
+	p_branch_ins_offset(p, bc);
+
+	return (true);
+}
+
 static void
-p_bre(struct parse *p,
-	int end1,		/* first terminating character */
-	int end2)		/* second terminating character */
+p_bre_pre_parse(struct parse *p, struct branchc *bc)
 {
-	sopno start = HERE();
-	int first = 1;			/* first subexpression? */
-	int wasdollar = 0;
 
+	(void) bc;
+	/*
+	 * Does not move cleanly into expression parser because of
+	 * ordinary interpration of * at the beginning position of
+	 * an expression.
+	 */
 	if (EAT('^')) {
 		EMIT(OBOL, 0);
 		p->g->iflags |= USEBOL;
 		p->g->nbol++;
 	}
-	while (MORE() && !SEETWO(end1, end2)) {
-		wasdollar = p_simp_re(p, first);
-		first = 0;
-	}
-	if (wasdollar) {	/* oops, that was a trailing anchor */
+}
+
+static void
+p_bre_post_parse(struct parse *p, struct branchc *bc)
+{
+
+	/* Expression is terminating due to EOL token */
+	if (bc->terminate) {
 		DROP(1);
 		EMIT(OEOL, 0);
 		p->g->iflags |= USEEOL;
 		p->g->neol++;
 	}
+}
+
+/*
+ - p_re - Top level parser, concatenation and BRE anchoring
+ == static void p_re(struct parse *p, int end1, int end2);
+ * Giving end1 as OUT essentially eliminates the end1/end2 check.
+ *
+ * This implementation is a bit of a kludge, in that a trailing $ is first
+ * taken as an ordinary character and then revised to be an anchor.
+ * The amount of lookahead needed to avoid this kludge is excessive.
+ */
+static void
+p_re(struct parse *p,
+	int end1,	/* first terminating character */
+	int end2)	/* second terminating character; ignored for EREs */
+{
+	struct branchc bc;
 
-	(void)REQUIRE(HERE() != start, REG_EMPTY);	/* require nonempty */
+	bc.nbranch = 0;
+	if (end1 == OUT && end2 == OUT)
+		bc.outer = true;
+	else
+		bc.outer = false;
+#define	SEEEND()	(!p->bre ? SEE(end1) : SEETWO(end1, end2))
+	for (;;) {
+		bc.start = HERE();
+		bc.nchain = 0;
+		bc.terminate = false;
+		if (p->pre_parse != NULL)
+			p->pre_parse(p, &bc);
+		while (MORE() && (!p->allowbranch || !SEESPEC('|')) && !SEEEND()) {
+			bc.terminate = p->parse_expr(p, &bc);
+			++bc.nchain;
+		}
+		if (p->post_parse != NULL)
+			p->post_parse(p, &bc);
+		(void) REQUIRE(p->gnuext || HERE() != bc.start, REG_EMPTY);
+#ifdef LIBREGEX
+		if (HERE() == bc.start && !p_branch_empty(p, &bc))
+			break;
+#endif
+		if (!p->allowbranch)
+			break;
+		/*
+		 * p_branch_do's return value indicates whether we should
+		 * continue parsing or not. This is both for correctness and
+		 * a slight optimization, because it will check if we've
+		 * encountered an empty branch or the end of the string
+		 * immediately following a branch delimiter.
+		 */
+		if (!p_branch_do(p, &bc))
+			break;
+	}
+#undef SEE_END
+	if (p->allowbranch)
+		p_branch_fix_tail(p, &bc);
+	assert(!MORE() || SEE(end1));
 }
 
 /*
  - p_simp_re - parse a simple RE, an atom possibly followed by a repetition
- == static int p_simp_re(struct parse *p, int starordinary);
+ == static bool p_simp_re(struct parse *p, struct branchc *bc);
  */
-static int			/* was the simple RE an unbackslashed $? */
-p_simp_re(struct parse *p,
-	int starordinary)	/* is a leading * an ordinary character? */
+static bool			/* was the simple RE an unbackslashed $? */
+p_simp_re(struct parse *p, struct branchc *bc)
 {
 	int c;
+	int cc;			/* convenient/control character */
 	int count;
 	int count2;
 	sopno pos;
+	bool handled;
 	int i;
 	wint_t wc;
 	sopno subno;
 #	define	BACKSL	(1<<CHAR_BIT)
 
-	pos = HERE();		/* repetion op, if any, covers from here */
+	pos = HERE();		/* repetition op, if any, covers from here */
+	handled = false;
 
 	assert(MORE());		/* caller should have ensured this */
 	c = GETNEXT();
 	if (c == '\\') {
 		(void)REQUIRE(MORE(), REG_EESCAPE);
-		c = BACKSL | GETNEXT();
-	}
-	switch (c) {
-	case '.':
-		if (p->g->cflags&REG_NEWLINE)
-			nonnewline(p);
-		else
-			EMIT(OANY, 0);
-		break;
-	case '[':
-		p_bracket(p);
-		break;
-#ifdef __CYGWIN__
-	case BACKSL|'<':
-		/* \< is the GNU equivalents to [[:<:]] */
-		EMIT(OBOW, 0);
-		break;
-	case BACKSL|'>':
-		/* \> is the GNU equivalents to [[:>:]] */
-		EMIT(OEOW, 0);
-		break;
+		cc = GETNEXT();
+		c = BACKSL | cc;
+#ifdef LIBREGEX
+		if (p->gnuext) {
+			handled = true;
+			switch (c) {
+			case BACKSL|'`':
+				EMIT(OBOS, 0);
+				break;
+			case BACKSL|'\'':
+				EMIT(OEOS, 0);
+				break;
+			case BACKSL|'B':
+				EMIT(ONWBND, 0);
+				break;
+			case BACKSL|'b':
+				EMIT(OWBND, 0);
+				break;
+			case BACKSL|'W':
+			case BACKSL|'w':
+			case BACKSL|'S':
+			case BACKSL|'s':
+				p_b_pseudoclass(p, cc);
+				break;
+			default:
+				handled = false;
+			}
+		}
 #endif
-	case BACKSL|'{':
-		SETERROR(REG_BADRPT);
-		break;
-	case BACKSL|'(':
-		p->g->nsub++;
-		subno = p->g->nsub;
-		if (subno < NPAREN)
-			p->pbegin[subno] = HERE();
-		EMIT(OLPAREN, subno);
-		/* the MORE here is an error heuristic */
-		if (MORE() && !SEETWO('\\', ')'))
-			p_bre(p, '\\', ')');
-		if (subno < NPAREN) {
-			p->pend[subno] = HERE();
-			assert(p->pend[subno] != 0);
+	}
+	if (!handled) {
+		switch (c) {
+		case '.':
+			if (p->g->cflags&REG_NEWLINE)
+				nonnewline(p);
+			else
+				EMIT(OANY, 0);
+			break;
+		case '[':
+			p_bracket(p);
+			break;
+		case BACKSL|'<':
+			EMIT(OBOW, 0);
+			break;
+		case BACKSL|'>':
+			EMIT(OEOW, 0);
+			break;
+		case BACKSL|'{':
+			SETERROR(REG_BADRPT);
+			break;
+		case BACKSL|'(':
+			p->g->nsub++;
+			subno = p->g->nsub;
+			if (subno < NPAREN)
+				p->pbegin[subno] = HERE();
+			EMIT(OLPAREN, subno);
+			/* the MORE here is an error heuristic */
+			if (MORE() && !SEETWO('\\', ')'))
+				p_re(p, '\\', ')');
+			if (subno < NPAREN) {
+				p->pend[subno] = HERE();
+				assert(p->pend[subno] != 0);
+			}
+			EMIT(ORPAREN, subno);
+			(void)REQUIRE(EATTWO('\\', ')'), REG_EPAREN);
+			break;
+		case BACKSL|')':	/* should not get here -- must be user */
+			SETERROR(REG_EPAREN);
+			break;
+		case BACKSL|'1':
+		case BACKSL|'2':
+		case BACKSL|'3':
+		case BACKSL|'4':
+		case BACKSL|'5':
+		case BACKSL|'6':
+		case BACKSL|'7':
+		case BACKSL|'8':
+		case BACKSL|'9':
+			i = (c&~BACKSL) - '0';
+			assert(i < NPAREN);
+			if (p->pend[i] != 0) {
+				assert(i <= p->g->nsub);
+				EMIT(OBACK_, i);
+				assert(p->pbegin[i] != 0);
+				assert(OP(p->strip[p->pbegin[i]]) == OLPAREN);
+				assert(OP(p->strip[p->pend[i]]) == ORPAREN);
+				(void) dupl(p, p->pbegin[i]+1, p->pend[i]);
+				EMIT(O_BACK, i);
+			} else
+				SETERROR(REG_ESUBREG);
+			p->g->backrefs = 1;
+			break;
+		case '*':
+			/*
+			 * Ordinary if used as the first character beyond BOL anchor of
+			 * a (sub-)expression, counts as a bad repetition operator if it
+			 * appears otherwise.
+			 */
+			(void)REQUIRE(bc->nchain == 0, REG_BADRPT);
+			/* FALLTHROUGH */
+		default:
+			if (p->error != 0)
+				return (false);	/* Definitely not $... */
+			p->next--;
+			wc = WGETNEXT();
+			if ((c & BACKSL) == 0 || may_escape(p, wc))
+				ordinary(p, wc);
+			else
+				SETERROR(REG_EESCAPE);
+			break;
 		}
-		EMIT(ORPAREN, subno);
-		(void)REQUIRE(EATTWO('\\', ')'), REG_EPAREN);
-		break;
-	case BACKSL|')':	/* should not get here -- must be user */
-	case BACKSL|'}':
-		SETERROR(REG_EPAREN);
-		break;
-	case BACKSL|'1':
-	case BACKSL|'2':
-	case BACKSL|'3':
-	case BACKSL|'4':
-	case BACKSL|'5':
-	case BACKSL|'6':
-	case BACKSL|'7':
-	case BACKSL|'8':
-	case BACKSL|'9':
-		i = (c&~BACKSL) - '0';
-		assert(i < NPAREN);
-		if (p->pend[i] != 0) {
-			assert(i <= p->g->nsub);
-			EMIT(OBACK_, i);
-			assert(p->pbegin[i] != 0);
-			assert(OP(p->strip[p->pbegin[i]]) == OLPAREN);
-			assert(OP(p->strip[p->pend[i]]) == ORPAREN);
-			(void) dupl(p, p->pbegin[i]+1, p->pend[i]);
-			EMIT(O_BACK, i);
-		} else
-			SETERROR(REG_ESUBREG);
-		p->g->backrefs = 1;
-		break;
-	case '*':
-		(void)REQUIRE(starordinary, REG_BADRPT);
-		fallthrough;
-	default:
-		p->next--;
-		wc = WGETNEXT();
-		ordinary(p, wc);
-		break;
 	}
 
 	if (EAT('*')) {		/* implemented as +? */
@@ -670,6 +952,14 @@ p_simp_re(struct parse *p,
 		ASTERN(O_PLUS, pos);
 		INSERT(OQUEST_, pos);
 		ASTERN(O_QUEST, pos);
+#ifdef LIBREGEX
+	} else if (p->gnuext && EATTWO('\\', '?')) {
+		INSERT(OQUEST_, pos);
+		ASTERN(O_QUEST, pos);
+	} else if (p->gnuext && EATTWO('\\', '+')) {
+		INSERT(OPLUS_, pos);
+		ASTERN(O_PLUS, pos);
+#endif
 	} else if (EATTWO('\\', '{')) {
 		count = p_count(p);
 		if (EAT(',')) {
@@ -688,9 +978,9 @@ p_simp_re(struct parse *p,
 			SETERROR(REG_BADBR);
 		}
 	} else if (c == '$')     /* $ (but not \$) ends it */
-		return(1);
+		return (true);
 
-	return(0);
+	return (false);
 }
 
 /*
@@ -723,15 +1013,17 @@ p_bracket(struct parse *p)
 	wint_t ch;
 
 	/* Dept of Truly Sickening Special-Case Kludges */
-	if (p->next + 5 < p->end && strncmp(p->next, "[:<:]]", 6) == 0) {
-		EMIT(OBOW, 0);
-		NEXTn(6);
-		return;
-	}
-	if (p->next + 5 < p->end && strncmp(p->next, "[:>:]]", 6) == 0) {
-		EMIT(OEOW, 0);
-		NEXTn(6);
-		return;
+	if (p->end - p->next > 5) {
+		if (strncmp(p->next, "[:<:]]", 6) == 0) {
+			EMIT(OBOW, 0);
+			NEXTn(6);
+			return;
+		}
+		if (strncmp(p->next, "[:>:]]", 6) == 0) {
+			EMIT(OEOW, 0);
+			NEXTn(6);
+			return;
+		}
 	}
 
 	if ((cs = allocset(p)) == NULL)
@@ -757,14 +1049,30 @@ p_bracket(struct parse *p)
 	if (cs->invert && p->g->cflags&REG_NEWLINE)
 		cs->bmp['\n' >> 3] |= 1 << ('\n' & 7);
 
-	if ((ch = singleton(cs)) != OUT		/* optimize singleton sets */
-	     && cs->invert == 0) {		/* But not in invert case. */
+	if ((ch = singleton(cs)) != OUT) {	/* optimize singleton sets */
 		ordinary(p, ch);
 		freeset(p, cs);
 	} else
 		EMIT(OANYOF, (int)(cs - p->g->sets));
 }
 
+static int
+p_range_cmp(wchar_t c1, wchar_t c2)
+{
+#ifndef LIBREGEX
+	return __wcollate_range_cmp(c1, c2);
+#else
+	/* Copied from libc/collate __wcollate_range_cmp */
+	wchar_t s1[2], s2[2];
+
+	s1[0] = c1;
+	s1[1] = L'\0';
+	s2[0] = c2;
+	s2[1] = L'\0';
+	return (wcscoll(s1, s2));
+#endif
+}
+
 /*
  - p_b_term - parse one term of a bracketed character list
  == static void p_b_term(struct parse *p, cset *cs);
@@ -775,7 +1083,10 @@ p_b_term(struct parse *p, cset *cs)
 	char c;
 	wint_t start, finish;
 	wint_t i;
-
+#ifndef LIBREGEX
+	struct xlocale_collate *table =
+		(struct xlocale_collate*)__get_locale()->components[XLC_COLLATE];
+#endif
 	/* classify what we've got */
 	switch ((MORE()) ? PEEK() : '\0') {
 	case '[':
@@ -784,7 +1095,6 @@ p_b_term(struct parse *p, cset *cs)
 	case '-':
 		SETERROR(REG_ERANGE);
 		return;			/* NOTE RETURN */
-		break;
 	default:
 		c = '\0';
 		break;
@@ -818,27 +1128,23 @@ p_b_term(struct parse *p, cset *cs)
 				finish = '-';
 			else
 				finish = p_b_symbol(p);
-		} else if (SEE('-') && !MORE2()) {
-			SETERROR(REG_EBRACK);
-			return;
 		} else
 			finish = start;
 		if (start == finish)
 			CHadd(p, cs, start);
 		else {
-#ifdef __CYGWIN__
-			if (!__get_current_collate_locale ()->win_locale[0]) {
+#ifndef LIBREGEX
+			if (table->__collate_load_error || MB_CUR_MAX > 1) {
 #else
-			if (__collate_load_error) {
+			if (MB_CUR_MAX > 1) {
 #endif
-				(void)REQUIRE((uch)start <= (uch)finish, REG_ERANGE);
+				(void)REQUIRE(start <= finish, REG_ERANGE);
 				CHaddrange(p, cs, start, finish);
 			} else {
-				(void)REQUIRE(__wcollate_range_cmp(start, finish) <= 0, REG_ERANGE);
+				(void)REQUIRE(p_range_cmp(start, finish) <= 0, REG_ERANGE);
 				for (i = 0; i <= UCHAR_MAX; i++) {
-					if (   __wcollate_range_cmp(start, i) <= 0
-					    && __wcollate_range_cmp(i, finish) <= 0
-					   )
+					if (p_range_cmp(start, i) <= 0 &&
+					    p_range_cmp(i, finish) <= 0 )
 						CHadd(p, cs, i);
 				}
 			}
@@ -847,6 +1153,41 @@ p_b_term(struct parse *p, cset *cs)
 	}
 }
 
+/*
+ - p_b_pseudoclass - parse a pseudo-class (\w, \W, \s, \S)
+ == static int p_b_pseudoclass(struct parse *p, char c)
+ */
+static int
+p_b_pseudoclass(struct parse *p, char c) {
+	cset *cs;
+
+	if ((cs = allocset(p)) == NULL)
+		return(0);
+
+	if (p->g->cflags&REG_ICASE)
+		cs->icase = 1;
+
+	switch (c) {
+	case 'W':
+		cs->invert = 1;
+		/* PASSTHROUGH */
+	case 'w':
+		p_b_cclass_named(p, cs, "alnum");
+		break;
+	case 'S':
+		cs->invert = 1;
+		/* PASSTHROUGH */
+	case 's':
+		p_b_cclass_named(p, cs, "space");
+		break;
+	default:
+		return(0);
+	}
+
+	EMIT(OANYOF, (int)(cs - p->g->sets));
+	return(1);
+}
+
 /*
  - p_b_cclass - parse a character-class name and deal with it
  == static void p_b_cclass(struct parse *p, cset *cs);
@@ -854,9 +1195,8 @@ p_b_term(struct parse *p, cset *cs)
 static void
 p_b_cclass(struct parse *p, cset *cs)
 {
-	char *sp = p->next;
+	const char *sp = p->next;
 	size_t len;
-	wctype_t wct;
 	char clname[16];
 
 	while (MORE() && isalpha((uch)PEEK()))
@@ -868,6 +1208,17 @@ p_b_cclass(struct parse *p, cset *cs)
 	}
 	memcpy(clname, sp, len);
 	clname[len] = '\0';
+
+	p_b_cclass_named(p, cs, clname);
+}
+/*
+ - p_b_cclass_named - deal with a named character class
+ == static void p_b_cclass_named(struct parse *p, cset *cs, const char []);
+ */
+static void
+p_b_cclass_named(struct parse *p, cset *cs, const char clname[]) {
+	wctype_t wct;
+
 	if ((wct = wctype(clname)) == 0) {
 		SETERROR(REG_ECTYPE);
 		return;
@@ -892,7 +1243,7 @@ p_b_eclass(struct parse *p, cset *cs)
 
 /*
  - p_b_symbol - parse a character or [..]ed multicharacter collating symbol
- == static char p_b_symbol(struct parse *p);
+ == static wint_t p_b_symbol(struct parse *p);
  */
 static wint_t			/* value of symbol */
 p_b_symbol(struct parse *p)
@@ -911,18 +1262,17 @@ p_b_symbol(struct parse *p)
 
 /*
  - p_b_coll_elem - parse a collating-element name and look it up
- == static char p_b_coll_elem(struct parse *p, int endc);
+ == static wint_t p_b_coll_elem(struct parse *p, wint_t endc);
  */
 static wint_t			/* value of collating element */
 p_b_coll_elem(struct parse *p,
 	wint_t endc)		/* name ended by endc,']' */
 {
-	char *sp = p->next;
+	const char *sp = p->next;
 	struct cname *cp;
-	int len;
 	mbstate_t mbs;
-	wint_t wc;
-	size_t clen;
+	wchar_t wc;
+	size_t clen, len;
 
 	while (MORE() && !SEETWO(endc, ']'))
 		NEXT();
@@ -932,10 +1282,10 @@ p_b_coll_elem(struct parse *p,
 	}
 	len = p->next - sp;
 	for (cp = cnames; cp->name != NULL; cp++)
-		if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0')
+		if (strncmp(cp->name, sp, len) == 0 && strlen(cp->name) == len)
 			return(cp->code);	/* known name */
 	memset(&mbs, 0, sizeof(mbs));
-	if ((clen = mbrtowi(&wc, sp, len, &mbs)) == len)
+	if ((clen = mbrtowc(&wc, sp, len, &mbs)) == len)
 		return (wc);			/* single character */
 	else if (clen == (size_t)-1 || clen == (size_t)-2)
 		SETERROR(REG_ILLSEQ);
@@ -944,9 +1294,58 @@ p_b_coll_elem(struct parse *p,
 	return(0);
 }
 
+/*
+ - may_escape - determine whether 'ch' is escape-able in the current context
+ == static int may_escape(struct parse *p, const wint_t ch)
+ */
+static bool
+may_escape(struct parse *p, const wint_t ch)
+{
+
+	if ((p->pflags & PFLAG_LEGACY_ESC) != 0)
+		return (true);
+	if (isalpha(ch) || ch == '\'' || ch == '`')
+		return (false);
+	return (true);
+#ifdef NOTYET
+	/*
+	 * Build a whitelist of characters that may be escaped to produce an
+	 * ordinary in the current context. This assumes that these have not
+	 * been otherwise interpreted as a special character. Escaping an
+	 * ordinary character yields undefined results according to
+	 * IEEE 1003.1-2008. Some extensions (notably, some GNU extensions) take
+	 * advantage of this and use escaped ordinary characters to provide
+	 * special meaning, e.g. \b, \B, \w, \W, \s, \S.
+	 */
+	switch(ch) {
+	case '|':
+	case '+':
+	case '?':
+		/* The above characters may not be escaped in BREs */
+		if (!(p->g->cflags&REG_EXTENDED))
+			return (false);
+		/* Fallthrough */
+	case '(':
+	case ')':
+	case '{':
+	case '}':
+	case '.':
+	case '[':
+	case ']':
+	case '\\':
+	case '*':
+	case '^':
+	case '$':
+		return (true);
+	default:
+		return (false);
+	}
+#endif
+}
+
 /*
  - othercase - return the case counterpart of an alphabetic
- == static char othercase(int ch);
+ == static wint_t othercase(wint_t ch);
  */
 static wint_t			/* if no counterpart, return ch */
 othercase(wint_t ch)
@@ -962,15 +1361,15 @@ othercase(wint_t ch)
 
 /*
  - bothcases - emit a dualcase version of a two-case character
- == static void bothcases(struct parse *p, int ch);
+ == static void bothcases(struct parse *p, wint_t ch);
  *
  * Boy, is this implementation ever a kludge...
  */
 static void
 bothcases(struct parse *p, wint_t ch)
 {
-	char *oldnext = p->next;
-	char *oldend = p->end;
+	const char *oldnext = p->next;
+	const char *oldend = p->end;
 	char bracket[3 + MB_LEN_MAX];
 	size_t n;
 	mbstate_t mbs;
@@ -978,7 +1377,7 @@ bothcases(struct parse *p, wint_t ch)
 	assert(othercase(ch) != ch);	/* p_bracket() would recurse */
 	p->next = bracket;
 	memset(&mbs, 0, sizeof(mbs));
-	n = xwcrtomb(bracket, ch, &mbs);
+	n = wcrtomb(bracket, ch, &mbs);
 	assert(n != (size_t)-1);
 	bracket[n] = ']';
 	bracket[n + 1] = '\0';
@@ -991,7 +1390,7 @@ bothcases(struct parse *p, wint_t ch)
 
 /*
  - ordinary - emit an ordinary character
- == static void ordinary(struct parse *p, int ch);
+ == static void ordinary(struct parse *p, wint_t ch);
  */
 static void
 ordinary(struct parse *p, wint_t ch)
@@ -1023,8 +1422,8 @@ ordinary(struct parse *p, wint_t ch)
 static void
 nonnewline(struct parse *p)
 {
-	char *oldnext = p->next;
-	char *oldend = p->end;
+	const char *oldnext = p->next;
+	const char *oldend = p->end;
 	char bracket[4];
 
 	p->next = bracket;
@@ -1119,23 +1518,11 @@ static wint_t
 wgetnext(struct parse *p)
 {
 	mbstate_t mbs;
-	wint_t wc;
+	wchar_t wc;
 	size_t n;
 
-#ifdef __CYGWIN__
-	/* Kludge for more glibc compatibility.  On Cygwin as well as on
-	   Linux, mbrtowc returns -1 if the current local's codeset is ASCII
-	   and the character is >= 0x80.  Nevertheless, glibc's regcomp allows
-	   any char value, even stuff like [\xc0-\xff], if the locale's codeset
-	   is ASCII, so in regcomp it ignores the fact that chars >= 0x80 are
-	   invalid ASCII chars.  To be more Linux-compatible, we align the
-	   behaviour to glibc here.  Allow any character value if the current
-	   local's codeset is ASCII. */
-	if (*__current_locale_charset () == 'A') /* SCII */
-	  return (wint_t) (unsigned char) *p->next++;
-#endif
 	memset(&mbs, 0, sizeof(mbs));
-	n = mbrtowi(&wc, p->next, p->end - p->next, &mbs);
+	n = mbrtowc(&wc, p->next, p->end - p->next, &mbs);
 	if (n == (size_t)-1 || n == (size_t)-2) {
 		SETERROR(REG_ILLSEQ);
 		return (0);
@@ -1146,27 +1533,6 @@ wgetnext(struct parse *p)
 	return (wc);
 }
 
-static size_t
-xwcrtomb (char *s, wint_t wc, mbstate_t *ps)
-{
-  if (sizeof (wchar_t) == 2 && wc >= 0x10000)
-    {
-      /* UTF-16 wcrtomb can't handle these values directly.  The rest of the
-	 code isn't surrogate pair aware, so we handle this here.  Convert
-	 value to UTF-16 surrogate and call wcsrtombs to convert the "string"
-	 to the correct multibyte representation, if any. */
-      wchar_t ws[2];
-      const wchar_t *wsp = ws;
-
-      wc -= 0x10000;
-      ws[0] = 0xd800 | (wc >> 10);
-      ws[1] = 0xdc00 | (wc & 0x3ff);
-      return wcsnrtombs (s, &wsp, 2, MB_CUR_MAX, ps);
-    }
-  return wcrtomb (s, wc, ps);
-}
-
-
 /*
  - seterr - set an error condition
  == static int seterr(struct parse *p, int e);
@@ -1190,7 +1556,7 @@ allocset(struct parse *p)
 {
 	cset *cs, *ncs;
 
-	ncs = realloc(p->g->sets, (p->g->ncsets + 1) * sizeof(*ncs));
+	ncs = reallocarray(p->g->sets, p->g->ncsets + 1, sizeof(*ncs));
 	if (ncs == NULL) {
 		SETERROR(REG_ESPACE);
 		return (NULL);
@@ -1226,16 +1592,16 @@ freeset(struct parse *p, cset *cs)
 static wint_t
 singleton(cset *cs)
 {
-	wint_t i, s = OUT, n;
+	wint_t i, s, n;
 
 	for (i = n = 0; i < NC; i++)
 		if (CHIN(cs, i)) {
 			n++;
 			s = i;
 		}
-	if (n == 1 && cs->nwides == 0)
+	if (n == 1)
 		return (s);
-	if (n == 0 && cs->nwides == 1 && cs->nranges == 0 && cs->ntypes == 0 &&
+	if (cs->nwides == 1 && cs->nranges == 0 && cs->ntypes == 0 &&
 	    cs->icase == 0)
 		return (cs->wides[0]);
 	/* Don't bother handling the other cases. */
@@ -1253,7 +1619,7 @@ CHadd(struct parse *p, cset *cs, wint_t ch)
 	if (ch < NC)
 		cs->bmp[ch >> 3] |= 1 << (ch & 7);
 	else {
-		newwides = realloc(cs->wides, (cs->nwides + 1) *
+		newwides = reallocarray(cs->wides, cs->nwides + 1,
 		    sizeof(*cs->wides));
 		if (newwides == NULL) {
 			SETERROR(REG_ESPACE);
@@ -1282,7 +1648,7 @@ CHaddrange(struct parse *p, cset *cs, wint_t min, wint_t max)
 		CHadd(p, cs, min);
 	if (min >= max)
 		return;
-	newranges = realloc(cs->ranges, (cs->nranges + 1) *
+	newranges = reallocarray(cs->ranges, cs->nranges + 1,
 	    sizeof(*cs->ranges));
 	if (newranges == NULL) {
 		SETERROR(REG_ESPACE);
@@ -1306,7 +1672,7 @@ CHaddtype(struct parse *p, cset *cs, wctype_t wct)
 	for (i = 0; i < NC; i++)
 		if (iswctype(i, wct))
 			CHadd(p, cs, i);
-	newtypes = realloc(cs->types, (cs->ntypes + 1) *
+	newtypes = reallocarray(cs->types, cs->ntypes + 1,
 	    sizeof(*cs->types));
 	if (newtypes == NULL) {
 		SETERROR(REG_ESPACE);
@@ -1331,8 +1697,8 @@ dupl(struct parse *p,
 	assert(finish >= start);
 	if (len == 0)
 		return(ret);
-	enlarge(p, p->ssize + len);	/* this many unexpected additions */
-	assert(p->ssize >= p->slen + len);
+	if (!enlarge(p, p->ssize + len)) /* this many unexpected additions */
+		return(ret);
 	(void) memcpy((char *)(p->strip + p->slen),
 		(char *)(p->strip + start), (size_t)len*sizeof(sop));
 	p->slen += len;
@@ -1359,8 +1725,8 @@ doemit(struct parse *p, sop op, size_t opnd)
 
 	/* deal with undersized strip */
 	if (p->slen >= p->ssize)
-		enlarge(p, (p->ssize+1) / 2 * 3);	/* +50% */
-	assert(p->slen < p->ssize);
+		if (!enlarge(p, (p->ssize+1) / 2 * 3))	/* +50% */
+			return;
 
 	/* finally, it's all reduced to the easy case */
 	p->strip[p->slen++] = SOP(op, opnd);
@@ -1419,23 +1785,24 @@ dofwd(struct parse *p, sopno pos, sop value)
 
 /*
  - enlarge - enlarge the strip
- == static void enlarge(struct parse *p, sopno size);
+ == static int enlarge(struct parse *p, sopno size);
  */
-static void
+static int
 enlarge(struct parse *p, sopno size)
 {
 	sop *sp;
 
 	if (p->ssize >= size)
-		return;
+		return 1;
 
-	sp = (sop *)realloc(p->strip, size*sizeof(sop));
+	sp = reallocarray(p->strip, size, sizeof(sop));
 	if (sp == NULL) {
 		SETERROR(REG_ESPACE);
-		return;
+		return 0;
 	}
 	p->strip = sp;
 	p->ssize = size;
+	return 1;
 }
 
 /*
@@ -1446,7 +1813,7 @@ static void
 stripsnug(struct parse *p, struct re_guts *g)
 {
 	g->nstates = p->slen;
-	g->strip = (sop *)realloc((char *)p->strip, p->slen * sizeof(sop));
+	g->strip = reallocarray((char *)p->strip, p->slen, sizeof(sop));
 	if (g->strip == NULL) {
 		SETERROR(REG_ESPACE);
 		g->strip = p->strip;
@@ -1487,11 +1854,7 @@ findmust(struct parse *p, struct re_guts *g)
 	 * UTF-8 (see RFC 3629).
 	 */
 	if (MB_CUR_MAX > 1 &&
-#ifdef __CYGWIN__
-	    strcmp(__current_locale_charset (), "UTF-8") != 0)
-#else
 	    strcmp(_CurrentRuneLocale->__encoding, "UTF-8") != 0)
-#endif
 		return;
 
 	/* find the longest OCHAR sequence in strip */
@@ -1507,7 +1870,7 @@ findmust(struct parse *p, struct re_guts *g)
 				memset(&mbs, 0, sizeof(mbs));
 				newstart = scan - 1;
 			}
-			clen = xwcrtomb(buf, OPND(s), &mbs);
+			clen = wcrtomb(buf, OPND(s), &mbs);
 			if (clen == (size_t)-1)
 				goto toohard;
 			newlen += clen;
@@ -1524,21 +1887,25 @@ findmust(struct parse *p, struct re_guts *g)
 				scan += OPND(s);
 				s = *scan;
 				/* assert() interferes w debug printouts */
-				if (OP(s) != O_QUEST && OP(s) != O_CH &&
-							OP(s) != OOR2) {
+				if (OP(s) != (sop)O_QUEST &&
+				    OP(s) != (sop)O_CH && OP(s) != (sop)OOR2) {
 					g->iflags |= BAD;
 					return;
 				}
-			} while (OP(s) != O_QUEST && OP(s) != O_CH);
-			fallthrough;
+			} while (OP(s) != (sop)O_QUEST && OP(s) != (sop)O_CH);
+			/* FALLTHROUGH */
 		case OBOW:		/* things that break a sequence */
 		case OEOW:
 		case OBOL:
 		case OEOL:
+		case OBOS:
+		case OEOS:
+		case OWBND:
+		case ONWBND:
 		case O_QUEST:
 		case O_CH:
 		case OEND:
-			if (newlen > g->mlen) {		/* ends one */
+			if (newlen > (sopno)g->mlen) {		/* ends one */
 				start = newstart;
 				g->mlen = newlen;
 				if (offset > -1) {
@@ -1553,7 +1920,7 @@ findmust(struct parse *p, struct re_guts *g)
 			newlen = 0;
 			break;
 		case OANY:
-			if (newlen > g->mlen) {		/* ends one */
+			if (newlen > (sopno)g->mlen) {		/* ends one */
 				start = newstart;
 				g->mlen = newlen;
 				if (offset > -1) {
@@ -1571,7 +1938,7 @@ findmust(struct parse *p, struct re_guts *g)
 			break;
 		case OANYOF:		/* may or may not invalidate offset */
 			/* First, everything as OANY */
-			if (newlen > g->mlen) {		/* ends one */
+			if (newlen > (sopno)g->mlen) {		/* ends one */
 				start = newstart;
 				g->mlen = newlen;
 				if (offset > -1) {
@@ -1594,7 +1961,7 @@ findmust(struct parse *p, struct re_guts *g)
 			 * save the last known good offset, in case the
 			 * must sequence doesn't occur later.
 			 */
-			if (newlen > g->mlen) {		/* ends one */
+			if (newlen > (sopno)g->mlen) {		/* ends one */
 				start = newstart;
 				g->mlen = newlen;
 				if (offset > -1)
@@ -1626,7 +1993,7 @@ findmust(struct parse *p, struct re_guts *g)
 	while (cp < g->must + g->mlen) {
 		while (OP(s = *scan++) != OCHAR)
 			continue;
-		clen = xwcrtomb(cp, OPND(s), &mbs);
+		clen = wcrtomb(cp, OPND(s), &mbs);
 		assert(clen != (size_t)-1);
 		cp += clen;
 	}
@@ -1655,7 +2022,7 @@ altoffset(sop *scan, int offset)
 	largest = 0;
 	try = 0;
 	s = *scan++;
-	while (OP(s) != O_QUEST && OP(s) != O_CH) {
+	while (OP(s) != (sop)O_QUEST && OP(s) != (sop)O_CH) {
 		switch (OP(s)) {
 		case OOR1:
 			if (try > largest)
@@ -1671,10 +2038,10 @@ altoffset(sop *scan, int offset)
 			do {
 				scan += OPND(s);
 				s = *scan;
-				if (OP(s) != O_QUEST && OP(s) != O_CH &&
-							OP(s) != OOR2)
+				if (OP(s) != (sop)O_QUEST &&
+				    OP(s) != (sop)O_CH && OP(s) != (sop)OOR2)
 					return -1;
-			} while (OP(s) != O_QUEST && OP(s) != O_CH);
+			} while (OP(s) != (sop)O_QUEST && OP(s) != (sop)O_CH);
 			/* We must skip to the next position, or we'll
 			 * leave altoffset() too early.
 			 */
@@ -1686,6 +2053,8 @@ altoffset(sop *scan, int offset)
 			try++;
 		case OBOW:
 		case OEOW:
+		case OWBND:
+		case ONWBND:
 		case OLPAREN:
 		case ORPAREN:
 		case OOR2:
@@ -1726,7 +2095,7 @@ computejumps(struct parse *p, struct re_guts *g)
 	if (p->error != 0)
 		return;
 
-	g->charjump = (int*) malloc((NC + 1) * sizeof(int));
+	g->charjump = (int *)malloc((NC_MAX + 1) * sizeof(int));
 	if (g->charjump == NULL)	/* Not a fatal error */
 		return;
 	/* Adjust for signed chars, if necessary */
@@ -1775,16 +2144,15 @@ computematchjumps(struct parse *p, struct re_guts *g)
 	if (p->error != 0)
 		return;
 
-	pmatches = (int*) malloc(g->mlen * sizeof(unsigned int));
+	pmatches = (int*) malloc(g->mlen * sizeof(int));
 	if (pmatches == NULL) {
 		g->matchjump = NULL;
 		return;
 	}
 
-	g->matchjump = (int*) malloc(g->mlen * sizeof(unsigned int));
-	if (g->matchjump == NULL)	/* Not a fatal error */
-	{
-		free (pmatches);
+	g->matchjump = (int*) malloc(g->mlen * sizeof(int));
+	if (g->matchjump == NULL) {	/* Not a fatal error */
+		free(pmatches);
 		return;
 	}
 
@@ -1827,7 +2195,7 @@ computematchjumps(struct parse *p, struct re_guts *g)
                         suffix++;
                 }
 		if (suffix < g->mlen)
-			ssuffix = pmatches[ssuffix];
+                	ssuffix = pmatches[ssuffix];
         }
 
 	free(pmatches);
diff --git a/winsup/cygwin/regex/regerror.c b/winsup/cygwin/regex/regerror.c
index 1bba3e4a6f74..0e555784f0dc 100644
--- a/winsup/cygwin/regex/regerror.c
+++ b/winsup/cygwin/regex/regerror.c
@@ -1,4 +1,6 @@
 /*-
+ * SPDX-License-Identifier: BSD-3-Clause
+ *
  * Copyright (c) 1992, 1993, 1994 Henry Spencer.
  * Copyright (c) 1992, 1993, 1994
  *	The Regents of the University of California.  All rights reserved.
@@ -14,7 +16,7 @@
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
- * 4. Neither the name of the University nor the names of its contributors
+ * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
@@ -37,7 +39,7 @@
 static char sccsid[] = "@(#)regerror.c	8.4 (Berkeley) 3/20/94";
 #endif /* LIBC_SCCS and not lint */
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/lib/libc/regex/regerror.c,v 1.11 2007/06/11 03:05:54 delphij Exp $");
+__FBSDID("$FreeBSD$");
 
 #include <sys/types.h>
 #include <stdio.h>
@@ -54,7 +56,7 @@ extern "C" {
 #endif
 
 /* === regerror.c === */
-static char *regatoi(const regex_t *preg, char *localbuf);
+static const char *regatoi(const regex_t *preg, char *localbuf);
 
 #ifdef __cplusplus
 }
@@ -83,13 +85,8 @@ static char *regatoi(const regex_t *preg, char *localbuf);
  */
 static struct rerr {
 	int code;
-#ifdef __CYGWIN__ /* Avoid whining compiler */
 	const char *name;
 	const char *explain;
-#else
-	char *name;
-	char *explain;
-#endif
 } rerrs[] = {
 	{REG_NOMATCH,	"REG_NOMATCH",	"regexec() failed to match"},
 	{REG_BADPAT,	"REG_BADPAT",	"invalid regular expression"},
@@ -125,11 +122,7 @@ regerror(int errcode,
 	struct rerr *r;
 	size_t len;
 	int target = errcode &~ REG_ITOA;
-#ifdef __CYGWIN__ /* Avoid whining compiler */
 	const char *s;
-#else
-	char *s;
-#endif
 	char convbuf[50];
 
 	if (errcode == REG_ATOI)
@@ -167,7 +160,7 @@ regerror(int errcode,
  - regatoi - internal routine to implement REG_ATOI
  == static char *regatoi(const regex_t *preg, char *localbuf);
  */
-static char *
+static const char *
 regatoi(const regex_t *preg, char *localbuf)
 {
 	struct rerr *r;
@@ -176,14 +169,7 @@ regatoi(const regex_t *preg, char *localbuf)
 		if (strcmp(r->name, preg->re_endp) == 0)
 			break;
 	if (r->code == 0)
-#ifdef __CYGWIN__ /* Avoid whining compiler */
-	    {
-		static char null[] = "0";
-		return null;
-	    }
-#else
 		return("0");
-#endif
 
 	sprintf(localbuf, "%d", r->code);
 	return(localbuf);
diff --git a/winsup/cygwin/regex/regex.3 b/winsup/cygwin/regex/regex.3
index f848d66c3a3c..d22dec1e87f7 100644
--- a/winsup/cygwin/regex/regex.3
+++ b/winsup/cygwin/regex/regex.3
@@ -13,7 +13,7 @@
 .\" 2. Redistributions in binary form must reproduce the above copyright
 .\"    notice, this list of conditions and the following disclaimer in the
 .\"    documentation and/or other materials provided with the distribution.
-.\" 4. Neither the name of the University nor the names of its contributors
+.\" 3. Neither the name of the University nor the names of its contributors
 .\"    may be used to endorse or promote products derived from this software
 .\"    without specific prior written permission.
 .\"
@@ -30,9 +30,9 @@
 .\" SUCH DAMAGE.
 .\"
 .\"	@(#)regex.3	8.4 (Berkeley) 3/20/94
-.\" $FreeBSD: src/lib/libc/regex/regex.3,v 1.21 2007/01/09 00:28:04 imp Exp $
+.\" $FreeBSD$
 .\"
-.Dd August 17, 2005
+.Dd April 15, 2017
 .Dt REGEX 3
 .Os
 .Sh NAME
@@ -183,6 +183,17 @@ compatible with but not specified by
 .St -p1003.2 ,
 and should be used with
 caution in software intended to be portable to other systems.
+.It Dv REG_POSIX
+Compile only
+.St -p1003.2
+compliant expressions.
+This flag has no effect unless linking against
+.Nm libregex .
+This is an extension,
+compatible with but not specified by
+.St -p1003.2 ,
+and should be used with
+caution in software intended to be portable to other systems.
 .El
 .Pp
 When successful,
@@ -235,11 +246,16 @@ The
 argument is the bitwise OR of zero or more of the following flags:
 .Bl -tag -width REG_STARTEND
 .It Dv REG_NOTBOL
-The first character of
-the string
-is not the beginning of a line, so the
-.Ql ^\&
-anchor should not match before it.
+The first character of the string is treated as the continuation
+of a line.
+This means that the anchors
+.Ql ^\& ,
+.Ql [[:<:]] ,
+and
+.Ql \e<
+do not match before it; but see
+.Dv REG_STARTEND
+below.
 This does not affect the behavior of newlines under
 .Dv REG_NEWLINE .
 .It Dv REG_NOTEOL
@@ -247,19 +263,16 @@ The NUL terminating
 the string
 does not end a line, so the
 .Ql $\&
-anchor should not match before it.
+anchor does not match before it.
 This does not affect the behavior of newlines under
 .Dv REG_NEWLINE .
 .It Dv REG_STARTEND
 The string is considered to start at
-.Fa string
-+
-.Fa pmatch Ns [0]. Ns Va rm_so
-and to have a terminating NUL located at
-.Fa string
-+
-.Fa pmatch Ns [0]. Ns Va rm_eo
-(there need not actually be a NUL at that location),
+.Fa string No +
+.Fa pmatch Ns [0]. Ns Fa rm_so
+and to end before the byte located at
+.Fa string No +
+.Fa pmatch Ns [0]. Ns Fa rm_eo ,
 regardless of the value of
 .Fa nmatch .
 See below for the definition of
@@ -271,13 +284,37 @@ compatible with but not specified by
 .St -p1003.2 ,
 and should be used with
 caution in software intended to be portable to other systems.
-Note that a non-zero
-.Va rm_so
-does not imply
-.Dv REG_NOTBOL ;
-.Dv REG_STARTEND
-affects only the location of the string,
-not how it is matched.
+.Pp
+Without
+.Dv REG_NOTBOL ,
+the position
+.Fa rm_so
+is considered the beginning of a line, such that
+.Ql ^
+matches before it, and the beginning of a word if there is a word
+character at this position, such that
+.Ql [[:<:]]
+and
+.Ql \e<
+match before it.
+.Pp
+With
+.Dv REG_NOTBOL ,
+the character at position
+.Fa rm_so
+is treated as the continuation of a line, and if
+.Fa rm_so
+is greater than 0, the preceding character is taken into consideration.
+If the preceding character is a newline and the regular expression was compiled
+with
+.Dv REG_NEWLINE ,
+.Ql ^
+matches before the string; if the preceding character is not a word character
+but the string starts with a word character,
+.Ql [[:<:]]
+and
+.Ql \e<
+match before the string.
 .El
 .Pp
 See
@@ -420,10 +457,12 @@ it should have been the result from the most recent
 using that
 .Ft regex_t .
 The
-.Fn ( regerror
+.Po
+.Fn regerror
 may be able to supply a more detailed message using information
 from the
-.Ft regex_t . )
+.Ft regex_t .
+.Pc
 The
 .Fn regerror
 function
diff --git a/winsup/cygwin/regex/regex.7 b/winsup/cygwin/regex/regex.7
index 79fecc197519..8380fd89e222 100644
--- a/winsup/cygwin/regex/regex.7
+++ b/winsup/cygwin/regex/regex.7
@@ -13,6 +13,10 @@
 .\" 2. Redistributions in binary form must reproduce the above copyright
 .\"    notice, this list of conditions and the following disclaimer in the
 .\"    documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\"    must display the following acknowledgement:
+.\"	This product includes software developed by the University of
+.\"	California, Berkeley and its contributors.
 .\" 4. Neither the name of the University nor the names of its contributors
 .\"    may be used to endorse or promote products derived from this software
 .\"    without specific prior written permission.
@@ -30,9 +34,9 @@
 .\" SUCH DAMAGE.
 .\"
 .\"	@(#)re_format.7	8.3 (Berkeley) 3/20/94
-.\" $FreeBSD: src/lib/libc/regex/re_format.7,v 1.12 2008/09/05 17:41:20 keramida Exp $
+.\" $FreeBSD$
 .\"
-.Dd March 20, 1994
+.Dd June 30, 2014
 .Dt RE_FORMAT 7
 .Os
 .Sh NAME
@@ -271,7 +275,6 @@ and
 stands for the list of all characters belonging to that
 class.
 Standard character class names are:
-.Pp
 .Bl -column "alnum" "digit" "xdigit" -offset indent
 .It Em "alnum	digit	punct"
 .It Em "alpha	graph	space"
@@ -311,6 +314,13 @@ compatible with but not specified by
 .St -p1003.2 ,
 and should be used with
 caution in software intended to be portable to other systems.
+The additional word delimiters
+.Ql \e<
+and
+.Ql \e>
+are provided to ease compatibility with traditional
+SVR4
+systems but are not portable and should be avoided.
 .Pp
 In the event that an RE could match more than one substring of a given
 string,
@@ -382,10 +392,12 @@ and
 .Ql ?\&
 are ordinary characters, and their functionality
 can be expressed using bounds
-.No ( Ql {1,}
+.Po
+.Ql {1,}
 or
 .Ql {0,1}
-respectively).
+respectively
+.Pc .
 Also note that
 .Ql x+
 in modern REs is equivalent to
diff --git a/winsup/cygwin/regex/regex2.h b/winsup/cygwin/regex/regex2.h
index b9a35a45fd30..38bbed90fd8f 100644
--- a/winsup/cygwin/regex/regex2.h
+++ b/winsup/cygwin/regex/regex2.h
@@ -1,4 +1,6 @@
 /*-
+ * SPDX-License-Identifier: BSD-3-Clause
+ *
  * Copyright (c) 1992, 1993, 1994 Henry Spencer.
  * Copyright (c) 1992, 1993, 1994
  *	The Regents of the University of California.  All rights reserved.
@@ -14,7 +16,7 @@
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
- * 4. Neither the name of the University nor the names of its contributors
+ * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
@@ -31,21 +33,21 @@
  * SUCH DAMAGE.
  *
  *	@(#)regex2.h	8.4 (Berkeley) 3/20/94
- * $FreeBSD: src/lib/libc/regex/regex2.h,v 1.11 2007/01/09 00:28:04 imp Exp $
+ * $FreeBSD$
  */
 
 /*
  * First, the stuff that ends up in the outside-world include file
  = typedef off_t regoff_t;
  = typedef struct {
- =	int re_magic;
- =	size_t re_nsub;		// number of parenthesized subexpressions
- =	const char *re_endp;	// end pointer for REG_PEND
- =	struct re_guts *re_g;	// none of your business :-)
+ = 	int re_magic;
+ = 	size_t re_nsub;		// number of parenthesized subexpressions
+ = 	const char *re_endp;	// end pointer for REG_PEND
+ = 	struct re_guts *re_g;	// none of your business :-)
  = } regex_t;
  = typedef struct {
- =	regoff_t rm_so;		// start of match
- =	regoff_t rm_eo;		// end of match
+ = 	regoff_t rm_so;		// start of match
+ = 	regoff_t rm_eo;		// end of match
  = } regmatch_t;
  */
 /*
@@ -73,7 +75,7 @@
  * immediately *preceding* "execution" of that operator.
  */
 typedef unsigned long sop;	/* strip operator */
-typedef long sopno;
+typedef unsigned long sopno;
 #define	OPRMASK	0xf8000000L
 #define	OPDMASK	0x07ffffffL
 #define	OPSHIFT	((unsigned)27)
@@ -102,6 +104,10 @@ typedef long sopno;
 #define	O_CH	(18L<<OPSHIFT)	/* end choice	back to OOR1		*/
 #define	OBOW	(19L<<OPSHIFT)	/* begin word	-			*/
 #define	OEOW	(20L<<OPSHIFT)	/* end word	-			*/
+#define	OBOS	(21L<<OPSHIFT)	/* begin subj.  -			*/
+#define	OEOS	(22L<<OPSHIFT)	/* end subj.	-			*/
+#define	OWBND	(23L<<OPSHIFT)	/* word bound	-			*/
+#define	ONWBND	(24L<<OPSHIFT)	/* not bound	-			*/
 
 /*
  * Structures for [] character-set representation.
@@ -111,13 +117,13 @@ typedef struct {
 	wint_t		max;
 } crange;
 typedef struct {
-	unsigned char	bmp[NC / 8];
+	unsigned char	bmp[NC_MAX / 8];
 	wctype_t	*types;
-	int		ntypes;
+	unsigned int	ntypes;
 	wint_t		*wides;
-	int		nwides;
+	unsigned int	nwides;
 	crange		*ranges;
-	int		nranges;
+	unsigned int	nranges;
 	int		invert;
 	int		icase;
 } cset;
@@ -125,15 +131,20 @@ typedef struct {
 static int
 CHIN1(cset *cs, wint_t ch)
 {
-	int i;
+	unsigned int i;
 
 	assert(ch >= 0);
 	if (ch < NC)
 		return (((cs->bmp[ch >> 3] & (1 << (ch & 7))) != 0) ^
 		    cs->invert);
-	for (i = 0; i < cs->nwides; i++)
-		if (ch == cs->wides[i])
+	for (i = 0; i < cs->nwides; i++) {
+		if (cs->icase) {
+			if (ch == towlower(cs->wides[i]) ||
+			    ch == towupper(cs->wides[i]))
+				return (!cs->invert);
+		} else if (ch == cs->wides[i])
 			return (!cs->invert);
+	}
 	for (i = 0; i < cs->nranges; i++)
 		if (cs->ranges[i].min <= ch && ch <= cs->ranges[i].max)
 			return (!cs->invert);
@@ -151,14 +162,10 @@ CHIN(cset *cs, wint_t ch)
 	if (ch < NC)
 		return (((cs->bmp[ch >> 3] & (1 << (ch & 7))) != 0) ^
 		    cs->invert);
-	else if (cs->icase) {
-		if (cs->invert)
-			return (CHIN1(cs, ch) && CHIN1(cs, towlower(ch)) &&
-			    CHIN1(cs, towupper(ch)));
-		else
-			return (CHIN1(cs, ch) || CHIN1(cs, towlower(ch)) ||
-			    CHIN1(cs, towupper(ch)));
-	} else
+	else if (cs->icase)
+		return (CHIN1(cs, ch) || CHIN1(cs, towlower(ch)) ||
+		    CHIN1(cs, towupper(ch)));
+	else
 		return (CHIN1(cs, ch));
 }
 
@@ -169,7 +176,7 @@ struct re_guts {
 	int magic;
 #		define	MAGIC2	((('R'^0200)<<8)|'E')
 	sop *strip;		/* malloced area for strip */
-	int ncsets;		/* number of csets in use */
+	unsigned int ncsets;	/* number of csets in use */
 	cset *sets;		/* -> cset [ncsets] */
 	int cflags;		/* copy of regcomp() cflags argument */
 	sopno nstates;		/* = number of sops */
@@ -193,4 +200,5 @@ struct re_guts {
 
 /* misc utilities */
 #define	OUT	(CHAR_MIN - 1)	/* a non-character value */
-#define ISWORD(c)       (iswalnum((wint_t)(c)) || (c) == '_')
+#define	IGN	(CHAR_MIN - 2)
+#define ISWORD(c)       (iswalnum((uch)(c)) || (c) == '_')
diff --git a/winsup/cygwin/regex/regexec.c b/winsup/cygwin/regex/regexec.c
index 94e95e65abcd..d7aa46f45b2b 100644
--- a/winsup/cygwin/regex/regexec.c
+++ b/winsup/cygwin/regex/regexec.c
@@ -1,4 +1,6 @@
 /*-
+ * SPDX-License-Identifier: BSD-3-Clause
+ *
  * Copyright (c) 1992, 1993, 1994 Henry Spencer.
  * Copyright (c) 1992, 1993, 1994
  *	The Regents of the University of California.  All rights reserved.
@@ -14,7 +16,7 @@
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
- * 4. Neither the name of the University nor the names of its contributors
+ * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
@@ -37,7 +39,7 @@
 static char sccsid[] = "@(#)regexec.c	8.3 (Berkeley) 3/20/94";
 #endif /* LIBC_SCCS and not lint */
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/lib/libc/regex/regexec.c,v 1.8 2007/06/11 03:05:54 delphij Exp $");
+__FBSDID("$FreeBSD$");
 
 /*
  * the outer shell of regexec()
@@ -46,9 +48,6 @@ __FBSDID("$FreeBSD: src/lib/libc/regex/regexec.c,v 1.8 2007/06/11 03:05:54 delph
  * macros that code uses.  This lets the same code operate on two different
  * representations for state sets and characters.
  */
-#ifdef __CYGWIN__
-#include "winsup.h"
-#endif
 #include <sys/types.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -68,9 +67,9 @@ static __inline size_t
 xmbrtowc(wint_t *wi, const char *s, size_t n, mbstate_t *mbs, wint_t dummy)
 {
 	size_t nr;
-	wint_t wc;
+	wchar_t wc;
 
-	nr = mbrtowi(&wc, s, n, mbs);
+	nr = mbrtowc(&wc, s, n, mbs);
 	if (wi != NULL)
 		*wi = wc;
 	if (nr == 0)
@@ -98,8 +97,8 @@ xmbrtowc_dummy(wint_t *wi,
 }
 
 /* macros for manipulating states, small version */
-#define	states	long
-#define	states1	states		/* for later use in regexec() decision */
+#define	states1	long		/* for later use in regexec() decision */
+#define	states	states1
 #define	CLEAR(v)	((v) = 0)
 #define	SET0(v, n)	((v) &= ~((unsigned long)1 << (n)))
 #define	SET1(v, n)	((v) |= (unsigned long)1 << (n))
@@ -228,9 +227,9 @@ regexec(const regex_t * __restrict preg,
 	eflags = GOODFLAGS(eflags);
 
 	if (MB_CUR_MAX > 1)
-		return(mmatcher(g, (char *)string, nmatch, pmatch, eflags));
+		return(mmatcher(g, string, nmatch, pmatch, eflags));
 	else if (g->nstates <= CHAR_BIT*sizeof(states1) && !(eflags&REG_LARGE))
-		return(smatcher(g, (char *)string, nmatch, pmatch, eflags));
+		return(smatcher(g, string, nmatch, pmatch, eflags));
 	else
-		return(lmatcher(g, (char *)string, nmatch, pmatch, eflags));
+		return(lmatcher(g, string, nmatch, pmatch, eflags));
 }
diff --git a/winsup/cygwin/regex/regfree.c b/winsup/cygwin/regex/regfree.c
index aa795fa783dc..859c2a4b37b2 100644
--- a/winsup/cygwin/regex/regfree.c
+++ b/winsup/cygwin/regex/regfree.c
@@ -1,4 +1,6 @@
 /*-
+ * SPDX-License-Identifier: BSD-3-Clause
+ *
  * Copyright (c) 1992, 1993, 1994 Henry Spencer.
  * Copyright (c) 1992, 1993, 1994
  *	The Regents of the University of California.  All rights reserved.
@@ -14,7 +16,7 @@
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
- * 4. Neither the name of the University nor the names of its contributors
+ * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
@@ -37,7 +39,7 @@
 static char sccsid[] = "@(#)regfree.c	8.3 (Berkeley) 3/20/94";
 #endif /* LIBC_SCCS and not lint */
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/lib/libc/regex/regfree.c,v 1.8 2007/06/11 03:05:54 delphij Exp $");
+__FBSDID("$FreeBSD$");
 
 #include <sys/types.h>
 #include <stdio.h>
@@ -58,7 +60,7 @@ void
 regfree(regex_t *preg)
 {
 	struct re_guts *g;
-	int i;
+	unsigned int i;
 
 	if (preg->re_magic != MAGIC1)	/* oops */
 		return;			/* nice to complain, but hard */
diff --git a/winsup/cygwin/regex/utils.h b/winsup/cygwin/regex/utils.h
index 2a2ed9694de5..72f2286a0260 100644
--- a/winsup/cygwin/regex/utils.h
+++ b/winsup/cygwin/regex/utils.h
@@ -1,4 +1,6 @@
 /*-
+ * SPDX-License-Identifier: BSD-3-Clause
+ *
  * Copyright (c) 1992, 1993, 1994 Henry Spencer.
  * Copyright (c) 1992, 1993, 1994
  *	The Regents of the University of California.  All rights reserved.
@@ -14,7 +16,7 @@
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
- * 4. Neither the name of the University nor the names of its contributors
+ * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
@@ -31,13 +33,15 @@
  * SUCH DAMAGE.
  *
  *	@(#)utils.h	8.3 (Berkeley) 3/20/94
- * $FreeBSD: src/lib/libc/regex/utils.h,v 1.3 2007/01/09 00:28:04 imp Exp $
+ * $FreeBSD$
  */
 
 /* utility definitions */
 #define	DUPMAX		_POSIX2_RE_DUP_MAX	/* xxx is this right? */
 #define	INFINITY	(DUPMAX + 1)
-#define	NC		(CHAR_MAX - CHAR_MIN + 1)
+
+#define	NC_MAX		(CHAR_MAX - CHAR_MIN + 1)
+#define	NC		((MB_CUR_MAX) == 1 ? (NC_MAX) : (128))
 typedef unsigned char uch;
 
 /* switch off assertions (if not already off) if no REDEBUG */

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2023-03-16 12:54 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-03-16 12:54 [newlib-cygwin/main] Cygwin: replace regex with latest verbatim FreeBSD version Corinna Vinschen

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).