// Checks, for every locale reported by `locale -a`, that the POSIX extended
// regular expressions [0-9], [a-z] and [A-Z] match exactly the corresponding
// ASCII code point ranges and nothing else.
//
// NOTE(review): the header names below were reconstructed from the
// identifiers this file uses -- confirm against the original include list.
#include <err.h>
#include <errno.h>
#include <limits.h>
#include <locale.h>
#include <regex.h>
#include <stdio.h>
#include <stdlib.h>
#include <wchar.h>

#include <algorithm>
#include <string>
#include <utility>
#include <vector>

// Runs `locale -a` and returns the locale names it prints, sorted.
// Any failure (spawning the command, reading its output, or a non-zero
// exit status) terminates the process through err()/errx().
static std::vector<std::string>
get_locales()
{
  FILE *fp = popen("locale -a", "r");
  if (fp == nullptr)
    err(1, "running locale -a");

  std::vector<std::string> result;
  while (!feof(fp)) {
    // "%ms" asks fscanf to allocate the buffer for the token itself;
    // the buffer must be released with free().
    char *elem{};
    int ret = fscanf(fp, "%ms", &elem);
    if (ret == 1) {
      if (elem == nullptr)
        errx(1, "invalid fscanf result");
      result.emplace_back(elem);
      free(elem);
    } else if (ferror(fp))
      err(1, "fscanf failed");
  }

  int status = pclose(fp);
  // -1 means pclose itself failed and errno is meaningful; any other
  // non-zero value is the child's wait status, for which errno is unrelated.
  if (status == -1)
    err(1, "pclose");
  if (status != 0)
    errx(1, "locale -a failed with status %d", status);

  std::sort(result.begin(), result.end());
  return result;
}

// Compiles `pattern` as an extended regular expression under the currently
// active locale and tries it against every wide character from 1 through
// 0x10FFFF that the locale can encode as a multi-byte sequence.  A character
// is expected to match if and only if it lies within [range.first,
// range.second]; every disagreement is printed to stdout, prefixed with
// `locale`.  Hard failures terminate the process through err()/errx().
static void
test_regexp_range(const char *locale, const char *pattern,
                  std::pair<wchar_t, wchar_t> range)
{
  regex_t reg;
  {
    int ret = regcomp(&reg, pattern, REG_EXTENDED | REG_NOSUB);
    if (ret != 0)
      errx(1, "Cannot compile regular expression /%s/: %d", pattern, ret);
  }

  const wchar_t maximum_character = 0x10FFFF;
  const unsigned maximum_length = 5; /* With NUL. */

  for (wchar_t ch = 1; ch <= maximum_character; ++ch) {
    char uch[MB_LEN_MAX];
    mbstate_t ps{};
    {
      size_t ret = wcrtomb(uch, ch, &ps);
      if (ret == static_cast<size_t>(-1)) {
        // Characters the locale's charset cannot represent are skipped.
        if (errno == EILSEQ)
          continue;
        err(1, "wcrtomb(0x%x)", static_cast<unsigned>(ch));
      } else if (ret == 0)
        continue; // Some anomaly.
      if (ret >= maximum_length)
        errx(1, "multi-byte length %zu at 0x%x exceeds %u",
             ret, static_cast<unsigned>(ch), maximum_length);
      // wcrtomb does not NUL-terminate; regexec needs a C string.
      uch[ret] = '\0';
    }

    int ret = regexec(&reg, uch, 0, nullptr, 0);
    if (ret != 0 && ret != REG_NOMATCH)
      errx(1, "regexec of /%s/ failed with code %d", pattern, ret);

    bool regex_matches = ret == 0;
    bool range_matches = range.first <= ch && ch <= range.second;
    if (regex_matches != range_matches) {
      if (regex_matches)
        printf("%s: U+%06X matches /%s/ unexpectedly\n",
               locale, static_cast<unsigned>(ch), pattern);
      else
        printf("%s: U+%06X fails to match /%s/\n",
               locale, static_cast<unsigned>(ch), pattern);
    }
  }

  regfree(&reg);
}

// Switches to each installed locale in turn and checks the three ASCII
// bracket ranges under it.  A locale that cannot be activated is fatal.
int
main()
{
  const std::vector<std::string> locales = get_locales();
  for (const auto &locale : locales) {
    if (setlocale(LC_ALL, locale.c_str()) == nullptr)
      err(1, "Cannot set locale to %s", locale.c_str());
    test_regexp_range(locale.c_str(), "[0-9]", std::make_pair(L'0', L'9'));
    test_regexp_range(locale.c_str(), "[a-z]", std::make_pair(L'a', L'z'));
    test_regexp_range(locale.c_str(), "[A-Z]", std::make_pair(L'A', L'Z'));
  }
}