public inbox for cygwin-cvs@sourceware.org
help / color / mirror / Atom feed
* [newlib-cygwin/main] Cygwin: linux-locale-helpers: helper tools to generate locale data from Linux
@ 2023-02-20 22:01 Corinna Vinschen
0 siblings, 0 replies; only message in thread
From: Corinna Vinschen @ 2023-02-20 22:01 UTC (permalink / raw)
To: cygwin-cvs
https://sourceware.org/git/gitweb.cgi?p=newlib-cygwin.git;h=4ab778df242efdc364b9a42b225c071e0ecc3cb7
commit 4ab778df242efdc364b9a42b225c071e0ecc3cb7
Author: Corinna Vinschen <corinna@vinschen.de>
AuthorDate: Mon Feb 20 23:00:04 2023 +0100
Commit: Corinna Vinschen <corinna@vinschen.de>
CommitDate: Mon Feb 20 23:00:04 2023 +0100
Cygwin: linux-locale-helpers: helper tools to generate locale data from Linux
Signed-off-by: Corinna Vinschen <corinna@vinschen.de>
Diff:
---
winsup/cygwin/linux-locale-helpers/README | 10 +
.../fetch-lc_collate-elements-from-glibc | 61 ++++++
.../fetch-lc_messages-from-linux.c | 169 +++++++++++++++
.../fetch-lc_time_era-from-linux.c | 237 +++++++++++++++++++++
4 files changed, 477 insertions(+)
diff --git a/winsup/cygwin/linux-locale-helpers/README b/winsup/cygwin/linux-locale-helpers/README
new file mode 100644
index 000000000000..2489416bc877
--- /dev/null
+++ b/winsup/cygwin/linux-locale-helpers/README
@@ -0,0 +1,10 @@
+These scripts and helper applications are used to create locale data
+required for complete locale support, but either missing in Windows
+or implemented in a non-POSIXy way.
+
+The script has to run from inside a glibc git clone.
+The C tools can be built without any special options.
+
+All three tools generate the new locale headers (lc_collelem.h,
+lc_era.h, lc_msg.h) in the current working directory. They can just
+be copied to local_includes and committed without further changes.
diff --git a/winsup/cygwin/linux-locale-helpers/fetch-lc_collate-elements-from-glibc b/winsup/cygwin/linux-locale-helpers/fetch-lc_collate-elements-from-glibc
new file mode 100755
index 000000000000..a0ff0e62f15f
--- /dev/null
+++ b/winsup/cygwin/linux-locale-helpers/fetch-lc_collate-elements-from-glibc
@@ -0,0 +1,61 @@
+#!/bin/bash
+#
+# SPDX-License-Identifier: BSD-2-Clause
+#
+# Generate lc_collelem.h from the collating-element lines found in the
+# localedata/locales directory of a glibc source checkout, i.e. after
+#
+# git clone https://sourceware.org/git/glibc.git
+# cd glibc
+topdir="$(git rev-parse --show-toplevel)"
+if [ -z "${topdir}" ]
+then
+ echo "Not a git dir? Exit."
+ exit 1
+fi
+cd "${topdir}" # all paths below are relative to the top of the checkout
+glibc_conf="$(grep 'GNU C Library' configure.ac)"
+if [ -z "${glibc_conf}" ]
+then
+ echo "No GLibc configure.ac? Wrong git repo? Exit."
+ exit 1
+fi
+if [ ! -f version.h ]
+then
+ echo "No version.h file? Exit."
+ exit 1
+fi
+version=$(sed -n -e 's/#define VERSION "\(.*\)"/\1/p' version.h) # value of the VERSION macro
+if [ -z "${version}" ]
+then
+ echo "Malformed version.h file. Exit."
+ exit 1
+fi
+if [ ! -d localedata/locales ]
+then
+ echo "No localedata/locales subdir. Broken repo? Exit."
+ exit 1
+fi
+( # subshell: the cd below does not affect where the output file is created
+ cd localedata/locales
+ cat <<-EOF
+ /* This struct of collating elements data has been generated by fetching
+ locale data from a GLibc ${version} source dir on $(date +%F). */
+ struct collating_element_t
+ {
+ const char32_t *element;
+ const char *locale;
+ };
+
+ collating_element_t collating_element[] =
+ {
+ EOF
+ grep -r collating-element * \
+ | sed -e 's#^\([^:]*\):collating-element[ \t]*\([^ \t]*\)[ \t]*from[ \t]*"\(.*\)".*$# { U"\3", "\1" }, /* \2 */#
+ s/<U\([[:xdigit:]]\{4\}\)>/\\U0000\1/g
+ s/<U\([[:xdigit:]]\{5\}\)>/\\U000\1/g
+ s/<U\([[:xdigit:]]\{6\}\)>/\\U00\1/g
+ s/iso14651_t1_common//g' \
+ | sort
+ echo "};"
+) > lc_collelem.h # redirected by the outer shell, so created in ${topdir}
diff --git a/winsup/cygwin/linux-locale-helpers/fetch-lc_messages-from-linux.c b/winsup/cygwin/linux-locale-helpers/fetch-lc_messages-from-linux.c
new file mode 100644
index 000000000000..03755c6aa7d8
--- /dev/null
+++ b/winsup/cygwin/linux-locale-helpers/fetch-lc_messages-from-linux.c
@@ -0,0 +1,169 @@
+/* SPDX-License-Identifier: BSD-2-Clause */
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <dirent.h>
+#include <time.h>
+#include <locale.h>
+#include <langinfo.h>
+#include <wchar.h>
+
+struct lc_msg_t { /* LC_MESSAGES data collected for one locale */
+ char locale[64]; /* locale name, without the .utf8 suffix */
+ char yesstr[256]; /* nl_langinfo (YESSTR), escaped by xfrm_utf */
+ char nostr[256]; /* nl_langinfo (NOSTR), escaped by xfrm_utf */
+ char yesexpr[256]; /* nl_langinfo (YESEXPR), escaped by xfrm_utf */
+ char noexpr[256]; /* nl_langinfo (NOEXPR), escaped by xfrm_utf */
+} msg[512]; /* filled by read_locale_messages, sorted and dumped by create_list */
+int mcnt = 0; /* number of entries of msg[] in use */
+
+/* Return WS as the text of a C wide string literal in a static buffer.
+   Non-ASCII becomes \x escapes; since those are greedy, a hex digit
+   directly following one is escaped as well.  Excess input is cut off.  */
+char *
+xfrm_utf (const wchar_t *ws)
+{
+  static char xfrm[256 * 10 + 1];
+  char *p = xfrm;
+  int wconst = 0;
+
+  for (; *ws && p + 10 < xfrm + sizeof xfrm; ++ws)
+    if (*ws < 0x80 && (!wconst || !wcschr (L"0123456789aAbBcCdDeEfF", *ws)))
+      {
+        *p++ = *ws;
+        wconst = 0;
+      }
+    else
+      {
+        p += sprintf (p, "\\x%04x", (unsigned int) *ws);
+        wconst = 1;
+      }
+  *p = '\0';
+  return xfrm;
+}
+
+/* Store nl_langinfo (ITEM) of the current locale in DST (256 bytes) in
+   xfrm_utf's escaped form.  An unconvertible value is stored as "".  */
+static void
+fetch_item (char *dst, nl_item item)
+{
+  wchar_t nlbuf[256];
+  size_t n = mbstowcs (nlbuf, nl_langinfo (item), 255);
+
+  nlbuf[n == (size_t) -1 ? 0 : n] = L'\0';
+  snprintf (dst, 256, "%s", xfrm_utf (nlbuf));
+}
+
+/* Record the LC_MESSAGES strings of the UTF-8 variant of locale NAME in msg[].
+   Locales which can't be activated, or which no longer fit, are skipped.  */
+void
+read_locale_messages (char *name)
+{
+  char locale[64];
+  int base = strcspn (name, "@");
+
+  /* Insert the codeset in front of an optional @modifier.  */
+  snprintf (locale, sizeof locale, "%.*s.utf8%s", base, name, name + base);
+  printf ("%s\n", locale);
+  if (mcnt + 3 > (int) (sizeof msg / sizeof *msg)
+      || !setlocale (LC_ALL, locale))
+    {
+      fprintf (stderr, "%s: unusable locale or table full, skipped\n", locale);
+      return;
+    }
+  snprintf (msg[mcnt].locale, sizeof msg[mcnt].locale, "%s", name);
+  fetch_item (msg[mcnt].yesstr, YESSTR);
+  fetch_item (msg[mcnt].nostr, NOSTR);
+  fetch_item (msg[mcnt].yesexpr, YESEXPR);
+  fetch_item (msg[mcnt].noexpr, NOEXPR);
+  /* Serbian locale rename weirdness: duplicate sr_RS entries as the old
+     sr_SP, and sr_SP@latin as sr_ME@latin, which is missing in Linux.  */
+  if (!strncmp (msg[mcnt].locale, "sr_RS", 5))
+    {
+      msg[mcnt + 1] = msg[mcnt];
+      memcpy (msg[++mcnt].locale + 3, "SP", 2);
+      if (!strcmp (msg[mcnt].locale, "sr_SP@latin"))
+        {
+          msg[mcnt + 1] = msg[mcnt];
+          memcpy (msg[++mcnt].locale + 3, "ME", 2);
+        }
+    }
+  ++mcnt;
+}
+
+/* qsort comparator: order lc_msg_t entries by locale name (strcmp).  */
+int
+locale_cmp (const void *a, const void *b)
+{
+  const struct lc_msg_t *lhs = a, *rhs = b;
+  return strcmp (lhs->locale, rhs->locale);
+}
+
+/* Write msg[], sorted by locale name, as a C header to lc_msg.h, noting the
+   "rpm -q glibc" output and UTC date.  Exits if fopen or popen fails.  */
+void
+create_list ()
+{
+  FILE *fp = fopen ("lc_msg.h", "w");
+  FILE *pp = popen ("rpm -q glibc", "r");
+  char vers[64] = "unknown glibc version", tstr[64] = "";
+  time_t tim = time (NULL);
+
+  if (!fp || !pp)
+    {
+      perror ("create_list");
+      exit (1);
+    }
+  if (fgets (vers, sizeof vers, pp))    /* else keep the default */
+    vers[strcspn (vers, "\n")] = '\0';
+  pclose (pp);
+  strftime (tstr, sizeof tstr, "%F", gmtime (&tim));
+  fprintf (fp,
+"/* This struct of LC_MESSAGES data has been generated by fetching locale\n"
+" data from a Linux system using %s on %s. */\n"
+"\n"
+"struct lc_msg_t\n"
+"{\n"
+" const char *locale;\n"
+" const wchar_t *yesexpr;\n"
+" const wchar_t *noexpr;\n"
+" const wchar_t *yesstr;\n"
+" const wchar_t *nostr;\n"
+"};\n"
+"\n"
+"static struct lc_msg_t lc_msg[] =\n"
+"{\n", vers, tstr);
+
+  qsort (msg, mcnt, sizeof msg[0], locale_cmp);
+  for (int i = 0; i < mcnt; ++i)
+    fprintf (fp, " { \"%s\", L\"%s\", L\"%s\", L\"%s\", L\"%s\" },\n",
+             msg[i].locale, msg[i].yesexpr, msg[i].noexpr,
+             msg[i].yesstr, msg[i].nostr);
+  fputs ("};\n", fp);
+  fclose (fp);
+}
+
+/* Feed every installed locale whose name contains '_' but no '.' (i.e. no
+   codeset suffix) to read_locale_messages, then write lc_msg.h.  */
+int
+main ()
+{
+  char name[32];
+  /* grep -F replaces the obsolescent fgrep, which warns in GNU grep >= 3.8. */
+  FILE *pp = popen ("locale -a | grep -a '_' | grep -Fv .", "r");
+
+  if (!pp)
+    {
+      perror ("popen failed");
+      return 1;
+    }
+  while (fgets (name, sizeof name, pp))
+    {
+      name[strcspn (name, "\n")] = '\0';
+      read_locale_messages (name);
+    }
+  pclose (pp);
+  create_list ();
+  return 0;
+}
diff --git a/winsup/cygwin/linux-locale-helpers/fetch-lc_time_era-from-linux.c b/winsup/cygwin/linux-locale-helpers/fetch-lc_time_era-from-linux.c
new file mode 100644
index 000000000000..1ee75ca6ba71
--- /dev/null
+++ b/winsup/cygwin/linux-locale-helpers/fetch-lc_time_era-from-linux.c
@@ -0,0 +1,237 @@
+/* SPDX-License-Identifier: BSD-2-Clause */
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <dirent.h>
+#include <time.h>
+#include <locale.h>
+#include <langinfo.h>
+#include <wchar.h>
+
+struct lc_era_t { /* LC_TIME data collected for one locale having ERA or ALT_DIGITS */
+ char locale[64]; /* locale name, without the .utf8 suffix */
+ char *date_fmt; /* nl_langinfo (_DATE_FMT), escaped by xfrm_utf */
+ char *d_fmt; /* nl_langinfo (D_FMT), escaped by xfrm_utf */
+ char *d_t_fmt; /* nl_langinfo (D_T_FMT), escaped by xfrm_utf */
+ char *t_fmt; /* nl_langinfo (T_FMT), escaped by xfrm_utf */
+ char *t_fmt_ampm; /* nl_langinfo (T_FMT_AMPM), escaped by xfrm_utf */
+ char *era; /* nl_langinfo (ERA) converted by xfrm_slist, or an empty string */
+ char *era_d_fmt; /* nl_langinfo (ERA_D_FMT), escaped by xfrm_utf */
+ char *era_d_t_fmt; /* nl_langinfo (ERA_D_T_FMT), escaped by xfrm_utf */
+ char *era_t_fmt; /* nl_langinfo (ERA_T_FMT), escaped by xfrm_utf */
+ char *alt_digits; /* nl_langinfo (ALT_DIGITS) converted by xfrm_slist, or an empty string */
+} era[512]; /* filled by read_locale_era, sorted and dumped by create_list */
+int ecnt = 0; /* number of entries of era[] in use */
+
+/* Return WS as the text of a C wide string literal in a static buffer.
+   Non-ASCII becomes \x escapes; since those are greedy, a hex digit
+   directly following one is escaped as well.  If SLIST is nonzero, WS is
+   a list of strings ending with an empty string, joined here with ';'.  */
+char *
+xfrm_utf (const wchar_t *ws, int slist)
+{
+  static char xfrm[4096 * 10 + 1];
+  char *p = xfrm;
+  int wconst = 0;
+
+  while (*ws && p + 11 < xfrm + sizeof xfrm)
+    {
+      if (*ws < 0x80 && (!wconst || !wcschr (L"0123456789aAbBcCdDeEfF", *ws)))
+        {
+          *p++ = *ws;
+          wconst = 0;
+        }
+      else
+        {
+          p += sprintf (p, "\\x%04x", (unsigned int) *ws);
+          wconst = 1;
+        }
+      /* At the end of a list element step over its NUL; more follows?  */
+      if (!*++ws && slist && *++ws)
+        *p++ = ';';
+    }
+  *p = '\0';
+  return xfrm;
+}
+
+/* Convert SLIST, a sequence of multibyte strings ending with an empty
+   string, into a malloc'ed string in xfrm_utf's escaped form with elements
+   joined by ';'.  Unconvertible or excess elements are dropped.  */
+char *
+xfrm_slist (const char *slist)
+{
+  static wchar_t wxfrm[4096];
+  wchar_t *wp = wxfrm, *const end = wxfrm + 4096 - 1; /* room for final NUL */
+  char *ret;
+
+  for (; *slist; slist += strlen (slist) + 1)
+    {
+      size_t len = mbstowcs (wp, slist, end - wp);
+      /* Only accept elements which were converted completely.  */
+      if (len != (size_t) -1 && len > 0 && len < (size_t) (end - wp))
+        wp += len + 1;
+    }
+  *wp = L'\0';
+  /* xfrm_utf returns a single string, so a plain copy is sufficient.  */
+  if (!(ret = strdup (xfrm_utf (wxfrm, 1))))
+    abort ();
+  return ret;
+}
+
+/* Return nl_langinfo (ITEM) of the current locale as a malloc'ed string in
+   xfrm_utf's escaped form.  An unconvertible value yields "".  */
+static char *
+fetch_item (nl_item item)
+{
+  wchar_t nlbuf[256];
+  size_t n = mbstowcs (nlbuf, nl_langinfo (item), 255);
+  char *ret;
+
+  nlbuf[n == (size_t) -1 ? 0 : n] = L'\0';
+  if (!(ret = strdup (xfrm_utf (nlbuf, 0))))
+    abort ();
+  return ret;
+}
+
+/* Record the LC_TIME formats, ERA and ALT_DIGITS of the UTF-8 variant of
+   locale NAME in era[].  Locales defining neither ERA nor ALT_DIGITS are
+   ignored; locales which can't be activated or no longer fit are skipped.  */
+void
+read_locale_era (char *name)
+{
+  char *nlera, *altd;
+  char locale[64];
+  int base = strcspn (name, "@");
+
+  /* Insert the codeset in front of an optional @modifier.  */
+  snprintf (locale, sizeof locale, "%.*s.utf8%s", base, name, name + base);
+  printf ("%s\n", locale);
+  if (ecnt + 3 > (int) (sizeof era / sizeof *era)
+      || !setlocale (LC_ALL, locale))
+    {
+      fprintf (stderr, "%s: unusable locale or table full, skipped\n", locale);
+      return;
+    }
+
+  nlera = nl_langinfo (ERA);
+  altd = nl_langinfo (ALT_DIGITS);
+  if (!*nlera && !*altd)
+    return;
+
+  snprintf (era[ecnt].locale, sizeof era[ecnt].locale, "%s", name);
+  era[ecnt].date_fmt = fetch_item (_DATE_FMT);
+  era[ecnt].d_fmt = fetch_item (D_FMT);
+  era[ecnt].d_t_fmt = fetch_item (D_T_FMT);
+  era[ecnt].t_fmt = fetch_item (T_FMT);
+  era[ecnt].t_fmt_ampm = fetch_item (T_FMT_AMPM);
+  era[ecnt].era = *nlera ? xfrm_slist (nlera) : "\0";
+  era[ecnt].alt_digits = *altd ? xfrm_slist (altd) : "\0";
+  era[ecnt].era_d_fmt = fetch_item (ERA_D_FMT);
+  era[ecnt].era_d_t_fmt = fetch_item (ERA_D_T_FMT);
+  era[ecnt].era_t_fmt = fetch_item (ERA_T_FMT);
+  /* Serbian locale rename weirdness */
+  if (!strncmp (era[ecnt].locale, "sr_RS", 5))
+    {
+      /* Create additional equivalent entries for the old locale sr_SP. */
+      ++ecnt;
+      memcpy (&era[ecnt], &era[ecnt - 1], sizeof era[ecnt]);
+      era[ecnt].locale[3] = 'S';
+      era[ecnt].locale[4] = 'P';
+      /* Create additional equivalent entry for sr_ME@latin missing in Linux. */
+      if (!strcmp (era[ecnt].locale, "sr_SP@latin"))
+        {
+          ++ecnt;
+          memcpy (&era[ecnt], &era[ecnt - 1], sizeof era[ecnt]);
+          era[ecnt].locale[3] = 'M';
+          era[ecnt].locale[4] = 'E';
+        }
+    }
+  ++ecnt;
+}
+
+/* qsort comparator: order lc_era_t entries by locale name (strcmp).  */
+int
+locale_cmp (const void *a, const void *b)
+{
+  const struct lc_era_t *lhs = a, *rhs = b;
+  return strcmp (lhs->locale, rhs->locale);
+}
+
+/* Write era[], sorted by locale name, as a C header to lc_era.h.  The
+   leading comment records the output of "rpm -q glibc" and the current
+   UTC date.  Exits if the file or the rpm pipe can't be opened.  */
+void
+create_list ()
+{
+  FILE *fp = fopen ("lc_era.h", "w");
+  FILE *pp = popen ("rpm -q glibc", "r");
+  char vers[64] = "unknown glibc version", tstr[64] = "";
+  time_t tim = time (NULL);
+
+  if (!fp || !pp)
+    {
+      perror ("create_list");
+      exit (1);
+    }
+  /* Keep the default version string if rpm printed nothing.  */
+  if (fgets (vers, sizeof vers, pp))
+    vers[strcspn (vers, "\n")] = '\0';
+  pclose (pp);
+  strftime (tstr, sizeof tstr, "%F", gmtime (&tim));
+  fprintf (fp,
+"/* This struct of LC_TIME ERA data has been generated by fetching locale\n"
+" data from a Linux system using %s on %s. */\n"
+"\n"
+"struct lc_era_t\n"
+"{\n"
+" const char *locale;\n"
+" const wchar_t *date_fmt;\n"
+" const wchar_t *d_fmt;\n"
+" const wchar_t *d_t_fmt;\n"
+" const wchar_t *t_fmt;\n"
+" const wchar_t *t_fmt_ampm;\n"
+" const wchar_t *era;\n"
+" const wchar_t *era_d_fmt;\n"
+" const wchar_t *era_d_t_fmt;\n"
+" const wchar_t *era_t_fmt;\n"
+" const wchar_t *alt_digits;\n"
+"};\n"
+"\n"
+"static struct lc_era_t lc_era[] =\n"
+"{\n", vers, tstr);
+
+  qsort (era, ecnt, sizeof era[0], locale_cmp);
+  for (int i = 0; i < ecnt; ++i)
+    fprintf (fp, " { \"%s\", L\"%s\", L\"%s\", L\"%s\", L\"%s\", L\"%s\", "
+             "L\"%s\", L\"%s\", L\"%s\", L\"%s\", L\"%s\" },\n",
+             era[i].locale, era[i].date_fmt, era[i].d_fmt, era[i].d_t_fmt,
+             era[i].t_fmt, era[i].t_fmt_ampm, era[i].era, era[i].era_d_fmt,
+             era[i].era_d_t_fmt, era[i].era_t_fmt, era[i].alt_digits);
+  fputs ("};\n", fp);
+  fclose (fp);
+}
+
+/* Feed every installed locale whose name contains '_' but no '.' (i.e. no
+   codeset suffix) to read_locale_era, then write lc_era.h.  */
+int
+main ()
+{
+  char name[32];
+  /* grep -F replaces the obsolescent fgrep, which warns in GNU grep >= 3.8. */
+  FILE *pp = popen ("locale -a | grep -a '_' | grep -Fv .", "r");
+
+  if (!pp)
+    {
+      perror ("popen failed");
+      return 1;
+    }
+  while (fgets (name, sizeof name, pp))
+    {
+      name[strcspn (name, "\n")] = '\0';
+      read_locale_era (name);
+    }
+  pclose (pp);
+  create_list ();
+  return 0;
+}
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2023-02-20 22:01 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-02-20 22:01 [newlib-cygwin/main] Cygwin: linux-locale-helpers: helper tools to generate locale data from Linux Corinna Vinschen
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).