From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 2155) id 289A43858C2B; Mon, 20 Feb 2023 22:01:19 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 289A43858C2B DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=sourceware.org; s=default; t=1676930479; bh=fWUhnTUrLwtQu0/i86gcS7hcl3sYJdjzKEczyj0Ka2I=; h=From:To:Subject:Date:From; b=j1NywN+FTRXRzAin2Nlj8RqzXolnSl8yJmaNiOxPBqNynriS8ns9EU5fwFDulr2M/ N0BIHSWx9W0IP3Bgm0RW3TsQSsC9V7iqFhs9Dr9S3C8ohoLn5pZkBwoFYH/JsKmKw6 9BgSncqUEkysRc5N0G/Kq+hFJh4Ll7uQD6UotqLM= Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable From: Corinna Vinschen To: cygwin-cvs@sourceware.org Subject: [newlib-cygwin/main] Cygwin: linux-locale-helpers: helper tools to generate locale data from Linux X-Act-Checkin: newlib-cygwin X-Git-Author: Corinna Vinschen X-Git-Refname: refs/heads/main X-Git-Oldrev: ce5aa098071304cfd3bd1bd535a7571089344b1a X-Git-Newrev: 4ab778df242efdc364b9a42b225c071e0ecc3cb7 Message-Id: <20230220220119.289A43858C2B@sourceware.org> Date: Mon, 20 Feb 2023 22:01:19 +0000 (GMT) List-Id: https://sourceware.org/git/gitweb.cgi?p=3Dnewlib-cygwin.git;h=3D4ab778df242= efdc364b9a42b225c071e0ecc3cb7 commit 4ab778df242efdc364b9a42b225c071e0ecc3cb7 Author: Corinna Vinschen AuthorDate: Mon Feb 20 23:00:04 2023 +0100 Commit: Corinna Vinschen CommitDate: Mon Feb 20 23:00:04 2023 +0100 Cygwin: linux-locale-helpers: helper tools to generate locale data from= Linux =20 Signed-off-by: Corinna Vinschen Diff: --- winsup/cygwin/linux-locale-helpers/README | 10 + .../fetch-lc_collate-elements-from-glibc | 61 ++++++ .../fetch-lc_messages-from-linux.c | 169 +++++++++++++++ .../fetch-lc_time_era-from-linux.c | 237 +++++++++++++++++= ++++ 4 files changed, 477 insertions(+) diff --git a/winsup/cygwin/linux-locale-helpers/README b/winsup/cygwin/linu= x-locale-helpers/README new file mode 100644 index 000000000000..2489416bc877 --- /dev/null 
+++ b/winsup/cygwin/linux-locale-helpers/README @@ -0,0 +1,10 @@ +These scripts and helper applications are used to create locale data +required for complete locale support, but either missing in Windows +or implemented in a non-POSIXy way. + +The script has to run from inside a glibc git clone. +The C tools can be built without any special options. + +All three tools generate the new locale headers (lc_collelem.h, +lc_era.h, lc_msg.h) in the current working directory. They can just +be copied to local_includes and committed without further changes. diff --git a/winsup/cygwin/linux-locale-helpers/fetch-lc_collate-elements-f= rom-glibc b/winsup/cygwin/linux-locale-helpers/fetch-lc_collate-elements-fr= om-glibc new file mode 100755 index 000000000000..a0ff0e62f15f --- /dev/null +++ b/winsup/cygwin/linux-locale-helpers/fetch-lc_collate-elements-from-gli= bc @@ -0,0 +1,61 @@ +#!/bin/bash +# +# SPDX-License-Identifier: BSD-2-Clause +# +# Assuming +# +# git clone https://sourceware.org/git/glibc.git +# cd glibc +# +topdir=3D"$(git rev-parse --show-toplevel)" +if [ -z "${topdir}" ] +then + echo "Not a git dir? Exit." + exit 1 +fi +cd "${topdir}" +glibc_conf=3D"$(grep 'GNU C Library' configure.ac)" +if [ -z "${glibc_conf}" ] +then + echo "No GLibc configure.ac? Wrong git repo? Exit." + exit 1 +fi +if [ ! -f version.h ] +then + echo "No version.h file? Exit." + exit 1 +fi +version=3D$(sed -n -e 's/#define VERSION "\(.*\)"/\1/p' version.h) +if [ -z "${version}" ] +then + echo "Malformed version.h file. Exit." + exit 1 +fi +if [ ! -d localedata/locales ] +then + echo "No localedata/locales subdir. Broken repo? Exit." + exit 1 +fi +( + cd localedata/locales + cat <<-EOF + /* This struct of collating elements data has been generated by fetching + locale data from a GLibc ${version} source dir on $(date +%F). 
*/ + struct collating_element_t + { + const char32_t *element; + const char *locale; + }; + + collating_element_t collating_element[] =3D + { + EOF + grep -r collating-element * \ + | sed -e 's#^\([^:]*\):collating-element[ \t]*\([^ \t]*\)[ \t]*from[ \t]= *"\(.*\)".*$# { U"\3", "\1" }, /* \2 */# + s//\\U0000\1/g + s//\\U000\1/g + s//\\U00\1/g + s/iso14651_t1_common//g' \ + | sort + echo "};" +) > lc_collelem.h diff --git a/winsup/cygwin/linux-locale-helpers/fetch-lc_messages-from-linu= x.c b/winsup/cygwin/linux-locale-helpers/fetch-lc_messages-from-linux.c new file mode 100644 index 000000000000..03755c6aa7d8 --- /dev/null +++ b/winsup/cygwin/linux-locale-helpers/fetch-lc_messages-from-linux.c @@ -0,0 +1,169 @@ +/* SPDX-License-Identifier: BSD-2-Clause */ +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include + +struct lc_msg_t { + char locale[64]; + char yesstr[256]; + char nostr[256]; + char yesexpr[256]; + char noexpr[256]; +} msg[512]; +int mcnt =3D 0; + +char * +xfrm_utf (const wchar_t *ws) +{ + static char xfrm[256]; + char *p =3D xfrm; + int wconst =3D 0; + + while (*ws) + { + if (*ws < 0x80 && (!wconst || !wcschr (L"aAbBcCdDeEfF", *ws))) + { + *p++ =3D *ws; + wconst =3D 0; + } + else + { + p +=3D sprintf (p, "\\x%04lx", *ws); + wconst =3D 1; + } + ++ws; + } + *p =3D '\0'; + return xfrm; +} + +void +read_locale_messages (char *name) +{ + char *nl; + char locale[64]; + wchar_t nlbuf[256]; + + strcpy (locale, name); + nl =3D strchr (locale, '@'); + if (nl) + stpcpy (stpcpy (nl, ".utf8"), strchr (name, '@')); + else + strcat (locale, ".utf8"); + printf ("%s\n", locale); + setlocale (LC_ALL, locale); + + strcpy (msg[mcnt].locale, name); + nl =3D nl_langinfo (YESSTR); + mbstowcs (nlbuf, nl, 256); + strcpy (msg[mcnt].yesstr, xfrm_utf (nlbuf)); + nl =3D nl_langinfo (NOSTR); + mbstowcs (nlbuf, nl, 256); + strcpy (msg[mcnt].nostr, xfrm_utf (nlbuf)); + nl =3D nl_langinfo (YESEXPR); + mbstowcs (nlbuf, nl, 256); + strcpy 
(msg[mcnt].yesexpr, xfrm_utf (nlbuf)); + nl =3D nl_langinfo (NOEXPR); + mbstowcs (nlbuf, nl, 256); + strcpy (msg[mcnt].noexpr, xfrm_utf (nlbuf)); + /* Serbian locale rename weirdness */ + if (!strncmp (msg[mcnt].locale, "sr_RS", 5)) + { + /* Create additional equivalent entry for the old locale sr_SP. */ + ++mcnt; + memcpy (&msg[mcnt], &msg[mcnt - 1], sizeof msg[mcnt]); + msg[mcnt].locale[3] =3D 'S'; + msg[mcnt].locale[4] =3D 'P'; + /* Create additional equivalent entry for sr_ME@latin missing in Lin= ux. */ + if (!strcmp (msg[mcnt].locale, "sr_SP@latin")) + { + ++mcnt; + memcpy (&msg[mcnt], &msg[mcnt - 1], sizeof msg[mcnt]); + msg[mcnt].locale[3] =3D 'M'; + msg[mcnt].locale[4] =3D 'E'; + } + } + ++mcnt; +} + +int +locale_cmp (const void *a, const void *b) +{ + struct lc_msg_t *la =3D (struct lc_msg_t *) a; + struct lc_msg_t *lb =3D (struct lc_msg_t *) b; + return strcmp (la->locale, lb->locale); +} + +void +create_list () +{ + FILE *fp =3D fopen ("lc_msg.h", "w"); + FILE *pp =3D popen ("rpm -q glibc", "r"); + char vers[64]; + int i; + struct tm *tm; + time_t tim; + char tstr[64]; + + fgets (vers, 64, pp); + pclose (pp); + if (strchr (vers, '\n')) + *strchr (vers, '\n') =3D '\0'; + tim =3D time (NULL); + tm =3D gmtime (&tim); + strftime (tstr, 64, "%F", tm); + fprintf (fp, +"/* This struct of LC_MESSAGES data has been generated by fetching locale\= n" +" data from a Linux system using %s on %s. 
*/\n" +"\n" +"struct lc_msg_t\n" +"{\n" +" const char *locale;\n" +" const wchar_t *yesexpr;\n" +" const wchar_t *noexpr;\n" +" const wchar_t *yesstr;\n" +" const wchar_t *nostr;\n" +"};\n" +"\n" +"static struct lc_msg_t lc_msg[] =3D\n" +"{\n", vers, tstr); + + qsort (msg, mcnt, sizeof (struct lc_msg_t), locale_cmp); + for (i =3D 0; i < mcnt; ++i) + fprintf (fp, " { \"%s\", L\"%s\", L\"%s\", L\"%s\", L\"%s\" },\n", + msg[i].locale, + msg[i].yesexpr, msg[i].noexpr, + msg[i].yesstr, msg[i].nostr); + fputs ("};\n", fp); + fclose (fp); +} + +int +main () +{ + char name[32], *c; + FILE *pp; + =20 + pp =3D popen ("locale -a | grep -a '_' | fgrep -v .", "r"); + if (!pp) + { + perror ("popen failed"); + return 1; + } + while (fgets (name, 32, pp)) + { + c =3D strchr (name, '\n'); + if (c) + *c =3D '\0'; + read_locale_messages (name); + } + pclose (pp); + create_list (); + return 0; +} diff --git a/winsup/cygwin/linux-locale-helpers/fetch-lc_time_era-from-linu= x.c b/winsup/cygwin/linux-locale-helpers/fetch-lc_time_era-from-linux.c new file mode 100644 index 000000000000..1ee75ca6ba71 --- /dev/null +++ b/winsup/cygwin/linux-locale-helpers/fetch-lc_time_era-from-linux.c @@ -0,0 +1,237 @@ +/* SPDX-License-Identifier: BSD-2-Clause */ +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include + +struct lc_era_t { + char locale[64]; + char *date_fmt; + char *d_fmt; + char *d_t_fmt; + char *t_fmt; + char *t_fmt_ampm; + char *era; + char *era_d_fmt; + char *era_d_t_fmt; + char *era_t_fmt; + char *alt_digits; +} era[512]; +int ecnt =3D 0; + +char * +xfrm_utf (const wchar_t *ws, int slist) +{ + static char xfrm[4096]; + char *p =3D xfrm; + int wconst =3D 0; + + while (*ws) + { + if (*ws < 0x80 && (!wconst || !wcschr (L"aAbBcCdDeEfF", *ws))) + { + *p++ =3D *ws; + wconst =3D 0; + } + else + { + p +=3D sprintf (p, "\\x%04lx", *ws); + wconst =3D 1; + } + ++ws; + if (!*ws && slist) + { + ++ws; + if (*ws) + p +=3D sprintf (p, ";"); + } + } + *p 
=3D '\0'; + return xfrm; +} + +char * +xfrm_slist (const char *slist) +{ + static wchar_t wxfrm[4096], *wp; + char *xfrm, *p, *ret; + + wp =3D wxfrm; + while (*slist) + { + size_t len =3D mbstowcs (wp, slist, wxfrm + 4096 - wp) + 1; + slist +=3D strlen (slist) + 1; + wp +=3D len; + } + *wp++ =3D L'\0'; + xfrm =3D xfrm_utf (wxfrm, 1); + p =3D xfrm; + while (*p) + p +=3D strlen (p) + 1; + ++p; + ret =3D (char *) malloc (p - xfrm); + memcpy (ret, xfrm, p - xfrm); + return ret; +} + +void +read_locale_era (char *name) +{ + char *nl, *nlera, *altd; + char locale[64]; + wchar_t nlbuf[256]; + + strcpy (locale, name); + nl =3D strchr (locale, '@'); + if (nl) + stpcpy (stpcpy (nl, ".utf8"), strchr (name, '@')); + else + strcat (locale, ".utf8"); + printf ("%s\n", locale); + setlocale (LC_ALL, locale); + + nlera =3D nl_langinfo (ERA); + altd =3D nl_langinfo (ALT_DIGITS); + + if (!*nlera && !*altd) + return; + + strcpy (era[ecnt].locale, name); + nl =3D nl_langinfo (_DATE_FMT); + mbstowcs (nlbuf, nl, 256); + era[ecnt].date_fmt =3D strdup (xfrm_utf (nlbuf, 0)); + nl =3D nl_langinfo (D_FMT); + mbstowcs (nlbuf, nl, 256); + era[ecnt].d_fmt =3D strdup (xfrm_utf (nlbuf, 0)); + nl =3D nl_langinfo (D_T_FMT); + mbstowcs (nlbuf, nl, 256); + era[ecnt].d_t_fmt =3D strdup (xfrm_utf (nlbuf, 0)); + nl =3D nl_langinfo (T_FMT); + mbstowcs (nlbuf, nl, 256); + era[ecnt].t_fmt =3D strdup (xfrm_utf (nlbuf, 0)); + nl =3D nl_langinfo (T_FMT_AMPM); + mbstowcs (nlbuf, nl, 256); + era[ecnt].t_fmt_ampm =3D strdup (xfrm_utf (nlbuf, 0)); + + era[ecnt].era =3D *nlera ? xfrm_slist (nlera) : "\0"; + era[ecnt].alt_digits =3D *altd ? 
xfrm_slist (altd) : "\0"; + + nl =3D nl_langinfo (ERA_D_FMT); + mbstowcs (nlbuf, nl, 256); + era[ecnt].era_d_fmt =3D strdup (xfrm_utf (nlbuf, 0)); + nl =3D nl_langinfo (ERA_D_T_FMT); + mbstowcs (nlbuf, nl, 256); + era[ecnt].era_d_t_fmt =3D strdup (xfrm_utf (nlbuf, 0)); + nl =3D nl_langinfo (ERA_T_FMT); + mbstowcs (nlbuf, nl, 256); + era[ecnt].era_t_fmt =3D strdup (xfrm_utf (nlbuf, 0)); + /* Serbian locale rename weirdness */ + if (!strncmp (era[ecnt].locale, "sr_RS", 5)) + { + /* Create additional equivalent entries for the old locale sr_SP. */ + ++ecnt; + memcpy (&era[ecnt], &era[ecnt - 1], sizeof era[ecnt]); + era[ecnt].locale[3] =3D 'S'; + era[ecnt].locale[4] =3D 'P'; + /* Create additional equivalent entry for sr_ME@latin missing in Lin= ux. */ + if (!strcmp (era[ecnt].locale, "sr_SP@latin")) + { + ++ecnt; + memcpy (&era[ecnt], &era[ecnt - 1], sizeof era[ecnt]); + era[ecnt].locale[3] =3D 'M'; + era[ecnt].locale[4] =3D 'E'; + } + } + ++ecnt; +} + +int +locale_cmp (const void *a, const void *b) +{ + struct lc_era_t *la =3D (struct lc_era_t *) a; + struct lc_era_t *lb =3D (struct lc_era_t *) b; + return strcmp (la->locale, lb->locale); +} + +void +create_list () +{ + FILE *fp =3D fopen ("lc_era.h", "w"); + FILE *pp =3D popen ("rpm -q glibc", "r"); + char vers[64]; + int i; + struct tm *tm; + time_t tim; + char tstr[64]; + + fgets (vers, 64, pp); + pclose (pp); + if (strchr (vers, '\n')) + *strchr (vers, '\n') =3D '\0'; + tim =3D time (NULL); + tm =3D gmtime (&tim); + strftime (tstr, 64, "%F", tm); + fprintf (fp, +"/* This struct of LC_TIME ERA data has been generated by fetching locale\= n" +" data from a Linux system using %s on %s. 
*/\n" +"\n" +"struct lc_era_t\n" +"{\n" +" const char *locale;\n" +" const wchar_t *date_fmt;\n" +" const wchar_t *d_fmt;\n" +" const wchar_t *d_t_fmt;\n" +" const wchar_t *t_fmt;\n" +" const wchar_t *t_fmt_ampm;\n" +" const wchar_t *era;\n" +" const wchar_t *era_d_fmt;\n" +" const wchar_t *era_d_t_fmt;\n" +" const wchar_t *era_t_fmt;\n" +" const wchar_t *alt_digits;\n" +"};\n" +"\n" +"static struct lc_era_t lc_era[] =3D\n" +"{\n", vers, tstr); + + qsort (era, ecnt, sizeof (struct lc_era_t), locale_cmp); + for (i =3D 0; i < ecnt; ++i) + fprintf (fp, " { \"%s\", L\"%s\", L\"%s\", L\"%s\", L\"%s\", L\"%s\",= " + "L\"%s\", L\"%s\", L\"%s\", L\"%s\", L\"%s\" },\n", + era[i].locale, era[i].date_fmt, + era[i].d_fmt, era[i].d_t_fmt, + era[i].t_fmt, era[i].t_fmt_ampm, + era[i].era, era[i].era_d_fmt, + era[i].era_d_t_fmt, era[i].era_t_fmt, + era[i].alt_digits); + fputs ("};\n", fp); + fclose (fp); +} + +int +main () +{ + char name[32], *c; + FILE *pp; + + pp =3D popen ("locale -a | grep -a '_' | fgrep -v .", "r"); + if (!pp) + { + perror ("popen failed"); + return 1; + } + while (fgets (name, 32, pp)) + { + c =3D strchr (name, '\n'); + if (c) + *c =3D '\0'; + read_locale_era (name); + } + pclose (pp); + create_list (); + return 0; +}