public inbox for cygwin-cvs@sourceware.org
help / color / mirror / Atom feed
* [newlib-cygwin/main] Cygwin: linux-locale-helpers: helper tools to generate locale data from Linux
@ 2023-02-20 22:01 Corinna Vinschen
  0 siblings, 0 replies; only message in thread
From: Corinna Vinschen @ 2023-02-20 22:01 UTC (permalink / raw)
  To: cygwin-cvs

https://sourceware.org/git/gitweb.cgi?p=newlib-cygwin.git;h=4ab778df242efdc364b9a42b225c071e0ecc3cb7

commit 4ab778df242efdc364b9a42b225c071e0ecc3cb7
Author:     Corinna Vinschen <corinna@vinschen.de>
AuthorDate: Mon Feb 20 23:00:04 2023 +0100
Commit:     Corinna Vinschen <corinna@vinschen.de>
CommitDate: Mon Feb 20 23:00:04 2023 +0100

    Cygwin: linux-locale-helpers: helper tools to generate locale data from Linux
    
    Signed-off-by: Corinna Vinschen <corinna@vinschen.de>

Diff:
---
 winsup/cygwin/linux-locale-helpers/README          |  10 +
 .../fetch-lc_collate-elements-from-glibc           |  61 ++++++
 .../fetch-lc_messages-from-linux.c                 | 169 +++++++++++++++
 .../fetch-lc_time_era-from-linux.c                 | 237 +++++++++++++++++++++
 4 files changed, 477 insertions(+)

diff --git a/winsup/cygwin/linux-locale-helpers/README b/winsup/cygwin/linux-locale-helpers/README
new file mode 100644
index 000000000000..2489416bc877
--- /dev/null
+++ b/winsup/cygwin/linux-locale-helpers/README
@@ -0,0 +1,10 @@
+These scripts and helper applications are used to create locale data
+required for complete locale support, but either missing in Windows
+or implemented in a non-POSIXy way.
+
+The script has to be run from inside a glibc git clone.
+The C tools can be built without any special options.
+
+All three tools generate the new locale headers (lc_collelem.h,
+lc_era.h, lc_msg.h) in the current working directory.  They can just
+be copied to local_includes and committed without further changes.
diff --git a/winsup/cygwin/linux-locale-helpers/fetch-lc_collate-elements-from-glibc b/winsup/cygwin/linux-locale-helpers/fetch-lc_collate-elements-from-glibc
new file mode 100755
index 000000000000..a0ff0e62f15f
--- /dev/null
+++ b/winsup/cygwin/linux-locale-helpers/fetch-lc_collate-elements-from-glibc
@@ -0,0 +1,61 @@
+#!/bin/bash
+#
+# SPDX-License-Identifier: BSD-2-Clause
+#
+#  Assuming
+#
+#    git clone https://sourceware.org/git/glibc.git
+#    cd glibc
+#
+topdir="$(git rev-parse --show-toplevel)"
+if [ -z "${topdir}" ]
+then
+  echo "Not a git dir?  Exit."
+  exit 1
+fi
+cd "${topdir}"
+glibc_conf="$(grep 'GNU C Library' configure.ac)"
+if [ -z "${glibc_conf}" ]
+then
+  echo "No GLibc configure.ac?  Wrong git repo?  Exit."
+  exit 1
+fi
+if [ ! -f version.h ]
+then
+  echo "No version.h file?  Exit."
+  exit 1
+fi
+version=$(sed -n -e 's/#define VERSION "\(.*\)"/\1/p' version.h)
+if [ -z "${version}" ]
+then
+  echo "Malformed version.h file.  Exit."
+  exit 1
+fi
+if [ ! -d localedata/locales ]
+then
+  echo "No localedata/locales subdir.  Broken repo?  Exit."
+  exit 1
+fi
+(
+  cd localedata/locales
+  cat <<-EOF
+	/* This struct of collating elements data has been generated by fetching
+	   locale data from a GLibc ${version} source dir on $(date +%F). */
+	struct collating_element_t
+	{
+	  const char32_t *element;
+	  const char *locale;
+	};
+
+	collating_element_t collating_element[] =
+	{
+	EOF
+  grep -r collating-element * \
+  | sed -e 's#^\([^:]*\):collating-element[ \t]*\([^ \t]*\)[ \t]*from[ \t]*"\(.*\)".*$#  { U"\3", "\1" }, /* \2 */#
+	       s/<U\([[:xdigit:]]\{4\}\)>/\\U0000\1/g
+	       s/<U\([[:xdigit:]]\{5\}\)>/\\U000\1/g
+	       s/<U\([[:xdigit:]]\{6\}\)>/\\U00\1/g
+	       s/iso14651_t1_common//g' \
+  | sort
+  echo "};"
+) > lc_collelem.h
diff --git a/winsup/cygwin/linux-locale-helpers/fetch-lc_messages-from-linux.c b/winsup/cygwin/linux-locale-helpers/fetch-lc_messages-from-linux.c
new file mode 100644
index 000000000000..03755c6aa7d8
--- /dev/null
+++ b/winsup/cygwin/linux-locale-helpers/fetch-lc_messages-from-linux.c
@@ -0,0 +1,169 @@
+/* SPDX-License-Identifier: BSD-2-Clause */
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <dirent.h>
+#include <time.h>
+#include <locale.h>
+#include <langinfo.h>
+#include <wchar.h>
+
+/* LC_MESSAGES data of a single locale.  Apart from the locale name, all
+   strings are kept in the escaped form returned by xfrm_utf, ready to be
+   pasted between the quotes of a wide string literal. */
+struct lc_msg_t
+{
+  char locale[64];
+  char yesstr[256];
+  char nostr[256];
+  char yesexpr[256];
+  char noexpr[256];
+};
+
+/* Table of collected locales and the number of entries in use. */
+struct lc_msg_t msg[512];
+int mcnt;
+
+/* Convert the wide string WS into text suitable for the body of a C wide
+   string literal.  ASCII characters are copied as is, everything else is
+   written as \xNNNN escape.  A hex escape swallows every following hex
+   digit, so an ASCII hex digit (0-9, a-f, A-F) directly following an
+   escape is written as escape as well.  Quotes and backslashes are not
+   escaped.
+
+   Returns a pointer to a static buffer which is overwritten by the next
+   call.  The program is terminated with a diagnostic if the result does
+   not fit into that buffer. */
+char *
+xfrm_utf (const wchar_t *ws)
+{
+  static char xfrm[256];
+  char *p = xfrm;
+  char *const end = xfrm + sizeof xfrm;
+  int wconst = 0;	/* set if the previous character was escaped */
+
+  for (; *ws; ++ws)
+    {
+      if (*ws < 0x80
+          && (!wconst || !wcschr (L"0123456789aAbBcCdDeEfF", *ws)))
+        {
+          /* Always keep room for the trailing NUL. */
+          if (end - p < 2)
+            goto toolong;
+          *p++ = (char) *ws;
+          wconst = 0;
+        }
+      else
+        {
+          int n = snprintf (p, (size_t) (end - p), "\\x%04lx",
+                            (unsigned long) *ws);
+
+          if (n < 0 || n >= end - p)
+            goto toolong;
+          p += n;
+          wconst = 1;
+        }
+    }
+  *p = '\0';
+  return xfrm;
+
+toolong:
+  fprintf (stderr, "xfrm_utf: converted string exceeds %zu bytes\n",
+           sizeof xfrm);
+  exit (EXIT_FAILURE);
+}
+
+/* Convert langinfo ITEM of the current locale to escaped wide string
+   literal text and store it in DST, a buffer of DSTSIZE bytes.  An item
+   which cannot be converted to wide characters is stored as empty
+   string. */
+static void
+fetch_msg_item (nl_item item, char *dst, size_t dstsize)
+{
+  wchar_t nlbuf[256];
+  const size_t max = sizeof nlbuf / sizeof nlbuf[0] - 1;
+  size_t len = mbstowcs (nlbuf, nl_langinfo (item), max);
+
+  if (len == (size_t) -1)
+    {
+      fprintf (stderr, "  cannot convert langinfo item %d\n", (int) item);
+      len = 0;
+    }
+  /* mbstowcs does not terminate the result if it fills the buffer. */
+  nlbuf[len] = L'\0';
+  snprintf (dst, dstsize, "%s", xfrm_utf (nlbuf));
+}
+
+/* Switch to the UTF-8 variant of locale NAME ("xx_YY" or
+   "xx_YY@modifier") and append its YESSTR, NOSTR, YESEXPR and NOEXPR
+   strings to the msg table.  Serbian sr_RS locales additionally create
+   an sr_SP alias, and sr_RS@latin an sr_ME@latin alias on top of that.
+   A locale which cannot be activated is skipped, so that the data of
+   the previously active locale is not recorded under the wrong name. */
+void
+read_locale_messages (char *name)
+{
+  const size_t max_entries = sizeof msg / sizeof msg[0];
+  const char *modifier = strchr (name, '@');
+  char locale[64];
+  int n;
+
+  /* The codeset has to be inserted in front of an optional @modifier. */
+  if (modifier)
+    n = snprintf (locale, sizeof locale, "%.*s.utf8%s",
+                  (int) (modifier - name), name, modifier);
+  else
+    n = snprintf (locale, sizeof locale, "%s.utf8", name);
+  if (n < 0 || (size_t) n >= sizeof locale)
+    {
+      fprintf (stderr, "Locale name %s too long, skipping\n", name);
+      return;
+    }
+  printf ("%s\n", locale);
+  if (!setlocale (LC_ALL, locale))
+    {
+      fprintf (stderr, "  setlocale failed, skipping\n");
+      return;
+    }
+  /* One entry for the locale itself plus up to two Serbian aliases. */
+  if ((size_t) mcnt + 3 > max_entries)
+    {
+      fprintf (stderr, "Locale table full (%zu entries)\n", max_entries);
+      exit (EXIT_FAILURE);
+    }
+
+  /* NAME is shorter than LOCALE, which has been checked to fit into a
+     buffer of the same size as msg[].locale. */
+  strcpy (msg[mcnt].locale, name);
+  fetch_msg_item (YESSTR, msg[mcnt].yesstr, sizeof msg[mcnt].yesstr);
+  fetch_msg_item (NOSTR, msg[mcnt].nostr, sizeof msg[mcnt].nostr);
+  fetch_msg_item (YESEXPR, msg[mcnt].yesexpr, sizeof msg[mcnt].yesexpr);
+  fetch_msg_item (NOEXPR, msg[mcnt].noexpr, sizeof msg[mcnt].noexpr);
+  /* Serbian locale rename weirdness */
+  if (!strncmp (msg[mcnt].locale, "sr_RS", 5))
+    {
+      /* Create additional equivalent entry for the old locale sr_SP. */
+      ++mcnt;
+      memcpy (&msg[mcnt], &msg[mcnt - 1], sizeof msg[mcnt]);
+      msg[mcnt].locale[3] = 'S';
+      msg[mcnt].locale[4] = 'P';
+      /* Create additional equivalent entry for sr_ME@latin missing in Linux. */
+      if (!strcmp (msg[mcnt].locale, "sr_SP@latin"))
+        {
+          ++mcnt;
+          memcpy (&msg[mcnt], &msg[mcnt - 1], sizeof msg[mcnt]);
+          msg[mcnt].locale[3] = 'M';
+          msg[mcnt].locale[4] = 'E';
+        }
+    }
+  ++mcnt;
+}
+
+/* qsort callback: order two lc_msg_t records by their locale name. */
+int
+locale_cmp (const void *a, const void *b)
+{
+  const struct lc_msg_t *lhs = a;
+  const struct lc_msg_t *rhs = b;
+
+  return strcmp (lhs->locale, rhs->locale);
+}
+
+/* Write the collected LC_MESSAGES table, sorted by locale name, to
+   lc_msg.h in the current working directory.  The leading comment of
+   the header records the output of `rpm -q glibc' and the current UTC
+   date; placeholders are used if either is not available.  Terminates
+   the program with a diagnostic if the file cannot be created or
+   written. */
+void
+create_list ()
+{
+  FILE *fp = fopen ("lc_msg.h", "w");
+  FILE *pp;
+  char vers[64] = "unknown glibc version";
+  char tstr[64] = "unknown date";
+  char *nl;
+  struct tm *tm;
+  time_t tim;
+  int failed;
+  int i;
+
+  if (!fp)
+    {
+      perror ("lc_msg.h");
+      exit (EXIT_FAILURE);
+    }
+  /* rpm may be missing or print nothing.  Fall back to the placeholder
+     rather than using an uninitialized buffer. */
+  pp = popen ("rpm -q glibc", "r");
+  if (pp)
+    {
+      if (!fgets (vers, sizeof vers, pp))
+        strcpy (vers, "unknown glibc version");
+      pclose (pp);
+    }
+  nl = strchr (vers, '\n');
+  if (nl)
+    *nl = '\0';
+  tim = time (NULL);
+  tm = gmtime (&tim);
+  if (tm)
+    strftime (tstr, sizeof tstr, "%F", tm);
+  fprintf (fp,
+"/* This struct of LC_MESSAGES data has been generated by fetching locale\n"
+"   data from a Linux system using %s on %s. */\n"
+"\n"
+"struct lc_msg_t\n"
+"{\n"
+"  const char    *locale;\n"
+"  const wchar_t *yesexpr;\n"
+"  const wchar_t *noexpr;\n"
+"  const wchar_t *yesstr;\n"
+"  const wchar_t *nostr;\n"
+"};\n"
+"\n"
+"static struct lc_msg_t lc_msg[] =\n"
+"{\n", vers, tstr);
+
+  qsort (msg, mcnt, sizeof (struct lc_msg_t), locale_cmp);
+  for (i = 0; i < mcnt; ++i)
+    fprintf (fp, "  { \"%s\", L\"%s\", L\"%s\", L\"%s\", L\"%s\" },\n",
+                 msg[i].locale,
+                 msg[i].yesexpr, msg[i].noexpr,
+                 msg[i].yesstr, msg[i].nostr);
+  fputs ("};\n", fp);
+  /* Don't leave a truncated header behind unnoticed. */
+  failed = ferror (fp);
+  if (fclose (fp) != 0 || failed)
+    {
+      fprintf (stderr, "lc_msg.h: write error\n");
+      exit (EXIT_FAILURE);
+    }
+}
+
+/* Collect the LC_MESSAGES data of every locale listed by `locale -a'
+   whose name contains an underscore but no dot, then write lc_msg.h.
+   Returns 0 on success, 1 if the locale list cannot be fetched. */
+int
+main (void)
+{
+  char name[32], *c;
+  FILE *pp;
+
+  /* grep -F is the replacement for the obsolescent fgrep. */
+  pp = popen ("locale -a | grep -a '_' | grep -F -v .", "r");
+  if (!pp)
+    {
+      perror ("popen failed");
+      return 1;
+    }
+  while (fgets (name, sizeof name, pp))
+    {
+      c = strchr (name, '\n');
+      if (c)
+        *c = '\0';
+      else
+        {
+          /* No newline in the buffer.  Unless the line ends right here,
+             it is too long.  Drop the rest of it instead of treating
+             the fragments as locale names. */
+          int ch = fgetc (pp);
+
+          if (ch != EOF && ch != '\n')
+            {
+              while (ch != EOF && ch != '\n')
+                ch = fgetc (pp);
+              fprintf (stderr, "Skipping over-long locale name \"%s...\"\n",
+                       name);
+              continue;
+            }
+        }
+      if (*name)
+        read_locale_messages (name);
+    }
+  pclose (pp);
+  create_list ();
+  return 0;
+}
diff --git a/winsup/cygwin/linux-locale-helpers/fetch-lc_time_era-from-linux.c b/winsup/cygwin/linux-locale-helpers/fetch-lc_time_era-from-linux.c
new file mode 100644
index 000000000000..1ee75ca6ba71
--- /dev/null
+++ b/winsup/cygwin/linux-locale-helpers/fetch-lc_time_era-from-linux.c
@@ -0,0 +1,237 @@
+/* SPDX-License-Identifier: BSD-2-Clause */
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <dirent.h>
+#include <time.h>
+#include <locale.h>
+#include <langinfo.h>
+#include <wchar.h>
+
+/* LC_TIME data of a single locale.  The string members point to escaped
+   text as returned by xfrm_utf, or to an empty string literal for a
+   missing era or alt_digits list. */
+struct lc_era_t
+{
+  char locale[64];
+  char *date_fmt;
+  char *d_fmt;
+  char *d_t_fmt;
+  char *t_fmt;
+  char *t_fmt_ampm;
+  char *era;
+  char *era_d_fmt;
+  char *era_d_t_fmt;
+  char *era_t_fmt;
+  char *alt_digits;
+};
+
+/* Table of collected locales and the number of entries in use. */
+struct lc_era_t era[512];
+int ecnt;
+
+/* Convert the wide string WS into text suitable for the body of a C wide
+   string literal.  ASCII characters are copied as is, everything else is
+   written as \xNNNN escape.  A hex escape swallows every following hex
+   digit, so an ASCII hex digit (0-9, a-f, A-F) directly following an
+   escape is written as escape as well.  Quotes and backslashes are not
+   escaped.
+
+   If SLIST is non-zero, WS is a list of NUL-separated strings ending
+   with an empty string.  The entries are joined with ';' in the result.
+
+   Returns a pointer to a static buffer which is overwritten by the next
+   call.  The program is terminated with a diagnostic if the result does
+   not fit into that buffer. */
+char *
+xfrm_utf (const wchar_t *ws, int slist)
+{
+  static char xfrm[4096];
+  char *p = xfrm;
+  char *const end = xfrm + sizeof xfrm;
+  int wconst = 0;	/* set if the previous character was escaped */
+
+  while (*ws)
+    {
+      if (*ws < 0x80
+          && (!wconst || !wcschr (L"0123456789aAbBcCdDeEfF", *ws)))
+        {
+          /* Always keep room for the trailing NUL. */
+          if (end - p < 2)
+            goto toolong;
+          *p++ = (char) *ws;
+          wconst = 0;
+        }
+      else
+        {
+          int n = snprintf (p, (size_t) (end - p), "\\x%04lx",
+                            (unsigned long) *ws);
+
+          if (n < 0 || n >= end - p)
+            goto toolong;
+          p += n;
+          wconst = 1;
+        }
+      ++ws;
+      /* In list mode a single NUL only ends the current entry.  Step
+         over it and add a separator if another entry follows. */
+      if (!*ws && slist)
+        {
+          ++ws;
+          if (*ws)
+            {
+              if (end - p < 2)
+                goto toolong;
+              *p++ = ';';
+            }
+        }
+    }
+  *p = '\0';
+  return xfrm;
+
+toolong:
+  fprintf (stderr, "xfrm_utf: converted string exceeds %zu bytes\n",
+           sizeof xfrm);
+  exit (EXIT_FAILURE);
+}
+
+/* Convert SLIST, a list of NUL-separated multibyte strings ending with
+   an empty string, into a single string of escaped wide string literal
+   text with the entries joined by ';' (see xfrm_utf).
+
+   Returns a malloc'ed copy of the result.  An entry which cannot be
+   converted to wide characters is skipped with a warning.  The program
+   is terminated with a diagnostic if the list does not fit into the
+   conversion buffer or memory is exhausted. */
+char *
+xfrm_slist (const char *slist)
+{
+  static wchar_t wxfrm[4096];
+  /* The last slot is reserved for the NUL terminating the list. */
+  wchar_t *const wend = wxfrm + sizeof wxfrm / sizeof wxfrm[0] - 1;
+  wchar_t *wp = wxfrm;
+  char *ret;
+
+  while (*slist)
+    {
+      size_t room = (size_t) (wend - wp);
+      size_t len = mbstowcs (wp, slist, room);
+
+      if (len == (size_t) -1)
+        fprintf (stderr, "xfrm_slist: skipping unconvertible entry\n");
+      else if (len >= room)
+        {
+          /* Entry not NUL-terminated within the buffer. */
+          fprintf (stderr, "xfrm_slist: list exceeds %zu wide chars\n",
+                   sizeof wxfrm / sizeof wxfrm[0]);
+          exit (EXIT_FAILURE);
+        }
+      else
+        wp += len + 1;
+      slist += strlen (slist) + 1;
+    }
+  *wp = L'\0';
+  /* xfrm_utf returns one NUL-terminated string in a static buffer, so a
+     plain string copy is all that's needed. */
+  ret = strdup (xfrm_utf (wxfrm, 1));
+  if (!ret)
+    {
+      perror ("strdup");
+      exit (EXIT_FAILURE);
+    }
+  return ret;
+}
+
+/* Return a malloc'ed copy of langinfo ITEM of the current locale,
+   converted to escaped wide string literal text.  An item which cannot
+   be converted to wide characters yields an empty string.  Terminates
+   the program if memory is exhausted. */
+static char *
+fetch_era_item (nl_item item)
+{
+  wchar_t nlbuf[256];
+  const size_t max = sizeof nlbuf / sizeof nlbuf[0] - 1;
+  size_t len = mbstowcs (nlbuf, nl_langinfo (item), max);
+  char *ret;
+
+  if (len == (size_t) -1)
+    {
+      fprintf (stderr, "  cannot convert langinfo item %d\n", (int) item);
+      len = 0;
+    }
+  /* mbstowcs does not terminate the result if it fills the buffer. */
+  nlbuf[len] = L'\0';
+  ret = strdup (xfrm_utf (nlbuf, 0));
+  if (!ret)
+    {
+      perror ("strdup");
+      exit (EXIT_FAILURE);
+    }
+  return ret;
+}
+
+/* Switch to the UTF-8 variant of locale NAME ("xx_YY" or
+   "xx_YY@modifier") and, if it defines ERA or ALT_DIGITS data, append
+   its date and time formats, era list and alternative digits to the era
+   table.  Serbian sr_RS locales additionally create an sr_SP alias, and
+   sr_RS@latin an sr_ME@latin alias on top of that.  A locale which
+   cannot be activated is skipped, so that the data of the previously
+   active locale is not recorded under the wrong name. */
+void
+read_locale_era (char *name)
+{
+  const size_t max_entries = sizeof era / sizeof era[0];
+  const char *modifier = strchr (name, '@');
+  char *nlera, *altd;
+  char locale[64];
+  int n;
+
+  /* The codeset has to be inserted in front of an optional @modifier. */
+  if (modifier)
+    n = snprintf (locale, sizeof locale, "%.*s.utf8%s",
+                  (int) (modifier - name), name, modifier);
+  else
+    n = snprintf (locale, sizeof locale, "%s.utf8", name);
+  if (n < 0 || (size_t) n >= sizeof locale)
+    {
+      fprintf (stderr, "Locale name %s too long, skipping\n", name);
+      return;
+    }
+  printf ("%s\n", locale);
+  if (!setlocale (LC_ALL, locale))
+    {
+      fprintf (stderr, "  setlocale failed, skipping\n");
+      return;
+    }
+
+  nlera = nl_langinfo (ERA);
+  altd = nl_langinfo (ALT_DIGITS);
+
+  /* Only locales with era or alternative digits data are of interest. */
+  if (!*nlera && !*altd)
+    return;
+
+  /* One entry for the locale itself plus up to two Serbian aliases. */
+  if ((size_t) ecnt + 3 > max_entries)
+    {
+      fprintf (stderr, "Locale table full (%zu entries)\n", max_entries);
+      exit (EXIT_FAILURE);
+    }
+
+  /* NAME is shorter than LOCALE, which has been checked to fit into a
+     buffer of the same size as era[].locale. */
+  strcpy (era[ecnt].locale, name);
+  era[ecnt].date_fmt = fetch_era_item (_DATE_FMT);
+  era[ecnt].d_fmt = fetch_era_item (D_FMT);
+  era[ecnt].d_t_fmt = fetch_era_item (D_T_FMT);
+  era[ecnt].t_fmt = fetch_era_item (T_FMT);
+  era[ecnt].t_fmt_ampm = fetch_era_item (T_FMT_AMPM);
+
+  era[ecnt].era = *nlera ? xfrm_slist (nlera) : "";
+  era[ecnt].alt_digits = *altd ? xfrm_slist (altd) : "";
+
+  era[ecnt].era_d_fmt = fetch_era_item (ERA_D_FMT);
+  era[ecnt].era_d_t_fmt = fetch_era_item (ERA_D_T_FMT);
+  era[ecnt].era_t_fmt = fetch_era_item (ERA_T_FMT);
+  /* Serbian locale rename weirdness */
+  if (!strncmp (era[ecnt].locale, "sr_RS", 5))
+    {
+      /* Create additional equivalent entries for the old locale sr_SP.
+         The copies share the string pointers of the original entry. */
+      ++ecnt;
+      memcpy (&era[ecnt], &era[ecnt - 1], sizeof era[ecnt]);
+      era[ecnt].locale[3] = 'S';
+      era[ecnt].locale[4] = 'P';
+      /* Create additional equivalent entry for sr_ME@latin missing in Linux. */
+      if (!strcmp (era[ecnt].locale, "sr_SP@latin"))
+        {
+          ++ecnt;
+          memcpy (&era[ecnt], &era[ecnt - 1], sizeof era[ecnt]);
+          era[ecnt].locale[3] = 'M';
+          era[ecnt].locale[4] = 'E';
+        }
+    }
+  ++ecnt;
+}
+
+/* qsort callback: order two lc_era_t records by their locale name. */
+int
+locale_cmp (const void *a, const void *b)
+{
+  const struct lc_era_t *lhs = a;
+  const struct lc_era_t *rhs = b;
+
+  return strcmp (lhs->locale, rhs->locale);
+}
+
+/* Write the collected LC_TIME era table, sorted by locale name, to
+   lc_era.h in the current working directory.  The leading comment of
+   the header records the output of `rpm -q glibc' and the current UTC
+   date; placeholders are used if either is not available.  Terminates
+   the program with a diagnostic if the file cannot be created or
+   written. */
+void
+create_list ()
+{
+  FILE *fp = fopen ("lc_era.h", "w");
+  FILE *pp;
+  char vers[64] = "unknown glibc version";
+  char tstr[64] = "unknown date";
+  char *nl;
+  struct tm *tm;
+  time_t tim;
+  int failed;
+  int i;
+
+  if (!fp)
+    {
+      perror ("lc_era.h");
+      exit (EXIT_FAILURE);
+    }
+  /* rpm may be missing or print nothing.  Fall back to the placeholder
+     rather than using an uninitialized buffer. */
+  pp = popen ("rpm -q glibc", "r");
+  if (pp)
+    {
+      if (!fgets (vers, sizeof vers, pp))
+        strcpy (vers, "unknown glibc version");
+      pclose (pp);
+    }
+  nl = strchr (vers, '\n');
+  if (nl)
+    *nl = '\0';
+  tim = time (NULL);
+  tm = gmtime (&tim);
+  if (tm)
+    strftime (tstr, sizeof tstr, "%F", tm);
+  fprintf (fp,
+"/* This struct of LC_TIME ERA data has been generated by fetching locale\n"
+"   data from a Linux system using %s on %s. */\n"
+"\n"
+"struct lc_era_t\n"
+"{\n"
+"  const char    *locale;\n"
+"  const wchar_t *date_fmt;\n"
+"  const wchar_t *d_fmt;\n"
+"  const wchar_t *d_t_fmt;\n"
+"  const wchar_t *t_fmt;\n"
+"  const wchar_t *t_fmt_ampm;\n"
+"  const wchar_t *era;\n"
+"  const wchar_t *era_d_fmt;\n"
+"  const wchar_t *era_d_t_fmt;\n"
+"  const wchar_t *era_t_fmt;\n"
+"  const wchar_t *alt_digits;\n"
+"};\n"
+"\n"
+"static struct lc_era_t lc_era[] =\n"
+"{\n", vers, tstr);
+
+  qsort (era, ecnt, sizeof (struct lc_era_t), locale_cmp);
+  for (i = 0; i < ecnt; ++i)
+    fprintf (fp, "  { \"%s\", L\"%s\", L\"%s\", L\"%s\", L\"%s\", L\"%s\", "
+                     "L\"%s\", L\"%s\", L\"%s\", L\"%s\", L\"%s\" },\n",
+                 era[i].locale, era[i].date_fmt,
+                 era[i].d_fmt, era[i].d_t_fmt,
+                 era[i].t_fmt, era[i].t_fmt_ampm,
+                 era[i].era, era[i].era_d_fmt,
+                 era[i].era_d_t_fmt, era[i].era_t_fmt,
+                 era[i].alt_digits);
+  fputs ("};\n", fp);
+  /* Don't leave a truncated header behind unnoticed. */
+  failed = ferror (fp);
+  if (fclose (fp) != 0 || failed)
+    {
+      fprintf (stderr, "lc_era.h: write error\n");
+      exit (EXIT_FAILURE);
+    }
+}
+
+/* Collect the LC_TIME era data of every locale listed by `locale -a'
+   whose name contains an underscore but no dot, then write lc_era.h.
+   Returns 0 on success, 1 if the locale list cannot be fetched. */
+int
+main (void)
+{
+  char name[32], *c;
+  FILE *pp;
+
+  /* grep -F is the replacement for the obsolescent fgrep. */
+  pp = popen ("locale -a | grep -a '_' | grep -F -v .", "r");
+  if (!pp)
+    {
+      perror ("popen failed");
+      return 1;
+    }
+  while (fgets (name, sizeof name, pp))
+    {
+      c = strchr (name, '\n');
+      if (c)
+        *c = '\0';
+      else
+        {
+          /* No newline in the buffer.  Unless the line ends right here,
+             it is too long.  Drop the rest of it instead of treating
+             the fragments as locale names. */
+          int ch = fgetc (pp);
+
+          if (ch != EOF && ch != '\n')
+            {
+              while (ch != EOF && ch != '\n')
+                ch = fgetc (pp);
+              fprintf (stderr, "Skipping over-long locale name \"%s...\"\n",
+                       name);
+              continue;
+            }
+        }
+      if (*name)
+        read_locale_era (name);
+    }
+  pclose (pp);
+  create_list ();
+  return 0;
+}

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2023-02-20 22:01 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-02-20 22:01 [newlib-cygwin/main] Cygwin: linux-locale-helpers: helper tools to generate locale data from Linux Corinna Vinschen

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).