public inbox for cygwin-cvs@sourceware.org
help / color / mirror / Atom feed
* [newlib-cygwin/main] Cygwin: introduce /proc/codesets and /proc/locales
@ 2023-02-26 16:18 Corinna Vinschen
  0 siblings, 0 replies; only message in thread
From: Corinna Vinschen @ 2023-02-26 16:18 UTC (permalink / raw)
  To: cygwin-cvs

https://sourceware.org/git/gitweb.cgi?p=newlib-cygwin.git;h=c42b98bdc665fc108efde0a7979fe4c36e8f5f60

commit c42b98bdc665fc108efde0a7979fe4c36e8f5f60
Author:     Corinna Vinschen <corinna@vinschen.de>
AuthorDate: Sun Feb 26 17:04:03 2023 +0100
Commit:     Corinna Vinschen <corinna@vinschen.de>
CommitDate: Sun Feb 26 17:07:06 2023 +0100

    Cygwin: introduce /proc/codesets and /proc/locales
    
    So far locale(1) had to know how to construct the locale and codeset
    lists itself, thus duplicating the way Cygwin handles locale strings.
    
    Move locale list and codeset list generation into Cygwin by
    providing /proc/codesets and /proc/locales files.  /proc/locales
    does not list aliases, those are still handled in locale(1).
    
    locale(1) opens the files and uses that info for printing,
    like any other application can do now.
    
    Signed-off-by: Corinna Vinschen <corinna@vinschen.de>

Diff:
---
 winsup/cygwin/fhandler/proc.cc | 283 ++++++++++++++++++++++++++++++++++++
 winsup/cygwin/release/3.5.0    |   5 +
 winsup/doc/new-features.xml    |   7 +
 winsup/utils/locale.cc         | 317 ++++++++++++-----------------------------
 4 files changed, 387 insertions(+), 225 deletions(-)

diff --git a/winsup/cygwin/fhandler/proc.cc b/winsup/cygwin/fhandler/proc.cc
index 6a9139861244..74e5d95011d1 100644
--- a/winsup/cygwin/fhandler/proc.cc
+++ b/winsup/cygwin/fhandler/proc.cc
@@ -11,6 +11,7 @@ details. */
 #include <unistd.h>
 #include <stdlib.h>
 #include <stdio.h>
+#include <langinfo.h>
 #include "cygerrno.h"
 #include "security.h"
 #include "path.h"
@@ -49,16 +50,20 @@ static off_t format_proc_filesystems (void *, char *&);
 static off_t format_proc_swaps (void *, char *&);
 static off_t format_proc_devices (void *, char *&);
 static off_t format_proc_misc (void *, char *&);
+static off_t format_proc_locales (void *, char *&);
+static off_t format_proc_codesets (void *, char *&);
 
 /* names of objects in /proc */
 static const virt_tab_t proc_tab[] = {
   { _VN ("."),		 FH_PROC,	virt_directory,	NULL },
   { _VN (".."),		 FH_PROC,	virt_directory,	NULL },
+  { _VN ("codesets"),	 FH_PROC,	virt_file,	format_proc_codesets },
   { _VN ("cpuinfo"),	 FH_PROC,	virt_file,	format_proc_cpuinfo },
   { _VN ("cygdrive"),	 FH_PROC,	virt_symlink,	format_proc_cygdrive },
   { _VN ("devices"),	 FH_PROC,	virt_file,	format_proc_devices },
   { _VN ("filesystems"), FH_PROC,	virt_file,	format_proc_filesystems },
   { _VN ("loadavg"),	 FH_PROC,	virt_file,	format_proc_loadavg },
+  { _VN ("locales"),	 FH_PROC,	virt_file,	format_proc_locales },
   { _VN ("meminfo"),	 FH_PROC,	virt_file,	format_proc_meminfo },
   { _VN ("misc"),	 FH_PROC,	virt_file,	format_proc_misc },
   { _VN ("mounts"),	 FH_PROC,	virt_symlink,	format_proc_mounts },
@@ -2071,4 +2076,282 @@ format_proc_misc (void *, char *&destbuf)
   return bufptr - buf;
 }
 
+/* Append one line of /proc/locales output at BUFPTR and return a pointer
+   to the new end of the output.
+
+   The line consists of up to three tab-separated columns:
+
+   - The locale name: POSIX_LOCALE, followed by ".utf8" if EXPLICIT_UTF8
+     is set, followed by MODIFIER (including its leading '@') if MODIFIER
+     is neither NULL nor empty.
+   - CODESET.
+   - WIN_LOCALE, converted to a multibyte string.  The column is omitted
+     if WIN_LOCALE is NULL or empty.
+
+   A column shorter than 16 chars is filled up with tabs so the following
+   column starts three 8-char tab stops later.  The line is terminated by
+   a newline.  No bounds checking is performed, the caller has to provide
+   a sufficiently large buffer. */
+static char *
+add_locale (char *bufptr, const char *posix_locale, const char *codeset,
+	    bool explicit_utf8, const char *modifier, const wchar_t *win_locale)
+{
+  const char *start = bufptr;
+  bufptr = stpcpy (bufptr, posix_locale);
+  if (explicit_utf8)
+    bufptr = stpcpy (bufptr, ".utf8");
+  if (modifier && modifier[0])
+    bufptr = stpcpy (bufptr, modifier);
+  /* Up to two filler tabs plus the column separator */
+  if (bufptr - start < 16)
+    {
+      if (bufptr - start < 8)
+	bufptr = stpcpy (bufptr, "\t");
+      bufptr = stpcpy (bufptr, "\t");
+    }
+  bufptr = stpcpy (bufptr, "\t");
+  start = bufptr;
+  bufptr = stpcpy (bufptr, codeset);
+  if (win_locale && win_locale[0])
+    {
+      size_t len;
+
+      if (bufptr - start < 16)
+	{
+	  if (bufptr - start < 8)
+	    bufptr = stpcpy (bufptr, "\t");
+	  bufptr = stpcpy (bufptr, "\t");
+	}
+      bufptr = stpcpy (bufptr, "\t");
+      /* wcstombs returns (size_t) -1 if a character can't be converted.
+	 Never move the write pointer backwards in that case, just leave
+	 the column empty. */
+      len = wcstombs (bufptr, win_locale, wcslen (win_locale) * 2);
+      if (len != (size_t) -1)
+	bufptr += len;
+    }
+  bufptr = stpcpy (bufptr, "\n");
+  return bufptr;
+}
+
+/* Callback for EnumSystemLocalesEx, called once per Windows locale name
+   WIN_LOCALE.  PARAM is a pointer to the caller's output pointer
+   (char **), which is advanced past every line added here.  INFO is
+   unused.
+
+   The Windows locale name is converted into a POSIX locale name plus an
+   optional @modifier and added to the output twice, once with the
+   codeset reported for the POSIX locale name and once as explicit
+   ".utf8" locale.  Language-only and language-Script locales, locales
+   with a numeric country, and locales in a script we can't express as
+   modifier are skipped.  If the locale has no modifier yet, it is added
+   a second time for certain known variants (@latin, @iqtelif, @euro,
+   @cjknarrow).
+
+   Always returns TRUE so the enumeration continues with the next locale.
+   No bounds checking is performed on the output buffer. */
+static BOOL
+format_proc_locale_proc (LPWSTR win_locale, DWORD info, LPARAM param)
+{
+  char **bufptr_p = (char **) param;
+  wchar_t iso15924_postfix[32] = { 0 };
+  wchar_t iso15924[32] = { 0 };
+  wchar_t iso3166[32] = { 0 };
+  wchar_t iso639[32] = { 0 };
+  wchar_t currency[9] = { 0 };
+  char modifier[32] = { 0 };
+  const char *variant = NULL;
+  char posix_loc[32];
+  char *codeset;
+  locale_t loc;
+  wchar_t *cp;
+
+  /* Skip language-only locales, e. g. "en" */
+  if (!(cp = wcschr (win_locale, L'-')))
+    return TRUE;
+  ++cp;
+  /* Script inside?  Scripts are Upper/Lower, e. g. "Latn" */
+  if (iswupper (cp[0]) && iswlower (cp[1]))
+    {
+      wchar_t *cp2;
+
+      /* Skip language-Script locales, missing country  */
+      if (!(cp2 = wcschr (cp + 2, L'-')))
+	return TRUE;
+      /* Otherwise, store in iso15924 with a trailing semicolon.  That's
+	 the form used by all script comparisons below. */
+      wcpcpy (wcpncpy (iso15924, cp, cp2 - cp), L";");
+    }
+  cp = wcsrchr (win_locale, L'-');
+  if (cp)
+    {
+      /* Skip numeric iso3166 country name. */
+      if (iswdigit (cp[1]))
+	return TRUE;
+      /* Special case postfix after iso3166 country name: ca-ES-valencia.
+	 Use the postfix thingy as script so it will become a @modifier */
+      if (iswlower (cp[1]))
+	wcpcpy (iso15924_postfix, cp + 1);
+    }
+
+  if (!GetLocaleInfoEx (win_locale, LOCALE_SISO639LANGNAME, iso639, 10))
+    return TRUE;
+  GetLocaleInfoEx (win_locale, LOCALE_SISO3166CTRYNAME, iso3166, 10);
+
+  snprintf (posix_loc, sizeof posix_loc, "%.3ls_%.3ls", iso639, iso3166);
+  /* Inuktitut: equivalent @latin due to lack of info on Linux */
+  if (!wcscmp (iso639, L"iu"))
+    {
+      if (wcscmp (iso15924, L"Latn;"))
+	return TRUE;
+    }
+  /* Javanese: only use @latin locale. */
+  else if (!wcscmp (iso639, L"jv"))
+    {
+      if (wcscmp (iso15924, L"Latn;"))
+	return TRUE;
+    }
+  /* Mongolian: only use @mongolian locale. */
+  else if (!wcscmp (iso639, L"mn"))
+    {
+      if (wcscmp (iso15924, L"Mong;"))
+	return TRUE;
+    }
+  /* Serbian: Windows default is Latin, Linux default is Cyrillic.
+     We want the Linux default and attach @latin otherwise */
+  else if (!wcscmp (iso639, L"sr")  && !wcscmp (iso15924, L"Latn;"))
+    stpcpy (modifier, "@latin");
+  /* Tamazight: no modifier, iso639 is "ber" on Linux.
+     "zgh-Tfng-MA" is equivalent to "ber_MA". */
+  else if (!wcscmp (iso639, L"zgh"))
+    snprintf (posix_loc, sizeof posix_loc, "ber_%.27ls", iso3166);
+  /* Tamazight: "tzm-Latn-DZ" is equivalent to "ber_DZ",
+		skip everything else. */
+  else if (!wcscmp (iso639, L"tzm"))
+    {
+      if (!wcscmp (iso3166, L"DZ") && !wcscmp (iso15924, L"Latn;"))
+	snprintf (posix_loc, sizeof posix_loc, "ber_%.27ls", iso3166);
+      else
+	return TRUE;
+    }
+  /* In all other cases, we check if the script from the Windows
+     locale is the default locale in that language.  If not, we
+     add it as modifier if possible, or skip it */
+  else if (iso15924[0])
+    {
+      wchar_t scriptless_win_locale[32];
+      wchar_t default_iso15924[32];
+
+      wcpcpy (wcpcpy (wcpcpy (scriptless_win_locale, iso639), L"-"),
+	      iso3166);
+      if ((GetLocaleInfoEx (scriptless_win_locale, LOCALE_SSCRIPTS,
+			    default_iso15924, 32)
+	   || GetLocaleInfoEx (iso639, LOCALE_SSCRIPTS,
+			       default_iso15924, 32))
+	  && !wcsstr (default_iso15924, iso15924))
+	{
+	  if (!wcscmp (iso15924, L"Latn;"))
+	    stpcpy (modifier, "@latin");
+	  else if (!wcscmp (iso15924, L"Cyrl;"))
+	    stpcpy (modifier, "@cyrillic");
+	  else if (!wcscmp (iso15924, L"Deva;"))
+	    stpcpy (modifier, "@devanagar");
+	  else if (!wcscmp (iso15924, L"Adlm;"))
+	    stpcpy (modifier, "@adlam");
+	  else
+	    return TRUE;
+	}
+    }
+  else if (iso15924_postfix[0])
+    {
+      modifier[0] = '@';
+      /* Leave room for the leading '@' and the trailing NUL.  Skip the
+	 locale if the postfix can't be converted. */
+      if (wcstombs (modifier + 1, iso15924_postfix, sizeof modifier - 2)
+	  == (size_t) -1)
+	return TRUE;
+    }
+
+  loc = newlocale (LC_CTYPE_MASK, posix_loc, (locale_t) 0);
+  /* Skip the locale if it can't be created, rather than calling
+     nl_langinfo_l with a NULL locale. */
+  if (!loc)
+    return TRUE;
+  codeset = nl_langinfo_l (CODESET, loc);
+  *bufptr_p = add_locale (*bufptr_p, posix_loc, codeset, false, modifier,
+			  win_locale);
+  *bufptr_p = add_locale (*bufptr_p, posix_loc, "UTF-8", true, modifier,
+			  win_locale);
+
+  /* Only one cross each */
+  if (!modifier[0])
+    {
+      /* Check for locales sporting an additional modifier for
+	 changing the codeset and other stuff.  This has to be a single
+	 if/else chain, otherwise a match in an earlier branch gets
+	 dropped by the final else of a later one. */
+      if (!wcscmp (iso639, L"be") && !wcscmp (iso3166, L"BY"))
+	variant = "@latin";
+      else if (!wcscmp (iso639, L"tt") && !wcscmp (iso3166, L"RU"))
+	variant = "@iqtelif";
+      /* If the base locale is ISO-8859-1 and the locale defines currency
+	 as EUR, add a @euro locale. For historical reasons there's also
+	 a greek @euro locale, albeit it doesn't change the codeset. */
+      else if ((!strcmp (codeset, "ISO-8859-1")
+		|| !strcmp (posix_loc, "el_GR"))
+	       && GetLocaleInfoEx (win_locale, LOCALE_SINTLSYMBOL, currency, 9)
+	       && !wcsncmp (currency, L"EUR", 3))
+	variant = "@euro";
+      else if (!wcscmp (iso639, L"ja")
+	       || !wcscmp (iso639, L"ko")
+	       || !wcscmp (iso639, L"zh"))
+	variant = "@cjknarrow";
+
+      if (variant)
+	{
+	  /* NOTE(review): CODESET is still the codeset of the unmodified
+	     locale.  Confirm that's what the codeset column is supposed
+	     to show for modifiers changing the codeset. */
+	  *bufptr_p = add_locale (*bufptr_p, posix_loc, codeset, false,
+				  variant, win_locale);
+	  *bufptr_p = add_locale (*bufptr_p, posix_loc, "UTF-8", true,
+				  variant, win_locale);
+	}
+    }
+
+  /* Free the locale on every path, CODESET is not used beyond this point. */
+  freelocale (loc);
+  return TRUE;
+}
+
+/* Generate the content of /proc/locales.
+
+   The content is a header line, three hard-coded entries for the "C",
+   "C.utf8" and "POSIX" locales, and whatever format_proc_locale_proc adds
+   for the Windows locales enumerated by EnumSystemLocalesEx.  It is
+   assembled in a temporary buffer without bounds checking.
+
+   DESTBUF is reallocated to the size of the content and the content is
+   copied into it without trailing NUL.  Returns the content length in
+   bytes. */
+static off_t
+format_proc_locales (void *, char *&destbuf)
+{
+  tmp_pathbuf tp;
+  char *const outbuf = tp.t_get ();
+  char *outptr;
+  size_t outlen;
+
+  outptr = stpcpy (outbuf, "Locale:\t\t\tCodeset:\t\tWindows-Locale:\n");
+  outptr = add_locale (outptr, "C", "ANSI_X3.4-1968", false, NULL, NULL);
+  outptr = add_locale (outptr, "C", "UTF-8", true, NULL, NULL);
+  outptr = add_locale (outptr, "POSIX", "ANSI_X3.4-1968", false, NULL, NULL);
+
+  /* The callback gets the address of outptr and advances it for every
+     line it adds. */
+  EnumSystemLocalesEx (format_proc_locale_proc,
+		       LOCALE_WINDOWS | LOCALE_SUPPLEMENTAL,
+		       (LPARAM) &outptr, NULL);
+
+  outlen = outptr - outbuf;
+  destbuf = (char *) crealloc_abort (destbuf, outlen);
+  memcpy (destbuf, outbuf, outlen);
+  return outlen;
+}
+
+/* Generate the content of /proc/codesets, a fixed list of codeset names,
+   one name per line.
+
+   DESTBUF is reallocated to the size of the list and the list is copied
+   into it without trailing NUL.  Returns the length of the list in
+   bytes. */
+static off_t
+format_proc_codesets (void *, char *&destbuf)
+{
+  static const char codeset_list[] =
+    "ASCII\n"
+    "BIG5\n"
+    "CP1125\n"
+    "CP1250\n"
+    "CP1251\n"
+    "CP1252\n"
+    "CP1253\n"
+    "CP1254\n"
+    "CP1255\n"
+    "CP1256\n"
+    "CP1257\n"
+    "CP1258\n"
+    "CP437\n"
+    "CP720\n"
+    "CP737\n"
+    "CP775\n"
+    "CP850\n"
+    "CP852\n"
+    "CP855\n"
+    "CP857\n"
+    "CP858\n"
+    "CP862\n"
+    "CP866\n"
+    "CP874\n"
+    "CP932\n"
+    "EUC-CN\n"
+    "EUC-JP\n"
+    "EUC-KR\n"
+    "GB2312\n"
+    "GBK\n"
+    "GEORGIAN-PS\n"
+    "ISO-8859-1\n"
+    "ISO-8859-10\n"
+    "ISO-8859-11\n"
+    "ISO-8859-13\n"
+    "ISO-8859-14\n"
+    "ISO-8859-15\n"
+    "ISO-8859-16\n"
+    "ISO-8859-2\n"
+    "ISO-8859-3\n"
+    "ISO-8859-4\n"
+    "ISO-8859-5\n"
+    "ISO-8859-6\n"
+    "ISO-8859-7\n"
+    "ISO-8859-8\n"
+    "ISO-8859-9\n"
+    "KOI8-R\n"
+    "KOI8-T\n"
+    "KOI8-U\n"
+    "PT154\n"
+    "SJIS\n"
+    "TIS-620\n"
+    "UTF-8\n";
+  tmp_pathbuf tp;
+  char *const outbuf = tp.c_get ();
+  /* Assemble the list in the temporary buffer first */
+  const size_t outlen = stpcpy (outbuf, codeset_list) - outbuf;
+
+  destbuf = (char *) crealloc_abort (destbuf, outlen);
+  memcpy (destbuf, outbuf, outlen);
+  return outlen;
+}
+
 #undef print
diff --git a/winsup/cygwin/release/3.5.0 b/winsup/cygwin/release/3.5.0
index 502c1f356bab..cc1b52bd223e 100644
--- a/winsup/cygwin/release/3.5.0
+++ b/winsup/cygwin/release/3.5.0
@@ -16,3 +16,8 @@ What's new:
 - fnmatch(3) and glob(3) now support named character classes and
   equivalence class expressions in the search pattern, i.e., [:alnum:],
   [=a=], etc.
+
+- Introduce /proc/codesets and /proc/locales with information on
+  supported codesets and locales for all interested parties.  Locale(1)
+  opens these files and uses the info for printing locale info like any
+  other process could do.
diff --git a/winsup/doc/new-features.xml b/winsup/doc/new-features.xml
index c310114613f3..80046247e50d 100644
--- a/winsup/doc/new-features.xml
+++ b/winsup/doc/new-features.xml
@@ -33,6 +33,13 @@ fnmatch(3) and glob(3) now support named character classes and equivalence
 class expressions in the search pattern, i.e., [:alnum:], [=a=], etc.
 </para></listitem>
 
+<listitem><para>
+Introduce /proc/codesets and /proc/locales with information on supported
+codesets and locales for all interested parties.  Locale(1) opens these
+files and uses the info for printing locale info like any other process
+could do.
+</para></listitem>
+
 </itemizedlist>
 
 </sect2>
diff --git a/winsup/utils/locale.cc b/winsup/utils/locale.cc
index 25c4d327023d..953cc2184615 100644
--- a/winsup/utils/locale.cc
+++ b/winsup/utils/locale.cc
@@ -159,58 +159,46 @@ size_t loc_max;
 size_t loc_num;
 
 void
-print_locale_with_codeset (int verbose, loc_t *locale, bool utf8,
-			   const char *modifier)
+print_locale (int verbose, loc_t *locale)
 {
-  static const char *sysroot;
-  char locname[32];
+  static const char *kernel32;
+  static const char *cygwin1;
 
   if (verbose
       && (!strcmp (locale->name, "C") || !strcmp (locale->name, "POSIX")))
     return;
-  if (!sysroot)
+  if (!kernel32)
     {
-      WCHAR sysbuf[PATH_MAX];
-      HMODULE k32 = GetModuleHandleW (L"kernel32.dll");
-      if (GetModuleFileNameW (k32, sysbuf, PATH_MAX))
-	sysroot = (const char *) cygwin_create_path (CCP_WIN_W_TO_POSIX,
-						     sysbuf);
-      if (!sysroot)
-	sysroot = "kernel32.dll";
+      WCHAR dllpathbuf[PATH_MAX];
+      HMODULE dll;
+
+      dll = GetModuleHandleW (L"kernel32.dll");
+      if (GetModuleFileNameW (dll, dllpathbuf, PATH_MAX))
+	kernel32 = (const char *) cygwin_create_path (CCP_WIN_W_TO_POSIX,
+						      dllpathbuf);
+      if (!kernel32)
+	kernel32 = "kernel32.dll";
+      dll = GetModuleHandleW (L"cygwin1.dll");
+      if (GetModuleFileNameW (dll, dllpathbuf, PATH_MAX))
+	cygwin1 = (const char *) cygwin_create_path (CCP_WIN_W_TO_POSIX,
+						     dllpathbuf);
+      if (!cygwin1)
+	cygwin1 = "cygwin1.dll";
     }
-  snprintf (locname, 32, "%s%s%s%s", locale->name, utf8 ? ".utf8" : "",
-				     modifier ? "@" : "", modifier ?: "");
-  if (verbose)
-    fputs ("locale: ", stdout);
   if (verbose)
     {
-      printf ("%-15s ", locname);
+      printf ("locale: %-15s ", locale->name);
       printf ("archive: %s\n",
-      locale->alias ? LOCALE_ALIAS : sysroot);
+	      locale->alias ? LOCALE_ALIAS
+			    : !strcmp (locale->name, "C.utf8") ? cygwin1
+							       : kernel32);
       puts ("-------------------------------------------------------------------------------");
       printf (" language | %ls\n", locale->language);
       printf ("territory | %ls\n", locale->territory);
-      printf ("  codeset | %s\n\n", utf8 ? "UTF-8" : locale->codeset);
+      printf ("  codeset | %s\n\n", locale->codeset);
     }
   else
-    printf ("%s\n", locname);
-}
-
-void
-print_locale (int verbose, loc_t *locale)
-{
-  print_locale_with_codeset (verbose, locale, false, NULL);
-  char *modifier = strchr (locale->name, '@');
-  if (!locale->alias)
-    {
-      if (!modifier)
-	print_locale_with_codeset (verbose, locale, true, NULL);
-      else if (strcmp (modifier, "@euro"))
-	{
-	  *modifier++ = '\0';
-	  print_locale_with_codeset (verbose, locale, true, modifier);
-	}
-    }
+    printf ("%s\n", locale->name);
 }
 
 int
@@ -222,10 +210,10 @@ compare_locales (const void *a, const void *b)
 }
 
 size_t
-add_locale (const char *name, const wchar_t *language, const wchar_t *territory,
+add_locale (const char *name, const char *codeset, const wchar_t *language, const wchar_t *territory,
 	    bool alias = false)
 {
-  char orig_locale[32];
+  locale_t loc;
 
   if (loc_num >= loc_max)
     {
@@ -241,10 +229,9 @@ add_locale (const char *name, const wchar_t *language, const wchar_t *territory,
   locale[loc_num].name = strdup (name);
   locale[loc_num].language = wcsdup (language);
   locale[loc_num].territory = wcsdup (territory);
-  strcpy (orig_locale, setlocale (LC_CTYPE, NULL));
-  setlocale (LC_CTYPE, name);
-  locale[loc_num].codeset = strdup (nl_langinfo (CODESET));
-  setlocale (LC_CTYPE, orig_locale);
+  loc = newlocale (LC_CTYPE_MASK, name, (locale_t) 0);
+  locale[loc_num].codeset = strdup (nl_langinfo_l (CODESET, loc));
+  freelocale (loc);
   locale[loc_num].alias = alias;
   return loc_num++;
 }
@@ -257,6 +244,7 @@ add_locale_alias_locales ()
   char orig_locale[32];
   loc_t search, *loc;
   size_t orig_loc_num = loc_num;
+  locale_t sysloc;
 
   FILE *fp = fopen (LOCALE_ALIAS, "rt");
   if (!fp)
@@ -292,138 +280,66 @@ add_locale_alias_locales ()
       loc = (loc_t *) bsearch (&search, locale, orig_loc_num, sizeof (loc_t),
 			       compare_locales);
 
-      add_locale (alias, loc ? loc->language : L"", loc ? loc->territory : L"",
-		  true);
+      sysloc = newlocale (LC_CTYPE_MASK, alias, (locale_t) 0);
+      add_locale (alias, nl_langinfo_l (CODESET, sysloc),
+		  loc ? loc->language : L"", loc ? loc->territory : L"", true);
+      freelocale (sysloc);
     }
   fclose (fp);
 }
 
-BOOL
-print_all_locales_proc (LPWSTR loc_name, DWORD info, LPARAM param)
+void
+print_all_locales (int verbose)
 {
-  wchar_t iso639[32] = { 0 };
-  wchar_t iso3166[32] = { 0 };
-  wchar_t iso15924[32] = { 0 };
-
-#if 0
-  struct {
-    wchar_t language[256];
-    wchar_t country[256];
-    char loc[32];
-  } loc_list[32];
-  int lcnt = 0;
-#endif
-
-  if (getlocale (loc_name, iso639, iso3166, iso15924))
+  FILE *fp = fopen ("/proc/locales", "r");
+  char line[80];
+
+  if (!fp)
     {
-      char *c, posix_loc[32];
-      wchar_t language[256];
-      wchar_t country[256];
-      wchar_t currency[9];
-
-      c = posix_loc + snprintf (posix_loc, sizeof posix_loc, "%ls_%ls",
-				iso639, iso3166);
-      /* Inuktitut: equivalent @latin due to lack of info on Linux */
-      if (!wcscmp (iso639, L"iu"))
-	{
-	  if (wcscmp (iso15924, L"Latn;"))
-	    return TRUE;
-	}
-      /* Javanese: only use @latin locale. */
-      else if (!wcscmp (iso639, L"jv"))
-	{
-	  if (wcscmp (iso15924, L"Latn;"))
-	    return TRUE;
-	}
-      /* Mongolian: only use @mongolian locale. */
-      else if (!wcscmp (iso639, L"mn"))
-	{
-	  if (wcscmp (iso15924, L"Mong;"))
-	    return TRUE;
-	}
-      /* Serbian: Windows default is Latin, Linux default is Cyrillic.
-	 We want the Linux default and attach @latin otherwise */
-      else if (!wcscmp (iso639, L"sr")  && !wcscmp (iso15924, L"Latn;"))
-	stpcpy (c, "@latin");
-      /* Tamazight: no modifier, iso639 is "ber" on Linux.
-	 "zgh-Tfng-MA" is equivalent to "ber_MA". */
-      else if (!wcscmp (iso639, L"zgh"))
-	snprintf (posix_loc, sizeof posix_loc, "ber_%.27ls", iso3166);
-      /* Tamazight: "tzm-Latn-DZ" is equivalent to "ber_DZ",
-		    skip everything else. */
-      else if (!wcscmp (iso639, L"tzm"))
-	{
-	  if (!wcscmp (iso3166, L"DZ") && !wcscmp (iso15924, L"Latn;"))
-	    snprintf (posix_loc, sizeof posix_loc, "ber_%.27ls", iso3166);
-	  else
-	    return TRUE;
-	}
-      /* In all other cases, we check if the script from the Windows
-	 locale is the default locale in that language.  If not, we
-	 add it as modifier if possible, or skip it */
-      else if (iso15924[0])
+      fprintf (stderr, "%s: can't open /proc/locales, old Cygwin DLL?\n",
+	       program_invocation_short_name);
+      return;
+    }
+  /* Skip header line */
+  fgets (line, 80, fp);
+  while (fgets (line, 80, fp))
+    {
+      char *posix_loc;
+      char *codeset;
+      char *win_loc;
+      char *nl;
+      wchar_t win_locale[32];
+      wchar_t language[64] = { 0 };
+      wchar_t country[64] = { 0 };
+
+      nl = strchr (line, '\n');
+      if (nl)
+	*nl = '\0';
+      posix_loc = line;
+      codeset = strchr (posix_loc, '\t');
+      if (!codeset)
+	continue;
+      *codeset = '\0';
+      while (*++codeset == '\t')
+	;
+      win_loc = strchr (codeset, '\t');
+      if (win_loc)
 	{
-	  wchar_t scriptless_win_locale[32];
-	  wchar_t default_iso15924[32];
-
-	  wcpcpy (wcpcpy (wcpcpy (scriptless_win_locale, iso639), L"-"),
-		  iso3166);
-	  if ((GetLocaleInfoEx (scriptless_win_locale, LOCALE_SSCRIPTS,
-				default_iso15924, 32)
-	       || GetLocaleInfoEx (iso639, LOCALE_SSCRIPTS,
-				   default_iso15924, 32))
-	      && !wcsstr (default_iso15924, iso15924))
+	  *win_loc = '\0';
+	  while (*++win_loc == '\t')
+	    ;
+	  if (win_loc[0])
 	    {
-	      if (!wcscmp (iso15924, L"Latn;"))
-		stpcpy (c, "@latin");
-	      else if (!wcscmp (iso15924, L"Cyrl;"))
-		stpcpy (c, "@cyrillic");
-	      else if (!wcscmp (iso15924, L"Deva;"))
-		stpcpy (c, "@devanagar");
-	      else if (!wcscmp (iso15924, L"Adlm;"))
-		stpcpy (c, "@adlam");
-	      else
-		return TRUE;
+	      mbstowcs (win_locale, win_loc, 32);
+	      GetLocaleInfoEx (win_locale, LOCALE_SENGLISHLANGUAGENAME,
+			       language, 64);
+	      GetLocaleInfoEx (win_locale, LOCALE_SENGLISHCOUNTRYNAME,
+			       country, 64);
 	    }
 	}
-
-      /* Print */
-      GetLocaleInfoEx (loc_name, LOCALE_SENGLISHLANGUAGENAME, language, 256);
-      GetLocaleInfoEx (loc_name, LOCALE_SENGLISHCOUNTRYNAME, country, 256);
-      size_t idx = add_locale (posix_loc, language, country);
-      /* Check for locales sporting an additional modifier for
-	 changing the codeset and other stuff. */
-      if (!wcscmp (iso639, L"be") && !wcscmp (iso3166, L"BY"))
-	stpcpy (c, "@latin");
-      if (!wcscmp (iso639, L"tt") && !wcscmp (iso3166, L"RU"))
-	stpcpy (c, "@iqtelif");
-       /* If the base locale is ISO-8859-1 and the locale defines currency
-          as EUR, add a @euro locale. For historical reasons there's also
-	  a greek @euro locale, albeit it doesn't change the codeset. */
-      else if ((!strcmp (locale[idx].codeset, "ISO-8859-1")
-		|| !strcmp (posix_loc, "el_GR"))
-	       && GetLocaleInfoEx (loc_name, LOCALE_SINTLSYMBOL, currency, 9)
-	       && !wcsncmp (currency, L"EUR", 3))
-	stpcpy (c, "@euro");
-      else if (!wcscmp (iso639, L"ja")
-	       || !wcscmp (iso639, L"ko")
-	       || !wcscmp (iso639, L"zh"))
-	stpcpy (c, "@cjknarrow");
-      else
-	return TRUE;
-      add_locale (posix_loc, language, country);
+      add_locale (posix_loc, codeset, language, country);
     }
-  return TRUE;
-}
-
-void
-print_all_locales (int verbose)
-{
-  add_locale ("C", L"C", L"POSIX");
-  add_locale ("POSIX", L"C", L"POSIX", true);
-  EnumSystemLocalesEx (print_all_locales_proc,
-		       LOCALE_WINDOWS | LOCALE_SUPPLEMENTAL,
-		       0, NULL);
+  fclose (fp);
   /* First sort allows add_locale_alias_locales to bsearch in locales. */
   qsort (locale, loc_num, sizeof (loc_t), compare_locales);
   add_locale_alias_locales ();
@@ -435,67 +351,18 @@ print_all_locales (int verbose)
 void
 print_charmaps ()
 {
-  /* FIXME: We need a method to fetch the available charsets from Cygwin, */
-  const char *charmaps[] =
-  {
-    "ASCII",
-    "BIG5",
-    "CP1125",
-    "CP1250",
-    "CP1251",
-    "CP1252",
-    "CP1253",
-    "CP1254",
-    "CP1255",
-    "CP1256",
-    "CP1257",
-    "CP1258",
-    "CP437",
-    "CP720",
-    "CP737",
-    "CP775",
-    "CP850",
-    "CP852",
-    "CP855",
-    "CP857",
-    "CP858",
-    "CP862",
-    "CP866",
-    "CP874",
-    "CP932",
-    "EUC-CN",
-    "EUC-JP",
-    "EUC-KR",
-    "GB2312",
-    "GBK",
-    "GEORGIAN-PS",
-    "ISO-8859-1",
-    "ISO-8859-10",
-    "ISO-8859-11",
-    "ISO-8859-13",
-    "ISO-8859-14",
-    "ISO-8859-15",
-    "ISO-8859-16",
-    "ISO-8859-2",
-    "ISO-8859-3",
-    "ISO-8859-4",
-    "ISO-8859-5",
-    "ISO-8859-6",
-    "ISO-8859-7",
-    "ISO-8859-8",
-    "ISO-8859-9",
-    "KOI8-R",
-    "KOI8-T",
-    "KOI8-U",
-    "PT154",
-    "SJIS",
-    "TIS-620",
-    "UTF-8",
-    NULL
-  };
-  const char **charmap = charmaps;
-  while (*charmap)
-    printf ("%s\n", *charmap++);
+  FILE *fp = fopen ("/proc/codesets", "r");
+  char line[80];
+
+  if (!fp)
+    {
+      fprintf (stderr, "%s: can't open /proc/codesets, old Cygwin DLL?\n",
+	       program_invocation_short_name);
+      return;
+    }
+  while (fgets (line, 80, fp))
+    fputs (line, stdout);
+  fclose (fp);
 }
 
 void

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2023-02-26 16:18 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-02-26 16:18 [newlib-cygwin/main] Cygwin: introduce /proc/codesets and /proc/locales Corinna Vinschen

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).