From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (qmail 23773 invoked by alias); 3 Dec 2009 13:48:58 -0000 Received: (qmail 23729 invoked by uid 22791); 3 Dec 2009 13:48:55 -0000 X-Spam-Check-By: sourceware.org Received: from aquarius.hirmke.de (HELO calimero.vinschen.de) (217.91.18.234) by sourceware.org (qpsmtpd/0.43rc1) with ESMTP; Thu, 03 Dec 2009 13:48:49 +0000 Received: by calimero.vinschen.de (Postfix, from userid 500) id 8E3E26D417D; Thu, 3 Dec 2009 14:48:37 +0100 (CET) Date: Thu, 03 Dec 2009 13:48:00 -0000 From: Corinna Vinschen To: cygwin-xfree@cygwin.com Subject: Re: X11R7.5 and C.UTF-8 Message-ID: <20091203134837.GX8059@calimero.vinschen.de> Reply-To: cygwin-xfree@cygwin.com Mail-Followup-To: cygwin-xfree@cygwin.com References: <4AE8539E.9080004@cornell.edu> <20091028172216.P60895@mail101.his.com> <4AE8BC12.1060109@cornell.edu> <416096c60910281507n4774534dode1d24ac47d5b0a2@mail.gmail.com> <4B1115EC.7010308@cornell.edu> <4B174C20.1040900@tlinx.org> <416096c60912022348i36504e14l726efc9fc9c360e6@mail.gmail.com> <20091203045401.L85368@mail101.his.com> <416096c60912030516r42f67c05yfaa3b64fcca68b43@mail.gmail.com> MIME-Version: 1.0 Content-Type: multipart/mixed; boundary="UlVJffcvxoiEqYs2" Content-Disposition: inline Content-Transfer-Encoding: 8bit In-Reply-To: <416096c60912030516r42f67c05yfaa3b64fcca68b43@mail.gmail.com> User-Agent: Mutt/1.5.20 (2009-06-14) Mailing-List: contact cygwin-xfree-help@cygwin.com; run by ezmlm Precedence: bulk List-Id: List-Subscribe: List-Archive: List-Post: List-Help: , Sender: cygwin-xfree-owner@cygwin.com Reply-To: cygwin-xfree@cygwin.com Mail-Followup-To: cygwin-xfree@cygwin.com X-SW-Source: 2009-12/txt/msg00019.txt.bz2 --UlVJffcvxoiEqYs2 Content-Type: text/plain; charset=iso-8859-1 Content-Disposition: inline Content-Transfer-Encoding: 8bit Content-length: 1339 On Dec 3 13:16, Andy Koppe wrote: > 2009/12/3 Thomas Dickey: > >> From > >> http://www.opengroup.org/onlinepubs/009695399/basedefs/xbd_chap07.html, > >> §7.2: > >> > >> "The tables in Locale Definition describe the characteristics and > >> behavior of the POSIX locale for data consisting entirely of > >> characters from the portable character set and the control character > >> set. For other characters, the behavior is unspecified." > >> > >> This means that characters 0..127 have to be treated as ASCII, but > >> beyond that an implementation can do what it wants. And on Cygwin 1.7, > >> plain "C" actually does imply UTF-8, which happily is > >> backward-compatible with ASCII. > > > > That's an interpretation that so far hasn't been blessed by the standards > > people.  Any discussion of this topic should mention that, as a caveat. > > Fair point. It also means that apps are entitled to assume that "C" > supports no more than ASCII, which is why Cygwin 1.7's default locale > is C.UTF-8. A default locale setting based on the user's language > selection would be better, but we don't have that (yet?). Try the attached. Note: It has a hidden "--testloop" option... Corinna -- Corinna Vinschen Please, send mails regarding Cygwin to Cygwin Project Co-Leader cygwin AT cygwin DOT com Red Hat --UlVJffcvxoiEqYs2 Content-Type: text/x-c++src; charset=us-ascii Content-Disposition: attachment; filename="getlocale.cc" Content-length: 4612 #define WINVER 0x0600 #include #include #include #define VERSION "1.0" extern char *__progname; void version () __attribute__ ((noreturn)); void usage (FILE *, int) __attribute__ ((noreturn)); void version () { printf ("%s (Cygwin) %s\n", __progname, VERSION); exit (0); } void usage (FILE * stream, int status) { fprintf (stream, "\n\ Usage: %s [-suU] [-l LCID]\n\ \n\ Return POSIX LANG identifier corresponding to a locale, default is the\n\ system default locale\n\ Possible options are:\n\ \n\ -s, --system return LANG for the system's default locale\n\ -u, --user return LANG for the current user's default locale\n\ -l, --lcid LCID return LANG for the LCID given as argument\n\ -U, --UTF-8 always attach .UTF-8 to LANG\n\ -h, --help this text\n\ -V, --version print the version of %s and exit\n", __progname, __progname); exit (status); } struct option longopts[] = { {"system", no_argument, NULL, 's'}, {"user", no_argument, NULL, 'u'}, {"lcid", required_argument, NULL, 'l'}, {"UTF-8", no_argument, NULL, 'U'}, {"help", no_argument, NULL, 'h'}, {"version", no_argument, NULL, 'V'}, {"testloop", no_argument, NULL, 'T'}, {0, no_argument, NULL, 0} }; char *opts = "dsul:UhV"; int getlocale (LCID lcid, bool utf, bool test) { UINT codepage; char iso639[10]; char iso3166[10]; if (!GetLocaleInfo (lcid, LOCALE_IDEFAULTANSICODEPAGE | LOCALE_RETURN_NUMBER, (char *) &codepage, sizeof codepage) || !GetLocaleInfo (lcid, LOCALE_SISO639LANGNAME, iso639, 10) || !GetLocaleInfo (lcid, LOCALE_SISO3166CTRYNAME, iso3166, 10)) { if (!test) fprintf (stderr, "%s: Non existant locale\n", __progname); return 2; } if (utf) codepage = 0; if (test) { char cty[256]; char lang[256]; GetLocaleInfo (lcid, LOCALE_SENGCOUNTRY, cty, 256); GetLocaleInfo (lcid, LOCALE_SENGLANGUAGE, lang, 256); printf ("0x%04x=\"%s_%s\", %s (%s)\n", (unsigned) lcid, iso639, iso3166, lang, cty); } else printf ("LANG=\"%s_%s%s\"\n", iso639, iso3166, codepage ? "" : ".UTF-8"); return 0; } #define d(X) {X, #X} struct dl { LCTYPE t; const char *s; } dlist[] = { d(LOCALE_SLONGDATE), d(LOCALE_SSHORTDATE), d(LOCALE_STIMEFORMAT), d(LOCALE_SYEARMONTH), d(LOCALE_S1159), d(LOCALE_S2359), d(LOCALE_SDAYNAME1), d(LOCALE_SDAYNAME2), d(LOCALE_SDAYNAME3), d(LOCALE_SDAYNAME4), d(LOCALE_SDAYNAME5), d(LOCALE_SDAYNAME6), d(LOCALE_SDAYNAME7), d(LOCALE_SABBREVDAYNAME1), d(LOCALE_SABBREVDAYNAME2), d(LOCALE_SABBREVDAYNAME3), d(LOCALE_SABBREVDAYNAME4), d(LOCALE_SABBREVDAYNAME5), d(LOCALE_SABBREVDAYNAME6), d(LOCALE_SABBREVDAYNAME7), d(LOCALE_SMONTHNAME1), d(LOCALE_SMONTHNAME2), d(LOCALE_SMONTHNAME3), d(LOCALE_SMONTHNAME4), d(LOCALE_SMONTHNAME5), d(LOCALE_SMONTHNAME6), d(LOCALE_SMONTHNAME7), d(LOCALE_SMONTHNAME8), d(LOCALE_SMONTHNAME9), d(LOCALE_SMONTHNAME10), d(LOCALE_SMONTHNAME11), d(LOCALE_SMONTHNAME12), d(LOCALE_SMONTHNAME13), d(LOCALE_SABBREVMONTHNAME1), d(LOCALE_SABBREVMONTHNAME2), d(LOCALE_SABBREVMONTHNAME3), d(LOCALE_SABBREVMONTHNAME4), d(LOCALE_SABBREVMONTHNAME5), d(LOCALE_SABBREVMONTHNAME6), d(LOCALE_SABBREVMONTHNAME7), d(LOCALE_SABBREVMONTHNAME8), d(LOCALE_SABBREVMONTHNAME9), d(LOCALE_SABBREVMONTHNAME10), d(LOCALE_SABBREVMONTHNAME11), d(LOCALE_SABBREVMONTHNAME12), d(LOCALE_SABBREVMONTHNAME13), { 0, NULL } }; int main (int argc, char **argv) { int opt; LCID lcid = LOCALE_SYSTEM_DEFAULT; bool utf = false; bool test = false; bool dates = false; while ((opt = getopt_long (argc, argv, opts, longopts, NULL)) != EOF) switch (opt) { case 's': lcid = LOCALE_SYSTEM_DEFAULT; break; case 'u': lcid = LOCALE_USER_DEFAULT; break; case 'l': lcid = strtoul (optarg, NULL, 0); break; case 'U': utf = true; break; case 'h': usage (stdout, 0); break; case 'V': version (); break; case 'T': test = true; break; case 'd': dates = true; break; default: usage (stderr, 1); break; } if (test) { for (unsigned lang = 1; lang <= 0x3ff; ++lang) for (unsigned sublang = 1; sublang <= 0x3f; ++sublang) getlocale ((sublang << 10) | lang, false, true); return 0; } if (dates) { char buf[256]; for (dl *dp = dlist; dp->t; ++dp) if (GetLocaleInfo (lcid, dp->t, buf, 256)) printf ("%s: <%s>\n", dp->s, buf); return 0; } return getlocale (lcid, utf, false); } --UlVJffcvxoiEqYs2 Content-Type: text/plain; charset=us-ascii Content-length: 223 -- Unsubscribe info: http://cygwin.com/ml/#unsubscribe-simple Problem reports: http://cygwin.com/problems.html Documentation: http://x.cygwin.com/docs/ FAQ: http://x.cygwin.com/docs/faq/ --UlVJffcvxoiEqYs2--