public inbox for glibc-cvs@sourceware.org
help / color / mirror / Atom feed
* [glibc/fw/localedef-utf8] locale: localedef input files are now encoded in UTF-8
@ 2022-05-17  9:57 Florian Weimer
  0 siblings, 0 replies; only message in thread
From: Florian Weimer @ 2022-05-17  9:57 UTC (permalink / raw)
  To: glibc-cvs

https://sourceware.org/git/gitweb.cgi?p=glibc.git;h=0c34593491e4ea2de79ae85fedb26252529b5f35

commit 0c34593491e4ea2de79ae85fedb26252529b5f35
Author: Florian Weimer <fweimer@redhat.com>
Date:   Tue May 17 11:38:29 2022 +0200

    locale: localedef input files are now encoded in UTF-8

Diff:
---
 locale/programs/linereader.c | 32 +++++++++++++++++++++++++++++---
 1 file changed, 29 insertions(+), 3 deletions(-)

diff --git a/locale/programs/linereader.c b/locale/programs/linereader.c
index ca4abb031c..485ccaff0a 100644
--- a/locale/programs/linereader.c
+++ b/locale/programs/linereader.c
@@ -688,7 +688,11 @@ get_string (struct linereader *lr, const struct charmap_t *charmap,
 
       buf2 = NULL;
       while ((ch = lr_getc (lr)) != '"' && ch != '\n' && ch != EOF)
-	addc (&lrb, ch);
+	{
+	  if (ch >= 0x80)
+	    lr_error (lr, _("illegal 8-bit character in untranslated string"));
+	  addc (&lrb, ch);
+	}
 
       /* Catch errors with trailing escape character.  */
       if (lrb.act > 0 && lrb.buf[lrb.act - 1] == lr->escape_char
@@ -733,13 +737,35 @@ get_string (struct linereader *lr, const struct charmap_t *charmap,
 	      if (ch == lr->escape_char)
 		{
 		  ch = lr_getc (lr);
+		  if (ch >= 0x80)
+		    {
+		      lr_error (lr, _("illegal 8-bit escape sequence"));
+		      illegal_string = true;
+		      break;
+		    }
 		  if (ch == '\n' || ch == EOF)
 		    break;
 		}
+	      else if (ch < 0x80)
+		{
+		  wch = ch;
+		  addc (&lrb, ch);
+		}
+	      else 		/* UTF-8 sequence.  */
+		{
+		 if (!get_string_decode_utf8 (lr, ch, &wch))
+		   {
+		     illegal_string = true;
+		     break;
+		   }
+		 get_string_U_char (locale, charmap, repertoire, wch,
+				    &lrb, &illegal_string);
+		 if (illegal_string)
+		   break;
+		}
 
-	      addc (&lrb, ch);
 	      if (return_widestr)
-		ADDWC ((uint32_t) ch);
+		ADDWC (wch);
 
 	      continue;
 	    }


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2022-05-17  9:57 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-05-17  9:57 [glibc/fw/localedef-utf8] locale: localedef input files are now encoded in UTF-8 Florian Weimer

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as the URLs for read-only IMAP folder(s) and NNTP newsgroup(s).