From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <fw@sourceware.org>
Received: by sourceware.org (Postfix, from userid 2178)
 id 9D0C73857806; Tue, 17 May 2022 09:57:36 +0000 (GMT)
DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 9D0C73857806
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
From: Florian Weimer <fw@sourceware.org>
To: glibc-cvs@sourceware.org
Subject: [glibc/fw/localedef-utf8] locale: Introduce get_string_U_char into
 linereader.c
X-Act-Checkin: glibc
X-Git-Author: Florian Weimer <fweimer@redhat.com>
X-Git-Refname: refs/heads/fw/localedef-utf8
X-Git-Oldrev: 7c0759f4e1fc2f7ffc5ed7284bd47ef9cff4da44
X-Git-Newrev: de0b9d66446c553bdbae2c15a63ef8eb5f819d1d
Message-Id: <20220517095736.9D0C73857806@sourceware.org>
Date: Tue, 17 May 2022 09:57:36 +0000 (GMT)
X-BeenThere: glibc-cvs@sourceware.org
X-Mailman-Version: 2.1.29
Precedence: list
List-Id: Glibc-cvs mailing list <glibc-cvs.sourceware.org>
List-Unsubscribe: <https://sourceware.org/mailman/options/glibc-cvs>,
 <mailto:glibc-cvs-request@sourceware.org?subject=unsubscribe>
List-Archive: <https://sourceware.org/pipermail/glibc-cvs/>
List-Help: <mailto:glibc-cvs-request@sourceware.org?subject=help>
List-Subscribe: <https://sourceware.org/mailman/listinfo/glibc-cvs>,
 <mailto:glibc-cvs-request@sourceware.org?subject=subscribe>
X-List-Received-Date: Tue, 17 May 2022 09:57:36 -0000

https://sourceware.org/git/gitweb.cgi?p=glibc.git;h=de0b9d66446c553bdbae2c15a63ef8eb5f819d1d

commit de0b9d66446c553bdbae2c15a63ef8eb5f819d1d
Author: Florian Weimer <fweimer@redhat.com>
Date:   Tue May 17 10:15:56 2022 +0200

    locale: Introduce get_string_U_char into linereader.c
    
    This will permit reusing the Unicode character processing for different
    character encodings, not just the current <U...> encoding.

Diff:
---
 locale/programs/linereader.c | 158 +++++++++++++++++++++----------------------
 1 file changed, 76 insertions(+), 82 deletions(-)

diff --git a/locale/programs/linereader.c b/locale/programs/linereader.c
index d5367e0a1e..ca4abb031c 100644
--- a/locale/programs/linereader.c
+++ b/locale/programs/linereader.c
@@ -596,6 +596,75 @@ get_ident (struct linereader *lr)
   return &lr->token;
 }
 
+/* Add the bytes for Unicode character WCH to LRB or set *ILLEGAL_STRING.  */
+static void
+get_string_U_char (struct localedef_t *locale, const struct charmap_t *charmap,
+		   const struct repertoire_t *repertoire,
+		   uint32_t wch, struct lr_buffer *lrb, bool *illegal_string)
+{
+  /* See whether the charmap contains the Uxxxxxxxx names.  */
+  char utmp[10];
+  snprintf (utmp, sizeof (utmp), "U%08X", wch);
+  struct charseq *seq = charmap_find_value (charmap, utmp, 9);
+
+  if (seq == NULL)
+    {
+      /* No, this isn't the case.  Now determine from
+	 the repertoire the name of the character and
+	 find it in the charmap.  */
+      if (repertoire != NULL)
+	{
+	  const char *symbol = repertoire_find_symbol (repertoire, wch);
+	  if (symbol != NULL)
+	    seq = charmap_find_value (charmap, symbol, strlen (symbol));
+	}
+
+      if (seq == NULL)
+	{
+#ifndef NO_TRANSLITERATION
+	  /* Transliterate if possible.  */
+	  if (locale != NULL)
+	    {
+	      if ((locale->avail & CTYPE_LOCALE) == 0)
+		{
+		  /* Load the CTYPE data now.  */
+		  int old_needed = locale->needed;
+
+		  locale->needed = 0;
+		  locale = load_locale (LC_CTYPE, locale->name,
+					locale->repertoire_name,
+					charmap, locale);
+		  locale->needed = old_needed;
+		}
+
+	      uint32_t *translit;
+	      if ((locale->avail & CTYPE_LOCALE) != 0
+		  && ((translit = find_translit (locale, charmap, wch))
+		      != NULL))
+		/* The CTYPE data contains a matching
+		   transliteration.  */
+		{
+		  for (int i = 0; translit[i] != 0; ++i)
+		    {
+		      snprintf (utmp, sizeof (utmp), "U%08X", translit[i]);
+		      seq = charmap_find_value (charmap, utmp, 9);
+		      assert (seq != NULL);
+		      adds (lrb, seq->bytes, seq->nbytes);
+		    }
+		  return;
+		}
+	    }
+#endif	/* NO_TRANSLITERATION */
+
+	  /* Not a known name.  */
+	  *illegal_string = true;
+	}
+    }
+
+  if (seq != NULL)
+    adds (lrb, seq->bytes, seq->nbytes);
+}
+
 
 static struct token *
 get_string (struct linereader *lr, const struct charmap_t *charmap,
@@ -635,7 +704,7 @@ get_string (struct linereader *lr, const struct charmap_t *charmap,
     }
   else
     {
-      int illegal_string = 0;
+      bool illegal_string = false;
       size_t buf2act = 0;
       size_t buf2max = 56 * sizeof (uint32_t);
       int ch;
@@ -695,7 +764,7 @@ get_string (struct linereader *lr, const struct charmap_t *charmap,
 	    {
 	      /* <> is no correct name.  Ignore it and also signal an
 		 error.  */
-	      illegal_string = 1;
+	      illegal_string = true;
 	      continue;
 	    }
 
@@ -709,8 +778,6 @@ get_string (struct linereader *lr, const struct charmap_t *charmap,
 
 	      if (cp == &lrb.buf[lrb.act])
 		{
-		  char utmp[10];
-
 		  /* Yes, it is.  */
 		  addc (&lrb, '\0');
 		  wch = strtoul (lrb.buf + startidx + 1, NULL, 16);
@@ -721,81 +788,8 @@ get_string (struct linereader *lr, const struct charmap_t *charmap,
 		  if (return_widestr)
 		    ADDWC (wch);
 
-		  /* See whether the charmap contains the Uxxxxxxxx names.  */
-		  snprintf (utmp, sizeof (utmp), "U%08X", wch);
-		  seq = charmap_find_value (charmap, utmp, 9);
-
-		  if (seq == NULL)
-		    {
-		     /* No, this isn't the case.  Now determine from
-			the repertoire the name of the character and
-			find it in the charmap.  */
-		      if (repertoire != NULL)
-			{
-			  const char *symbol;
-
-			  symbol = repertoire_find_symbol (repertoire, wch);
-
-			  if (symbol != NULL)
-			    seq = charmap_find_value (charmap, symbol,
-						      strlen (symbol));
-			}
-
-		      if (seq == NULL)
-			{
-#ifndef NO_TRANSLITERATION
-			  /* Transliterate if possible.  */
-			  if (locale != NULL)
-			    {
-			      uint32_t *translit;
-
-			      if ((locale->avail & CTYPE_LOCALE) == 0)
-				{
-				  /* Load the CTYPE data now.  */
-				  int old_needed = locale->needed;
-
-				  locale->needed = 0;
-				  locale = load_locale (LC_CTYPE,
-							locale->name,
-							locale->repertoire_name,
-							charmap, locale);
-				  locale->needed = old_needed;
-				}
-
-			      if ((locale->avail & CTYPE_LOCALE) != 0
-				  && ((translit = find_translit (locale,
-								 charmap, wch))
-				      != NULL))
-				/* The CTYPE data contains a matching
-				   transliteration.  */
-				{
-				  int i;
-
-				  for (i = 0; translit[i] != 0; ++i)
-				    {
-				      char utmp[10];
-
-				      snprintf (utmp, sizeof (utmp), "U%08X",
-						translit[i]);
-				      seq = charmap_find_value (charmap, utmp,
-								9);
-				      assert (seq != NULL);
-				      adds (&lrb, seq->bytes, seq->nbytes);
-				    }
-
-				  continue;
-				}
-			    }
-#endif	/* NO_TRANSLITERATION */
-
-			  /* Not a known name.  */
-			  illegal_string = 1;
-			}
-		    }
-
-		  if (seq != NULL)
-		    adds (&lrb, seq->bytes, seq->nbytes);
-
+		  get_string_U_char (locale, charmap, repertoire, wch,
+				     &lrb, &illegal_string);
 		  continue;
 		}
 	    }
@@ -812,7 +806,7 @@ get_string (struct linereader *lr, const struct charmap_t *charmap,
 	      /* This name is not in the charmap.  */
 	      lr_error (lr, _("symbol `%.*s' not in charmap"),
 			(int) (lrb.act - startidx), &lrb.buf[startidx]);
-	      illegal_string = 1;
+	      illegal_string = true;
 	    }
 
 	  if (return_widestr)
@@ -833,7 +827,7 @@ get_string (struct linereader *lr, const struct charmap_t *charmap,
 		  /* This name is not in the repertoire map.  */
 		  lr_error (lr, _("symbol `%.*s' not in repertoire map"),
 			    (int) (lrb.act - startidx), &lrb.buf[startidx]);
-		  illegal_string = 1;
+		  illegal_string = true;
 		}
 	      else
 		ADDWC (wch);
@@ -850,7 +844,7 @@ get_string (struct linereader *lr, const struct charmap_t *charmap,
       if (ch == '\n' || ch == EOF)
 	{
 	  lr_error (lr, _("unterminated string"));
-	  illegal_string = 1;
+	  illegal_string = true;
 	}
 
       if (illegal_string)