public inbox for glibc-cvs@sourceware.org
help / color / mirror / Atom feed
* [glibc] iconv: Better mapping to RFC for UTF-7
@ 2022-03-21 18:04 Adhemerval Zanella
  0 siblings, 0 replies; only message in thread
From: Adhemerval Zanella @ 2022-03-21 18:04 UTC (permalink / raw)
  To: glibc-cvs

https://sourceware.org/git/gitweb.cgi?p=glibc.git;h=09abb567a94e4e33504bb863f9d36f253287d333

commit 09abb567a94e4e33504bb863f9d36f253287d333
Author: Max Gautier <mg@max.gautier.name>
Date:   Sun Mar 20 17:41:05 2022 +0100

    iconv: Better mapping to RFC for UTF-7
    
    - Direct use of characters instead of arcane arrays
    - isxbase64 is not the Modified BASE64 alphabet, but the characters which
      need to trigger an explicit shift back to US-ASCII. Make that clearer
    
    Signed-off-by: Max Gautier <mg@max.gautier.name>
    Reviewed-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>

Diff:
---
 iconvdata/utf-7.c | 60 ++++++++++++++++++++++++++++---------------------------
 1 file changed, 31 insertions(+), 29 deletions(-)

diff --git a/iconvdata/utf-7.c b/iconvdata/utf-7.c
index b5af9b217c..815b1891c7 100644
--- a/iconvdata/utf-7.c
+++ b/iconvdata/utf-7.c
@@ -30,20 +30,27 @@
 
 
 
+static bool
+between (uint32_t const ch,
+	 uint32_t const lower_bound, uint32_t const upper_bound)
+{
+  return (ch >= lower_bound && ch <= upper_bound);
+}
+
 /* The set of "direct characters":
    A-Z a-z 0-9 ' ( ) , - . / : ? space tab lf cr
 */
 
-static const unsigned char direct_tab[128 / 8] =
-  {
-    0x00, 0x26, 0x00, 0x00, 0x81, 0xf3, 0xff, 0x87,
-    0xfe, 0xff, 0xff, 0x07, 0xfe, 0xff, 0xff, 0x07
-  };
-
-static int
+static bool
 isdirect (uint32_t ch)
 {
-  return (ch < 128 && ((direct_tab[ch >> 3] >> (ch & 7)) & 1));
+  return (between (ch, 'A', 'Z')
+	  || between (ch, 'a', 'z')
+	  || between (ch, '0', '9')
+	  || ch == '\'' || ch == '(' || ch == ')'
+	  || between (ch, ',', '/')
+	  || ch == ':' || ch == '?'
+	  || ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r');
 }
 
 
@@ -52,33 +59,27 @@ isdirect (uint32_t ch)
    ! " # $ % & * ; < = > @ [ ] ^ _ ` { | }
 */
 
-static const unsigned char xdirect_tab[128 / 8] =
-  {
-    0x00, 0x26, 0x00, 0x00, 0xff, 0xf7, 0xff, 0xff,
-    0xff, 0xff, 0xff, 0xef, 0xff, 0xff, 0xff, 0x3f
-  };
-
-static int
+static bool
 isxdirect (uint32_t ch)
 {
-  return (ch < 128 && ((xdirect_tab[ch >> 3] >> (ch & 7)) & 1));
+  return (ch == '\t'
+	  || ch == '\n'
+	  || ch == '\r'
+	  || (between (ch, ' ', '}') && ch != '+' && ch != '\\'));
 }
 
 
-/* The set of "extended base64 characters":
+/* Characters which need to trigger an explicit shift back to US-ASCII (UTF-7
+   only): Modified base64 + '-' (shift back character)
    A-Z a-z 0-9 + / -
 */
 
-static const unsigned char xbase64_tab[128 / 8] =
-  {
-    0x00, 0x00, 0x00, 0x00, 0x00, 0xa8, 0xff, 0x03,
-    0xfe, 0xff, 0xff, 0x07, 0xfe, 0xff, 0xff, 0x07
-  };
-
-static int
-isxbase64 (uint32_t ch)
+static bool
+needs_explicit_shift (uint32_t ch)
 {
-  return (ch < 128 && ((xbase64_tab[ch >> 3] >> (ch & 7)) & 1));
+  return (between (ch, 'A', 'Z')
+	  || between (ch, 'a', 'z')
+	  || between (ch, '/', '9') || ch == '+' || ch == '-');
 }
 
 
@@ -252,7 +253,7 @@ base64 (unsigned int i)
 		   indeed form a Low Surrogate.  */			      \
 		uint32_t wc2 = wch & 0xffff;				      \
 									      \
-		if (! __builtin_expect (wc2 >= 0xdc00 && wc2 < 0xe000, 1))    \
+		if (! __glibc_likely (wc2 >= 0xdc00 && wc2 < 0xe000))	      \
 		  {							      \
 		    STANDARD_FROM_LOOP_ERR_HANDLER ((statep->__count = 0, 1));\
 		  }							      \
@@ -372,7 +373,8 @@ base64 (unsigned int i)
 	    /* deactivate base64 encoding */				      \
 	    size_t count;						      \
 									      \
-	    count = ((statep->__count & 0x18) >= 0x10) + isxbase64 (ch) + 1;  \
+	    count = ((statep->__count & 0x18) >= 0x10)			      \
+	      + needs_explicit_shift (ch) + 1;				      \
 	    if (__glibc_unlikely (outptr + count > outend))		      \
 	      {								      \
 		result = __GCONV_FULL_OUTPUT;				      \
@@ -381,7 +383,7 @@ base64 (unsigned int i)
 									      \
 	    if ((statep->__count & 0x18) >= 0x10)			      \
 	      *outptr++ = base64 ((statep->__count >> 3) & ~3);		      \
-	    if (isxbase64 (ch))						      \
+	    if (needs_explicit_shift (ch))				      \
 	      *outptr++ = '-';						      \
 	    *outptr++ = (unsigned char) ch;				      \
 	    statep->__count = 0;					      \


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2022-03-21 18:04 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-03-21 18:04 [glibc] iconv: Better mapping to RFC for UTF-7 Adhemerval Zanella

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).