From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 1791) id CE8A33858403; Mon, 21 Mar 2022 12:26:56 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org CE8A33858403 Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: Adhemerval Zanella To: glibc-cvs@sourceware.org Subject: [glibc/azanella/imap-utf7] iconv: Better mapping to RFC for UTF-7 X-Act-Checkin: glibc X-Git-Author: Max Gautier X-Git-Refname: refs/heads/azanella/imap-utf7 X-Git-Oldrev: cd08388523c60c0b9270298699d769f16598ef88 X-Git-Newrev: 09abb567a94e4e33504bb863f9d36f253287d333 Message-Id: <20220321122656.CE8A33858403@sourceware.org> Date: Mon, 21 Mar 2022 12:26:56 +0000 (GMT) X-BeenThere: glibc-cvs@sourceware.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Glibc-cvs mailing list List-Unsubscribe: , List-Archive: List-Help: List-Subscribe: , X-List-Received-Date: Mon, 21 Mar 2022 12:26:56 -0000 https://sourceware.org/git/gitweb.cgi?p=glibc.git;h=09abb567a94e4e33504bb863f9d36f253287d333 commit 09abb567a94e4e33504bb863f9d36f253287d333 Author: Max Gautier Date: Sun Mar 20 17:41:05 2022 +0100 iconv: Better mapping to RFC for UTF-7 - Direct use of characters instead of arcane arrays - isxbase64 is not the Modified BASE64 alphabet, but the characters that need to trigger an explicit shift back to US-ASCII. Make that clearer Signed-off-by: Max Gautier Reviewed-by: Adhemerval Zanella Diff: --- iconvdata/utf-7.c | 60 ++++++++++++++++++++++++++++--------------------------- 1 file changed, 31 insertions(+), 29 deletions(-) diff --git a/iconvdata/utf-7.c b/iconvdata/utf-7.c index b5af9b217c..815b1891c7 100644 --- a/iconvdata/utf-7.c +++ b/iconvdata/utf-7.c @@ -30,20 +30,27 @@ +static bool +between (uint32_t const ch, + uint32_t const lower_bound, uint32_t const upper_bound) +{ + return (ch >= lower_bound && ch <= upper_bound); +} + /* The set of "direct characters": A-Z a-z 0-9 ' ( ) , - . / : ? 
space tab lf cr */ -static const unsigned char direct_tab[128 / 8] = - { - 0x00, 0x26, 0x00, 0x00, 0x81, 0xf3, 0xff, 0x87, - 0xfe, 0xff, 0xff, 0x07, 0xfe, 0xff, 0xff, 0x07 - }; - -static int +static bool isdirect (uint32_t ch) { - return (ch < 128 && ((direct_tab[ch >> 3] >> (ch & 7)) & 1)); + return (between (ch, 'A', 'Z') + || between (ch, 'a', 'z') + || between (ch, '0', '9') + || ch == '\'' || ch == '(' || ch == ')' + || between (ch, ',', '/') + || ch == ':' || ch == '?' + || ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r'); } @@ -52,33 +59,27 @@ isdirect (uint32_t ch) ! " # $ % & * ; < = > @ [ ] ^ _ ` { | } */ -static const unsigned char xdirect_tab[128 / 8] = - { - 0x00, 0x26, 0x00, 0x00, 0xff, 0xf7, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xef, 0xff, 0xff, 0xff, 0x3f - }; - -static int +static bool isxdirect (uint32_t ch) { - return (ch < 128 && ((xdirect_tab[ch >> 3] >> (ch & 7)) & 1)); + return (ch == '\t' + || ch == '\n' + || ch == '\r' + || (between (ch, ' ', '}') && ch != '+' && ch != '\\')); } -/* The set of "extended base64 characters": +/* Characters which need to trigger an explicit shift back to US-ASCII (UTF-7 + only): Modified base64 + '-' (shift back character) A-Z a-z 0-9 + / - */ -static const unsigned char xbase64_tab[128 / 8] = - { - 0x00, 0x00, 0x00, 0x00, 0x00, 0xa8, 0xff, 0x03, - 0xfe, 0xff, 0xff, 0x07, 0xfe, 0xff, 0xff, 0x07 - }; - -static int -isxbase64 (uint32_t ch) +static bool +needs_explicit_shift (uint32_t ch) { - return (ch < 128 && ((xbase64_tab[ch >> 3] >> (ch & 7)) & 1)); + return (between (ch, 'A', 'Z') + || between (ch, 'a', 'z') + || between (ch, '/', '9') || ch == '+' || ch == '-'); } @@ -252,7 +253,7 @@ base64 (unsigned int i) indeed form a Low Surrogate. */ \ uint32_t wc2 = wch & 0xffff; \ \ - if (! 
__glibc_likely (wc2 >= 0xdc00 && wc2 < 0xe000)) \ { \ STANDARD_FROM_LOOP_ERR_HANDLER ((statep->__count = 0, 1));\ } \ @@ -372,7 +373,8 @@ base64 (unsigned int i) /* deactivate base64 encoding */ \ size_t count; \ \ - count = ((statep->__count & 0x18) >= 0x10) + isxbase64 (ch) + 1; \ + count = ((statep->__count & 0x18) >= 0x10) \ + + needs_explicit_shift (ch) + 1; \ if (__glibc_unlikely (outptr + count > outend)) \ { \ result = __GCONV_FULL_OUTPUT; \ @@ -381,7 +383,7 @@ base64 (unsigned int i) \ if ((statep->__count & 0x18) >= 0x10) \ *outptr++ = base64 ((statep->__count >> 3) & ~3); \ - if (isxbase64 (ch)) \ + if (needs_explicit_shift (ch)) \ *outptr++ = '-'; \ *outptr++ = (unsigned char) ch; \ statep->__count = 0; \