public inbox for libc-alpha@sourceware.org
 help / color / mirror / Atom feed
From: Adhemerval Zanella <adhemerval.zanella@linaro.org>
To: Max Gautier <mg@max.gautier.name>, libc-alpha@sourceware.org
Subject: Re: [PATCH v5 2/4] iconv: Better mapping to RFC for UTF-7
Date: Mon, 21 Mar 2022 08:59:27 -0300	[thread overview]
Message-ID: <d883303c-853c-b651-0d88-67e08a54b80f@linaro.org> (raw)
In-Reply-To: <56099b45-e0de-17ac-7cbb-de7d4cec27dc@linaro.org>



On 21/03/2022 08:53, Adhemerval Zanella wrote:
> 
> 
> On 20/03/2022 13:41, Max Gautier via Libc-alpha wrote:
>> - Direct use of characters instead of arcane arrays
>> - isxbase64 is not the Modified BASE64 alphabet, but the characters which
>>   need to trigger an explicit shift back to US-ASCII. Make that clearer
>>
>> Signed-off-by: Max Gautier <mg@max.gautier.name>
> 
> 
> LGTM, thanks.
> 
> Reviewed-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>
> 
>> ---
>>  iconvdata/utf-7.c | 64 ++++++++++++++++++++++++-----------------------
>>  1 file changed, 33 insertions(+), 31 deletions(-)
>>
>> diff --git a/iconvdata/utf-7.c b/iconvdata/utf-7.c
>> index 9ba0974959..15f3669ac8 100644
>> --- a/iconvdata/utf-7.c
>> +++ b/iconvdata/utf-7.c
>> @@ -30,20 +30,27 @@
>>  
>>  
>>  
>> +static bool
>> +between (uint32_t const ch,
>> +	 uint32_t const lower_bound, uint32_t const upper_bound)
>> +{
>> +  return (ch >= lower_bound && ch <= upper_bound);
>> +}
>> +
>>  /* The set of "direct characters":
>>     A-Z a-z 0-9 ' ( ) , - . / : ? space tab lf cr
>>  */
>>  
>> -static const unsigned char direct_tab[128 / 8] =
>> -  {
>> -    0x00, 0x26, 0x00, 0x00, 0x81, 0xf3, 0xff, 0x87,
>> -    0xfe, 0xff, 0xff, 0x07, 0xfe, 0xff, 0xff, 0x07
>> -  };
>> -
>> -static int
>> -isdirect (uint32_t ch)
>> +static bool
>> +isdirect (uint32_t ch, enum variant var)
>>  {

In fact I am seeing this failure:

utf-7.c:45:29: error: ‘enum variant’ declared inside parameter list will not be visible outside of this definition or declaration [-Werror]
   45 | isdirect (uint32_t ch, enum variant var)
      |                             ^~~~~~~

This happens because 'enum variant' is only defined in the next patch.
The usual best practice is to keep each patch self-consistent (buildable on
its own), so could you move the definition into this patch?

Or I can fix it for you before installing, it is up to you.

>> -  return (ch < 128 && ((direct_tab[ch >> 3] >> (ch & 7)) & 1));
>> +  return (between (ch, 'A', 'Z')
>> +	  || between (ch, 'a', 'z')
>> +	  || between (ch, '0', '9')
>> +	  || ch == '\'' || ch == '(' || ch == ')'
>> +	  || between (ch, ',', '/')
>> +	  || ch == ':' || ch == '?'
>> +	  || ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r');
>>  }
>>  
>>  
>> @@ -52,33 +59,27 @@ isdirect (uint32_t ch)
>>     ! " # $ % & * ; < = > @ [ ] ^ _ ` { | }
>>  */
>>  
>> -static const unsigned char xdirect_tab[128 / 8] =
>> -  {
>> -    0x00, 0x26, 0x00, 0x00, 0xff, 0xf7, 0xff, 0xff,
>> -    0xff, 0xff, 0xff, 0xef, 0xff, 0xff, 0xff, 0x3f
>> -  };
>> -
>> -static int
>> -isxdirect (uint32_t ch)
>> +static bool
>> +isxdirect (uint32_t ch, enum variant var)
>>  {
>> -  return (ch < 128 && ((xdirect_tab[ch >> 3] >> (ch & 7)) & 1));
>> +  return (ch == '\t'
>> +	  || ch == '\n'
>> +	  || ch == '\r'
>> +	  || (between (ch, ' ', '}') && ch != '+' && ch != '\\'));
>>  }
>>  
>>  
>> -/* The set of "extended base64 characters":
>> +/* Characters which needs to trigger an explicit shift back to US-ASCII (UTF-7
>> +   only): Modified base64 + '-' (shift back character)
>>     A-Z a-z 0-9 + / -
>>  */
>>  
>> -static const unsigned char xbase64_tab[128 / 8] =
>> -  {
>> -    0x00, 0x00, 0x00, 0x00, 0x00, 0xa8, 0xff, 0x03,
>> -    0xfe, 0xff, 0xff, 0x07, 0xfe, 0xff, 0xff, 0x07
>> -  };
>> -
>> -static int
>> -isxbase64 (uint32_t ch)
>> +static bool
>> +needs_explicit_shift (uint32_t ch)
>>  {
>> -  return (ch < 128 && ((xbase64_tab[ch >> 3] >> (ch & 7)) & 1));
>> +  return (between (ch, 'A', 'Z')
>> +	  || between (ch, 'a', 'z')
>> +	  || between (ch, '/', '9') || ch == '+' || ch == '-');
>>  }
>>  
>>  
>> @@ -252,7 +253,7 @@ base64 (unsigned int i)
>>  		   indeed form a Low Surrogate.  */			      \
>>  		uint32_t wc2 = wch & 0xffff;				      \
>>  									      \
>> -		if (! __builtin_expect (wc2 >= 0xdc00 && wc2 < 0xe000, 1))    \
>> +		if (! __glibc_likely (wc2 >= 0xdc00 && wc2 < 0xe000))	      \
>>  		  {							      \
>>  		    STANDARD_FROM_LOOP_ERR_HANDLER ((statep->__count = 0, 1));\
>>  		  }							      \
>> @@ -372,7 +373,8 @@ base64 (unsigned int i)
>>  	    /* deactivate base64 encoding */				      \
>>  	    size_t count;						      \
>>  									      \
>> -	    count = ((statep->__count & 0x18) >= 0x10) + isxbase64 (ch) + 1;  \
>> +	    count = ((statep->__count & 0x18) >= 0x10)			      \
>> +	      + needs_explicit_shift (ch) + 1;				      \
>>  	    if (__glibc_unlikely (outptr + count > outend))		      \
>>  	      {								      \
>>  		result = __GCONV_FULL_OUTPUT;				      \
>> @@ -381,7 +383,7 @@ base64 (unsigned int i)
>>  									      \
>>  	    if ((statep->__count & 0x18) >= 0x10)			      \
>>  	      *outptr++ = base64 ((statep->__count >> 3) & ~3);		      \
>> -	    if (isxbase64 (ch))						      \
>> +	    if (needs_explicit_shift (ch))				      \
>>  	      *outptr++ = '-';						      \
>>  	    *outptr++ = (unsigned char) ch;				      \
>>  	    statep->__count = 0;					      \

  reply	other threads:[~2022-03-21 11:59 UTC|newest]

Thread overview: 60+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-08-19 23:06 [PATCH 0/5] iconv: module for MODIFIED-UTF-7 Max Gautier
2020-08-19 23:06 ` [PATCH 1/5] Copy utf-7 module to modified-utf-7 Max Gautier
2020-08-19 23:06 ` [PATCH 2/5] Update gconv-modules file Max Gautier
2020-08-19 23:07 ` [PATCH 3/5] Transform UTF-7 to MODIFIED-UTF-7 Max Gautier
2020-08-19 23:07 ` [PATCH 4/5] Make terminating base64 sequences mandatory Max Gautier
2020-08-19 23:07 ` [PATCH 5/5] Add test case for MODIFIED-UTF-7 Max Gautier
2020-08-20  7:18   ` Andreas Schwab
2020-08-20 15:40     ` [PATCH v2 " Max Gautier
2020-08-20  8:03 ` [PATCH 0/5] iconv: module " Florian Weimer
2020-08-20 15:19   ` Max Gautier
2020-08-20 15:58     ` Florian Weimer
2020-09-02 15:24   ` Max Gautier
2020-09-02 20:01     ` Adhemerval Zanella
2020-09-03  9:47       ` Max Gautier
2020-09-03 10:56         ` Andreas Schwab
2021-01-25  9:02   ` [PATCH v3 0/5] iconv: module for IMAP-UTF-7 Max Gautier
2021-01-25  9:02     ` [PATCH v3 1/5] Copy utf-7 module to modified-utf-7 Max Gautier
2021-01-25  9:31       ` Andreas Schwab
2021-01-25 13:51         ` Max Gautier
2021-02-07  9:42           ` Florian Weimer
2021-02-07 12:29             ` Max Gautier
2021-02-07 12:34               ` Florian Weimer
2021-12-09  9:31             ` [PATCH v4 0/4] iconv: Add support for UTF-7-IMAP Max Gautier
2021-12-09  9:31               ` [PATCH v4 1/4] iconv: Always encode "optional direct" UTF-7 characters Max Gautier
2022-03-07 12:10                 ` Adhemerval Zanella
2021-12-09  9:31               ` [PATCH v4 2/4] iconv: Better mapping to RFC for UTF-7 Max Gautier
2022-03-07 12:14                 ` Adhemerval Zanella
2022-03-20 16:41                 ` [PATCH v5 " Max Gautier
2022-03-21 11:53                   ` Adhemerval Zanella
2022-03-21 11:59                     ` Adhemerval Zanella [this message]
2022-03-21 12:06                       ` Adhemerval Zanella
2022-03-21 14:07                       ` Max Gautier
2021-12-09  9:31               ` [PATCH v4 3/4] iconv: make utf-7.c able to use variants Max Gautier
2022-03-07 12:34                 ` Adhemerval Zanella
2022-03-12 11:07                   ` Max Gautier
2022-03-14 12:17                     ` Adhemerval Zanella
2022-03-20 16:42                 ` [PATCH v5 " Max Gautier
2022-03-21 12:24                   ` Adhemerval Zanella
2021-12-09  9:31               ` [PATCH v4 4/4] iconv: Add UTF-7-IMAP variant in utf-7.c Max Gautier
2022-03-07 12:46                 ` Adhemerval Zanella
2022-03-20 16:43                 ` [PATCH v5 " Max Gautier
2022-03-21 12:24                   ` Adhemerval Zanella
2021-12-17 13:15               ` [PATCH v4 0/4] iconv: Add support for UTF-7-IMAP Max Gautier
2022-01-24 14:19                 ` Adhemerval Zanella
2022-02-10 13:16                   ` Max Gautier
2022-02-10 13:17                     ` Adhemerval Zanella
2022-03-04  8:53                       ` Max Gautier
2022-01-17 14:07               ` Max Gautier
2022-01-24  9:17               ` Max Gautier
2021-01-25  9:02     ` [PATCH v3 2/5] Update gconv-modules file Max Gautier
2021-02-07  9:49       ` Florian Weimer
2021-01-25  9:02     ` [PATCH v3 3/5] Transform UTF-7 to IMAP-UTF-7 Max Gautier
2021-01-25  9:02     ` [PATCH v3 4/5] Make terminating base64 sequences mandatory Max Gautier
2021-02-07  9:45       ` Florian Weimer
2021-01-25  9:02     ` [PATCH v3 5/5] Add test case for IMAP-UTF-7 Max Gautier
2021-02-07  9:49       ` Florian Weimer
2021-03-16 14:39     ` [PATCH v3 5/5][pw utf test] " Siddhesh Poyarekar
2022-03-21 12:28     ` [PATCH v3 0/5] iconv: module " Adhemerval Zanella
2022-03-21 14:09       ` Max Gautier
2021-01-12  9:12 ` [PATCH 0/5] iconv: module for MODIFIED-UTF-7 Florian Weimer

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=d883303c-853c-b651-0d88-67e08a54b80f@linaro.org \
    --to=adhemerval.zanella@linaro.org \
    --cc=libc-alpha@sourceware.org \
    --cc=mg@max.gautier.name \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).