From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail-oi1-x22c.google.com (mail-oi1-x22c.google.com [IPv6:2607:f8b0:4864:20::22c]) by sourceware.org (Postfix) with ESMTPS id 195903858D35 for ; Mon, 7 Mar 2022 12:34:51 +0000 (GMT) DMARC-Filter: OpenDMARC Filter v1.4.1 sourceware.org 195903858D35 Received: by mail-oi1-x22c.google.com with SMTP id z7so14970257oid.4 for ; Mon, 07 Mar 2022 04:34:51 -0800 (PST) X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=x-gm-message-state:message-id:date:mime-version:user-agent:subject :content-language:to:references:from:in-reply-to :content-transfer-encoding; bh=PewOYsPVjYSZi4rPXWpn0uAnxjvyQ9F1AaFk+RNPmSo=; b=OmXQWTKKotVhLVUCq+xo4IYACFmwQ1cDDYI9uOlf0IkF82l4bqcI8TB979FKW1xw6+ 38bTCWSFSc57hL4+7Cy8j87Jb+IhtkLcgg/pTuu3haPUgqFc8Z1fHiScax1CKFMcPy6z 3wtyh57frjp1SbOGpZENwEo1ufOOJTeHlWRXa+k/vlGZhMe9ZURUD5U2EC+Y3QVL3Wch /T4y9tABPUGlobJh9wTtyNJZyhBFUHKQ6Oae+xH0jeMGqxg1lJLRnqwJGZnv0sp9gyTr BdGAquX9METIhHec/poRID7qLwQt4MriU4UuB4K5rd6ShLMmucmGTmzR4tXA8zY6edt3 V25g== X-Gm-Message-State: AOAM533KZp2OQykK+4X/OAgYyHCzarAJciASvwtjX3KUl2kDXqoRKy8u BBCP0D0jMAuCEq5vZqT3vdT0FG1Zc2nyTw== X-Google-Smtp-Source: ABdhPJxA77AZAX3P1e74L9KzvD2Ulf3BPgfsUN7DkPQv/PcsoXqrqhZHvR9wTJjrMxIabZhqfluFMQ== X-Received: by 2002:aca:b10b:0:b0:2d7:a316:47d with SMTP id a11-20020acab10b000000b002d7a316047dmr6923914oif.134.1646656490251; Mon, 07 Mar 2022 04:34:50 -0800 (PST) Received: from ?IPV6:2804:431:c7ca:2dcb:c0d9:9b45:50c5:bc8e? ([2804:431:c7ca:2dcb:c0d9:9b45:50c5:bc8e]) by smtp.gmail.com with ESMTPSA id p14-20020a9d744e000000b005b235f5cf92sm1229615otk.65.2022.03.07.04.34.48 (version=TLS1_3 cipher=TLS_AES_128_GCM_SHA256 bits=128/128); Mon, 07 Mar 2022 04:34:49 -0800 (PST) Message-ID: <1b82f293-bff9-f642-e836-38981023ec94@linaro.org> Date: Mon, 7 Mar 2022 09:34:47 -0300 MIME-Version: 1.0 User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:91.0) Gecko/20100101 Thunderbird/91.7.0 Subject: Re: [PATCH v4 3/4] iconv: make utf-7.c able to use variants Content-Language: en-US To: Max Gautier , libc-alpha@sourceware.org References: <87blcw9ptq.fsf@oldenburg.str.redhat.com> <20211209093152.313872-1-mg@max.gautier.name> <20211209093152.313872-4-mg@max.gautier.name> From: Adhemerval Zanella In-Reply-To: <20211209093152.313872-4-mg@max.gautier.name> Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 7bit X-Spam-Status: No, score=-12.2 required=5.0 tests=BAYES_00, DKIM_SIGNED, DKIM_VALID, DKIM_VALID_AU, DKIM_VALID_EF, GIT_PATCH_0, NICE_REPLY_A, RCVD_IN_DNSWL_NONE, SPF_HELO_NONE, SPF_PASS, TXREP, T_FILL_THIS_FORM_SHORT, T_SCC_BODY_TEXT_LINE autolearn=ham autolearn_force=no version=3.4.4 X-Spam-Checker-Version: SpamAssassin 3.4.4 (2020-01-24) on server2.sourceware.org X-BeenThere: libc-alpha@sourceware.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Libc-alpha mailing list List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Mon, 07 Mar 2022 12:34:53 -0000 On 09/12/2021 06:31, Max Gautier via Libc-alpha wrote: > Add infrastructure in utf-7.c to handle variants. The approach comes from > iso646.c > The variant is defined at gconv_init time and is passed as a > supplementary variable. > > Signed-off-by: Max Gautier Patch looks ok in general, there are style issues that needed to be fixed and some minor suggestions below. > --- > iconvdata/utf-7.c | 239 +++++++++++++++++++++++++++++++++------------- > 1 file changed, 174 insertions(+), 65 deletions(-) > > diff --git a/iconvdata/utf-7.c b/iconvdata/utf-7.c > index ac7d78141a..965d4220f1 100644 > --- a/iconvdata/utf-7.c > +++ b/iconvdata/utf-7.c > @@ -29,6 +29,24 @@ > #include > > > +enum variant > +{ > + UTF7, > +}; > + > +/* Must be in the same order as enum variant above. */ > +static const char names[] = > + "UTF-7//\0" > + "\0"; > + > +static uint32_t > +shift_character(enum variant const var) > +{ > + if (var == UTF7) > + return '+'; > + else > + abort(); > +} Please use the expected indentation on glibc [1] and other places as well. [1] https://sourceware.org/glibc/wiki/Style_and_Conventions > > static int > between(uint32_t const ch, > @@ -38,37 +56,43 @@ between(uint32_t const ch, > } > > /* The set of "direct characters": > + FOR UTF-7 > A-Z a-z 0-9 ' ( ) , - . / : ? space tab lf cr > */ > > static int > -isdirect (uint32_t ch) > +isdirect (uint32_t ch, enum variant var) > { > - return (between(ch, 'A', 'Z') > - || between(ch, 'a', 'z') > - || between(ch, '0', '9') > - || ch == '\'' || ch == '(' || ch == ')' > - || between(ch, ',', '/') > - || ch == ':' || ch == '?' > - || ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r'); > + if (var == UTF7) > + return (between(ch, 'A', 'Z') > + || between(ch, 'a', 'z') > + || between(ch, '0', '9') > + || ch == '\'' || ch == '(' || ch == ')' > + || between(ch, ',', '/') > + || ch == ':' || ch == '?' > + || ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r'); > + abort(); > } > > > /* The set of "direct and optional direct characters": > + (UTF-7 only) > A-Z a-z 0-9 ' ( ) , - . / : ? space tab lf cr > ! " # $ % & * ; < = > @ [ ] ^ _ ` { | } > */ > > - > static int > -isxdirect (uint32_t ch) > +isxdirect (uint32_t ch, enum variant var) > { > - return (ch == '\t' > - || ch == '\n' > - || ch == '\r' > - || (between(ch, ' ','}') > - && ch != '+' && ch != '\\') > - ); > + return(isdirect(ch, var) > + || (var == UTF7 && > + (between(ch, '!', '&') > + || ch == '*' > + || between(ch, ';', '@') > + || (between(ch, '[', '`') && ch != '\\') > + || between(ch, '{', '}')) > + ) > + ); > } > The change is ok, but maybe adding the variant out makes it more clear: if (var != UTF7) return 0; [...] Also I think since you change it, it would be better to use 'bool' as return type. > > @@ -91,7 +115,7 @@ needs_explicit_shift (uint32_t ch) > > /* Converts a value in the range 0..63 to a base64 encoded char. */ > static unsigned char > -base64 (unsigned int i) > +base64 (unsigned int i, enum variant var) > { > if (i < 26) > return i + 'A'; > @@ -101,7 +125,7 @@ base64 (unsigned int i) > return i - 52 + '0'; > else if (i == 62) > return '+'; > - else if (i == 63) > + else if (i == 63 && var == UTF7) > return '/'; > else > abort (); > @@ -109,9 +133,8 @@ base64 (unsigned int i) > > Ok. > /* Definitions used in the body of the `gconv' function. */ > -#define CHARSET_NAME "UTF-7//" > -#define DEFINE_INIT 1 > -#define DEFINE_FINI 1 > +#define DEFINE_INIT 0 > +#define DEFINE_FINI 0 > #define FROM_LOOP from_utf7_loop > #define TO_LOOP to_utf7_loop > #define MIN_NEEDED_FROM 1 > @@ -119,11 +142,27 @@ base64 (unsigned int i) > #define MIN_NEEDED_TO 4 > #define MAX_NEEDED_TO 4 > #define ONE_DIRECTION 0 > +#define FROM_DIRECTION (dir == from_utf7) > #define PREPARE_LOOP \ > mbstate_t saved_state; \ > - mbstate_t *statep = data->__statep; > -#define EXTRA_LOOP_ARGS , statep > + mbstate_t *statep = data->__statep; \ > + enum direction dir = ((struct utf7_data *) step->__data)->dir; \ > + enum direction var = ((struct utf7_data *) step->__data)->var; > +#define EXTRA_LOOP_ARGS , statep, var > + > + > +enum direction > +{ > + illegal_dir, > + from_utf7, > + to_utf7 > +}; Style, use two spaces. > > +struct utf7_data > +{ > + enum direction dir; > + enum variant var; > +}; > > /* Since we might have to reset input pointer we must be able to save > and restore the state. */ > @@ -133,6 +172,72 @@ base64 (unsigned int i) > else \ > *statep = saved_state > > +extern int gconv_init (struct __gconv_step *step); I think there is no need to add the prototype here. > +int > +gconv_init (struct __gconv_step *step) > +{ > + /* Determine which direction. */ > + struct utf7_data *new_data; > + enum direction dir = illegal_dir; > + > + enum variant var = 0; > + for (const char *name = names; *name != '\0'; > + name = __rawmemchr (name, '\0') + 1) > + { > + if (__strcasecmp (step->__from_name, name) == 0) > + { > + dir = from_utf7; > + break; > + } > + else if (__strcasecmp (step->__to_name, name) == 0) > + { > + dir = to_utf7; > + break; > + } > + ++var; > + } > + > + if (__builtin_expect (dir, from_utf7) != illegal_dir) Use __glibc_likely. > + { > + new_data = malloc (sizeof (*new_data)); > + if (new_data == NULL) > + return __GCONV_NOMEM; > + > + new_data->dir = dir; > + new_data->var = var; > + step->__data = new_data; > + > + if (dir == from_utf7) > + { > + step->__min_needed_from = MIN_NEEDED_FROM; > + step->__max_needed_from = MAX_NEEDED_FROM; > + step->__min_needed_to = MIN_NEEDED_TO; > + step->__max_needed_to = MAX_NEEDED_TO; > + } > + else > + { > + step->__min_needed_from = MIN_NEEDED_TO; > + step->__max_needed_from = MAX_NEEDED_TO; > + step->__min_needed_to = MIN_NEEDED_FROM; > + step->__max_needed_to = MAX_NEEDED_FROM; > + } > + } > + else > + return __GCONV_NOCONV; > + > + step->__stateful = 1; > + > + return __GCONV_OK; > +} > + > +extern void gconv_end (struct __gconv_step *data); > +void > +gconv_end (struct __gconv_step *data) > +{ > + free (data->__data); > +} > + > + > > /* First define the conversion function from UTF-7 to UCS4. > The state is structured as follows: > @@ -160,13 +265,13 @@ base64 (unsigned int i) > if ((statep->__count >> 3) == 0) \ > { \ > /* base64 encoding inactive. */ \ > - if (isxdirect (ch)) \ > + if (isxdirect (ch, var)) \ > { \ > inptr++; \ > put32 (outptr, ch); \ > outptr += 4; \ > } \ > - else if (__glibc_likely (ch == '+')) \ > + else if (__glibc_likely (ch == shift_character(var))) \ > { \ > if (__glibc_unlikely (inptr + 2 > inend)) \ > { \ > @@ -291,7 +396,7 @@ base64 (unsigned int i) > } \ > } > #define LOOP_NEED_FLAGS > -#define EXTRA_LOOP_DECLS , mbstate_t *statep > +#define EXTRA_LOOP_DECLS , mbstate_t *statep, enum variant var > #include > > > @@ -322,7 +427,7 @@ base64 (unsigned int i) > if ((statep->__count & 0x18) == 0) \ > { \ > /* base64 encoding inactive */ \ > - if (isdirect (ch)) \ > + if (isdirect (ch, var)) \ > { \ > *outptr++ = (unsigned char) ch; \ > } \ > @@ -330,7 +435,7 @@ base64 (unsigned int i) > { \ > size_t count; \ > \ > - if (ch == '+') \ > + if (ch == shift_character(var)) \ > count = 2; \ > else if (ch < 0x10000) \ > count = 3; \ > @@ -345,13 +450,13 @@ base64 (unsigned int i) > break; \ > } \ > \ > - *outptr++ = '+'; \ > - if (ch == '+') \ > + *outptr++ = shift_character(var); \ > + if (ch == shift_character(var)) \ > *outptr++ = '-'; \ > else if (ch < 0x10000) \ > { \ > - *outptr++ = base64 (ch >> 10); \ > - *outptr++ = base64 ((ch >> 4) & 0x3f); \ > + *outptr++ = base64 (ch >> 10, var); \ > + *outptr++ = base64 ((ch >> 4) & 0x3f, var); \ > statep->__count = ((ch & 15) << 5) | (3 << 3); \ > } \ > else if (ch < 0x110000) \ > @@ -360,11 +465,11 @@ base64 (unsigned int i) > uint32_t ch2 = 0xdc00 + ((ch - 0x10000) & 0x3ff); \ > \ > ch = (ch1 << 16) | ch2; \ > - *outptr++ = base64 (ch >> 26); \ > - *outptr++ = base64 ((ch >> 20) & 0x3f); \ > - *outptr++ = base64 ((ch >> 14) & 0x3f); \ > - *outptr++ = base64 ((ch >> 8) & 0x3f); \ > - *outptr++ = base64 ((ch >> 2) & 0x3f); \ > + *outptr++ = base64 (ch >> 26, var); \ > + *outptr++ = base64 ((ch >> 20) & 0x3f, var); \ > + *outptr++ = base64 ((ch >> 14) & 0x3f, var); \ > + *outptr++ = base64 ((ch >> 8) & 0x3f, var); \ > + *outptr++ = base64 ((ch >> 2) & 0x3f, var); \ > statep->__count = ((ch & 3) << 7) | (2 << 3); \ > } \ > else \ > @@ -374,7 +479,7 @@ base64 (unsigned int i) > else \ > { \ > /* base64 encoding active */ \ > - if (isdirect (ch)) \ > + if (isdirect (ch, var)) \ > { \ > /* deactivate base64 encoding */ \ > size_t count; \ > @@ -388,7 +493,7 @@ base64 (unsigned int i) > } \ > \ > if ((statep->__count & 0x18) >= 0x10) \ > - *outptr++ = base64 ((statep->__count >> 3) & ~3); \ > + *outptr++ = base64 ((statep->__count >> 3) & ~3, var); \ > if (needs_explicit_shift (ch)) \ > *outptr++ = '-'; \ > *outptr++ = (unsigned char) ch; \ > @@ -416,22 +521,24 @@ base64 (unsigned int i) > switch ((statep->__count >> 3) & 3) \ > { \ > case 1: \ > - *outptr++ = base64 (ch >> 10); \ > - *outptr++ = base64 ((ch >> 4) & 0x3f); \ > + *outptr++ = base64 (ch >> 10, var); \ > + *outptr++ = base64 ((ch >> 4) & 0x3f, var); \ > statep->__count = ((ch & 15) << 5) | (3 << 3); \ > break; \ > case 2: \ > *outptr++ = \ > - base64 (((statep->__count >> 3) & ~3) | (ch >> 12)); \ > - *outptr++ = base64 ((ch >> 6) & 0x3f); \ > - *outptr++ = base64 (ch & 0x3f); \ > + base64 (((statep->__count >> 3) & ~3) | (ch >> 12), \ > + var); \ > + *outptr++ = base64 ((ch >> 6) & 0x3f, var); \ > + *outptr++ = base64 (ch & 0x3f, var); \ > statep->__count = (1 << 3); \ > break; \ > case 3: \ > *outptr++ = \ > - base64 (((statep->__count >> 3) & ~3) | (ch >> 14)); \ > - *outptr++ = base64 ((ch >> 8) & 0x3f); \ > - *outptr++ = base64 ((ch >> 2) & 0x3f); \ > + base64 (((statep->__count >> 3) & ~3) | (ch >> 14), \ > + var); \ > + *outptr++ = base64 ((ch >> 8) & 0x3f, var); \ > + *outptr++ = base64 ((ch >> 2) & 0x3f, var); \ > statep->__count = ((ch & 3) << 7) | (2 << 3); \ > break; \ > default: \ > @@ -447,30 +554,32 @@ base64 (unsigned int i) > switch ((statep->__count >> 3) & 3) \ > { \ > case 1: \ > - *outptr++ = base64 (ch >> 26); \ > - *outptr++ = base64 ((ch >> 20) & 0x3f); \ > - *outptr++ = base64 ((ch >> 14) & 0x3f); \ > - *outptr++ = base64 ((ch >> 8) & 0x3f); \ > - *outptr++ = base64 ((ch >> 2) & 0x3f); \ > + *outptr++ = base64 (ch >> 26, var); \ > + *outptr++ = base64 ((ch >> 20) & 0x3f, var); \ > + *outptr++ = base64 ((ch >> 14) & 0x3f, var); \ > + *outptr++ = base64 ((ch >> 8) & 0x3f, var); \ > + *outptr++ = base64 ((ch >> 2) & 0x3f, var); \ > statep->__count = ((ch & 3) << 7) | (2 << 3); \ > break; \ > case 2: \ > *outptr++ = \ > - base64 (((statep->__count >> 3) & ~3) | (ch >> 28)); \ > - *outptr++ = base64 ((ch >> 22) & 0x3f); \ > - *outptr++ = base64 ((ch >> 16) & 0x3f); \ > - *outptr++ = base64 ((ch >> 10) & 0x3f); \ > - *outptr++ = base64 ((ch >> 4) & 0x3f); \ > + base64 (((statep->__count >> 3) & ~3) | (ch >> 28), \ > + var); \ > + *outptr++ = base64 ((ch >> 22) & 0x3f, var); \ > + *outptr++ = base64 ((ch >> 16) & 0x3f, var); \ > + *outptr++ = base64 ((ch >> 10) & 0x3f, var); \ > + *outptr++ = base64 ((ch >> 4) & 0x3f, var); \ > statep->__count = ((ch & 15) << 5) | (3 << 3); \ > break; \ > case 3: \ > *outptr++ = \ > - base64 (((statep->__count >> 3) & ~3) | (ch >> 30)); \ > - *outptr++ = base64 ((ch >> 24) & 0x3f); \ > - *outptr++ = base64 ((ch >> 18) & 0x3f); \ > - *outptr++ = base64 ((ch >> 12) & 0x3f); \ > - *outptr++ = base64 ((ch >> 6) & 0x3f); \ > - *outptr++ = base64 (ch & 0x3f); \ > + base64 (((statep->__count >> 3) & ~3) | (ch >> 30), \ > + var); \ > + *outptr++ = base64 ((ch >> 24) & 0x3f, var); \ > + *outptr++ = base64 ((ch >> 18) & 0x3f, var); \ > + *outptr++ = base64 ((ch >> 12) & 0x3f, var); \ > + *outptr++ = base64 ((ch >> 6) & 0x3f, var); \ > + *outptr++ = base64 (ch & 0x3f, var); \ > statep->__count = (1 << 3); \ > break; \ > default: \ > @@ -486,7 +595,7 @@ base64 (unsigned int i) > inptr += 4; \ > } Ok > #define LOOP_NEED_FLAGS > -#define EXTRA_LOOP_DECLS , mbstate_t *statep > +#define EXTRA_LOOP_DECLS , mbstate_t *statep, enum variant var > #include > > > @@ -516,7 +625,7 @@ base64 (unsigned int i) > { \ > /* Write out the shift sequence. */ \ > if ((state & 0x18) >= 0x10) \ > - *outbuf++ = base64 ((state >> 3) & ~3); \ > + *outbuf++ = base64 ((state >> 3) & ~3, var); \ > *outbuf++ = '-'; \ > \ > data->__statep->__count = 0; \ Ok.