From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from us-smtp-delivery-124.mimecast.com (us-smtp-delivery-124.mimecast.com [170.10.129.124]) by sourceware.org (Postfix) with ESMTPS id 5BA0B385DC03 for ; Mon, 4 Jul 2022 19:54:14 +0000 (GMT) DMARC-Filter: OpenDMARC Filter v1.4.1 sourceware.org 5BA0B385DC03 Received: from mail-io1-f71.google.com (mail-io1-f71.google.com [209.85.166.71]) by relay.mimecast.com with ESMTP with STARTTLS (version=TLSv1.2, cipher=TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384) id us-mta-17-xM8IQXe2OQ6zIEVBgjitgA-1; Mon, 04 Jul 2022 15:54:12 -0400 X-MC-Unique: xM8IQXe2OQ6zIEVBgjitgA-1 Received: by mail-io1-f71.google.com with SMTP id h18-20020a5d9712000000b00674f83a60f0so5930151iol.4 for ; Mon, 04 Jul 2022 12:54:12 -0700 (PDT) X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=x-gm-message-state:message-id:date:mime-version:user-agent:subject :content-language:to:references:from:organization:in-reply-to :content-transfer-encoding; bh=UmqTIGw+NKcWS0OBp1uJFvrrOOyTPoaxKgP6OQwVssQ=; b=fBLHqfh2h8EZNJ/oYT0FKE547Us10exb3KYRLqE4Jqncog7kQxSbSvypRhmr+yjExT ROl9llkBIJpfRh6FftpTbHetFe/DI/aTAUTOQG9FMUBvZhWKJq1iq+5NJFk1IGQstrTx VmkcqElhQEJpP+XmfXeah2qzwfAY0+9M/pAW5OzPaYuQ3qBarn4AXNGbx/n1WM4Vjpt4 PqvKMBz4zWoHlEbRckK6y3s/vo1totOrqRaygw1gqVh+EkLciby8LKVDdjHOwSiOUiaV iSEqfU8Rjd8glNObIKvP3GJzZDLjCOsHFYJlypPoSTwP/vS3FWBhwlVKABvbp/cgIA0N CMYg== X-Gm-Message-State: AJIora9RLXb0ZvRWrcXsrt210XI1aDVGZCq/ER7uY6CsgqkIFW3kng4t BqS808AvsxqVq2Fecubl0I7nSyLSYTmjMoYer8wcPI+pgCxXZ0jn/zUvm4WhccWq7M3M1pPlcUj 2b+TEO7KXyN0eM9p4+F3m X-Received: by 2002:a05:6e02:1ca3:b0:2da:b273:200d with SMTP id x3-20020a056e021ca300b002dab273200dmr18904931ill.31.1656964451906; Mon, 04 Jul 2022 12:54:11 -0700 (PDT) X-Google-Smtp-Source: AGRyM1tdDxp0MnIGA514YV7Mlal3JsZj6hdcFhG9UPDsBD2LHBMhY40DKZtA6DZmwAPIoNWzfVcucA== X-Received: by 2002:a05:6e02:1ca3:b0:2da:b273:200d with SMTP id x3-20020a056e021ca300b002dab273200dmr18904922ill.31.1656964451588; Mon, 04 Jul 2022 12:54:11 -0700 (PDT) Received: from [192.168.0.241] (135-23-175-80.cpe.pppoe.ca. [135.23.175.80]) by smtp.gmail.com with ESMTPSA id c18-20020a92c8d2000000b002d8f62d2e3bsm12181504ilq.86.2022.07.04.12.54.10 (version=TLS1_3 cipher=TLS_AES_128_GCM_SHA256 bits=128/128); Mon, 04 Jul 2022 12:54:11 -0700 (PDT) Message-ID: <6afe4672-4a35-4056-e4c1-292cafb2c2fe@redhat.com> Date: Mon, 4 Jul 2022 15:54:10 -0400 MIME-Version: 1.0 User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:91.0) Gecko/20100101 Thunderbird/91.9.0 Subject: Re: [PATCH 1/5] locale: Turn ADDC and ADDS into functions in linereader.c To: Florian Weimer , libc-alpha@sourceware.org References: <6e108376cb28f366ead22cab2347245bf43e4060.1652994079.git.fweimer@redhat.com> From: Carlos O'Donell Organization: Red Hat In-Reply-To: <6e108376cb28f366ead22cab2347245bf43e4060.1652994079.git.fweimer@redhat.com> X-Mimecast-Spam-Score: 0 X-Mimecast-Originator: redhat.com Content-Language: en-US Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 7bit X-Spam-Status: No, score=-16.6 required=5.0 tests=BAYES_00, DKIMWL_WL_HIGH, DKIM_SIGNED, DKIM_VALID, DKIM_VALID_AU, DKIM_VALID_EF, GIT_PATCH_0, NICE_REPLY_A, RCVD_IN_DNSWL_LOW, SPF_HELO_NONE, SPF_NONE, TXREP, T_SCC_BODY_TEXT_LINE autolearn=ham autolearn_force=no version=3.4.6 X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on server2.sourceware.org X-BeenThere: libc-alpha@sourceware.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Libc-alpha mailing list List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Mon, 04 Jul 2022 19:54:16 -0000 On 5/19/22 17:06, Florian Weimer via Libc-alpha wrote: > And introduce struct lr_buffer. The functions addc and adds can > be called from functions, enabling subsequent refactoring. LGTM. Reviewed-by: Carlos O'Donell Tested-by: Carlos O'Donell > --- > locale/programs/linereader.c | 203 ++++++++++++++++++----------------- > 1 file changed, 104 insertions(+), 99 deletions(-) > > diff --git a/locale/programs/linereader.c b/locale/programs/linereader.c > index 146d32e5e2..d5367e0a1e 100644 > --- a/locale/programs/linereader.c > +++ b/locale/programs/linereader.c > @@ -416,36 +416,60 @@ get_toplvl_escape (struct linereader *lr) > return &lr->token; > } > > +/* Multibyte string buffer. */ > +struct lr_buffer > +{ > + size_t act; > + size_t max; > + char *buf; > +}; > > -#define ADDC(ch) \ > - do \ > - { \ > - if (bufact == bufmax) \ > - { \ > - bufmax *= 2; \ > - buf = xrealloc (buf, bufmax); \ > - } \ > - buf[bufact++] = (ch); \ > - } \ > - while (0) OK. Removing macros is awesome. > +/* Initialize *LRB with a default-sized buffer. */ > +static void > +lr_buffer_init (struct lr_buffer *lrb) > +{ > + lrb->act = 0; > + lrb->max = 56; OK. Taken from original code for a 56 byte maximum. > + lrb->buf = xmalloc (lrb->max); > +} OK. > > +/* Transfers the buffer string from *LRB to LR->token.mbstr. */ > +static void > +lr_buffer_to_token (struct lr_buffer *lrb, struct linereader *lr) > +{ > + lr->token.val.str.startmb = xrealloc (lrb->buf, lrb->act + 1); > + lr->token.val.str.startmb[lrb->act] = '\0'; > + lr->token.val.str.lenmb = lrb->act; > +} OK. > > -#define ADDS(s, l) \ > - do \ > - { \ > - size_t _l = (l); \ > - if (bufact + _l > bufmax) \ > - { \ > - if (bufact < _l) \ > - bufact = _l; \ > - bufmax *= 2; \ > - buf = xrealloc (buf, bufmax); \ > - } \ > - memcpy (&buf[bufact], s, _l); \ > - bufact += _l; \ > - } \ > - while (0) OK. Removing macros is awesome. > +/* Adds CH to *LRB. */ > +static void > +addc (struct lr_buffer *lrb, char ch) > +{ > + if (lrb->act == lrb->max) > + { > + lrb->max *= 2; > + lrb->buf = xrealloc (lrb->buf, lrb->max); > + } > + lrb->buf[lrb->act++] = ch; > +} OK. Matches original addc macro. > > +/* Adds L bytes at S to *LRB. */ > +static void > +adds (struct lr_buffer *lrb, const unsigned char *s, size_t l) > +{ > + if (lrb->max - lrb->act < l) > + { > + size_t required_size = lrb->act + l; > + size_t new_max = 2 * lrb->max; > + if (new_max < required_size) > + new_max = required_size; > + lrb->buf = xrealloc (lrb->buf, new_max); > + lrb->max = new_max; > + } > + memcpy (lrb->buf + lrb->act, s, l); > + lrb->act += l; > +} OK. Matches original adds macro. > > #define ADDWC(ch) \ > do \ > @@ -467,13 +491,11 @@ get_symname (struct linereader *lr) > 1. reserved words > 2. ISO 10646 position values > 3. all other. */ > - char *buf; > - size_t bufact = 0; > - size_t bufmax = 56; OK. > const struct keyword_t *kw; > int ch; > + struct lr_buffer lrb; > > - buf = (char *) xmalloc (bufmax); > + lr_buffer_init (&lrb); OK. > > do > { > @@ -481,13 +503,13 @@ get_symname (struct linereader *lr) > if (ch == lr->escape_char) > { > int c2 = lr_getc (lr); > - ADDC (c2); > + addc (&lrb, c2); OK. > > if (c2 == '\n') > ch = '\n'; > } > else > - ADDC (ch); > + addc (&lrb, ch); OK. > } > while (ch != '>' && ch != '\n'); > > @@ -495,39 +517,35 @@ get_symname (struct linereader *lr) > lr_error (lr, _("unterminated symbolic name")); > > /* Test for ISO 10646 position value. */ > - if (buf[0] == 'U' && (bufact == 6 || bufact == 10)) > + if (lrb.buf[0] == 'U' && (lrb.act == 6 || lrb.act == 10)) OK. > { > - char *cp = buf + 1; > - while (cp < &buf[bufact - 1] && isxdigit (*cp)) > + char *cp = lrb.buf + 1; > + while (cp < &lrb.buf[lrb.act - 1] && isxdigit (*cp)) OK. > ++cp; > > - if (cp == &buf[bufact - 1]) > + if (cp == &lrb.buf[lrb.act - 1]) OK. > { > /* Yes, it is. */ > lr->token.tok = tok_ucs4; > - lr->token.val.ucs4 = strtoul (buf + 1, NULL, 16); > + lr->token.val.ucs4 = strtoul (lrb.buf + 1, NULL, 16); OK. > > return &lr->token; > } > } > > /* It is a symbolic name. Test for reserved words. */ > - kw = lr->hash_fct (buf, bufact - 1); > + kw = lr->hash_fct (lrb.buf, lrb.act - 1); OK. > > if (kw != NULL && kw->symname_or_ident == 1) > { > lr->token.tok = kw->token; > - free (buf); > + free (lrb.buf); OK. We have an init function but then directly free lrb.buf. This is OK, I wonder if there would be any use for lr_buffer_free () with some checking. > } > else > { > lr->token.tok = tok_bsymbol; > - > - buf = xrealloc (buf, bufact + 1); > - buf[bufact] = '\0'; > - > - lr->token.val.str.startmb = buf; > - lr->token.val.str.lenmb = bufact - 1; > + lr_buffer_to_token (&lrb, lr); > + --lr->token.val.str.lenmb; /* Hide the training '>'. */ OK. > } > > return &lr->token; > @@ -537,16 +555,13 @@ get_symname (struct linereader *lr) > static struct token * > get_ident (struct linereader *lr) > { > - char *buf; > - size_t bufact; > - size_t bufmax = 56; > const struct keyword_t *kw; > int ch; > + struct lr_buffer lrb; > > - buf = xmalloc (bufmax); > - bufact = 0; > + lr_buffer_init (&lrb); > > - ADDC (lr->buf[lr->idx - 1]); > + addc (&lrb, lr->buf[lr->idx - 1]); > > while (!isspace ((ch = lr_getc (lr))) && ch != '"' && ch != ';' > && ch != '<' && ch != ',' && ch != EOF) > @@ -560,27 +575,22 @@ get_ident (struct linereader *lr) > break; > } > } > - ADDC (ch); > + addc (&lrb, ch); > } > > lr_ungetc (lr, ch); > > - kw = lr->hash_fct (buf, bufact); > + kw = lr->hash_fct (lrb.buf, lrb.act); > > if (kw != NULL && kw->symname_or_ident == 0) > { > lr->token.tok = kw->token; > - free (buf); > + free (lrb.buf); OK. > } > else > { > lr->token.tok = tok_ident; > - > - buf = xrealloc (buf, bufact + 1); > - buf[bufact] = '\0'; > - > - lr->token.val.str.startmb = buf; > - lr->token.val.str.lenmb = bufact; > + lr_buffer_to_token (&lrb, lr); OK. > } > > return &lr->token; > @@ -593,14 +603,10 @@ get_string (struct linereader *lr, const struct charmap_t *charmap, > int verbose) > { > int return_widestr = lr->return_widestr; > - char *buf; > + struct lr_buffer lrb; > wchar_t *buf2 = NULL; > - size_t bufact; > - size_t bufmax = 56; > > - /* We must return two different strings. */ > - buf = xmalloc (bufmax); > - bufact = 0; > + lr_buffer_init (&lrb); OK. > > /* We know it'll be a string. */ > lr->token.tok = tok_string; > @@ -613,19 +619,19 @@ get_string (struct linereader *lr, const struct charmap_t *charmap, > > buf2 = NULL; > while ((ch = lr_getc (lr)) != '"' && ch != '\n' && ch != EOF) > - ADDC (ch); > + addc (&lrb, ch); > > /* Catch errors with trailing escape character. */ > - if (bufact > 0 && buf[bufact - 1] == lr->escape_char > - && (bufact == 1 || buf[bufact - 2] != lr->escape_char)) > + if (lrb.act > 0 && lrb.buf[lrb.act - 1] == lr->escape_char > + && (lrb.act == 1 || lrb.buf[lrb.act - 2] != lr->escape_char)) > { > lr_error (lr, _("illegal escape sequence at end of string")); > - --bufact; > + --lrb.act; > } > else if (ch == '\n' || ch == EOF) > lr_error (lr, _("unterminated string")); > > - ADDC ('\0'); > + addc (&lrb, '\0'); > } > else > { > @@ -662,7 +668,7 @@ get_string (struct linereader *lr, const struct charmap_t *charmap, > break; > } > > - ADDC (ch); > + addc (&lrb, ch); > if (return_widestr) > ADDWC ((uint32_t) ch); > > @@ -671,7 +677,7 @@ get_string (struct linereader *lr, const struct charmap_t *charmap, > > /* Now we have to search for the end of the symbolic name, i.e., > the closing '>'. */ > - startidx = bufact; > + startidx = lrb.act; OK. > while ((ch = lr_getc (lr)) != '>' && ch != '\n' && ch != EOF) > { > if (ch == lr->escape_char) > @@ -680,12 +686,12 @@ get_string (struct linereader *lr, const struct charmap_t *charmap, > if (ch == '\n' || ch == EOF) > break; > } > - ADDC (ch); > + addc (&lrb, ch); > } > if (ch == '\n' || ch == EOF) > /* Not a correct string. */ > break; > - if (bufact == startidx) > + if (lrb.act == startidx) > { > /* <> is no correct name. Ignore it and also signal an > error. */ > @@ -694,23 +700,23 @@ get_string (struct linereader *lr, const struct charmap_t *charmap, > } > > /* It might be a Uxxxx symbol. */ > - if (buf[startidx] == 'U' > - && (bufact - startidx == 5 || bufact - startidx == 9)) > + if (lrb.buf[startidx] == 'U' > + && (lrb.act - startidx == 5 || lrb.act - startidx == 9)) > { > - char *cp = buf + startidx + 1; > - while (cp < &buf[bufact] && isxdigit (*cp)) > + char *cp = lrb.buf + startidx + 1; > + while (cp < &lrb.buf[lrb.act] && isxdigit (*cp)) > ++cp; > > - if (cp == &buf[bufact]) > + if (cp == &lrb.buf[lrb.act]) > { > char utmp[10]; > > /* Yes, it is. */ > - ADDC ('\0'); > - wch = strtoul (buf + startidx + 1, NULL, 16); > + addc (&lrb, '\0'); > + wch = strtoul (lrb.buf + startidx + 1, NULL, 16); > > /* Now forget about the name we just added. */ > - bufact = startidx; > + lrb.act = startidx; > > if (return_widestr) > ADDWC (wch); > @@ -774,7 +780,7 @@ get_string (struct linereader *lr, const struct charmap_t *charmap, > seq = charmap_find_value (charmap, utmp, > 9); > assert (seq != NULL); > - ADDS (seq->bytes, seq->nbytes); > + adds (&lrb, seq->bytes, seq->nbytes); > } > > continue; > @@ -788,24 +794,24 @@ get_string (struct linereader *lr, const struct charmap_t *charmap, > } > > if (seq != NULL) > - ADDS (seq->bytes, seq->nbytes); > + adds (&lrb, seq->bytes, seq->nbytes); > > continue; > } > } > > - /* We now have the symbolic name in buf[startidx] to > - buf[bufact-1]. Now find out the value for this character > + /* We now have the symbolic name in lrb.buf[startidx] to > + lrb.buf[lrb.act-1]. Now find out the value for this character > in the charmap as well as in the repertoire map (in this > order). */ > - seq = charmap_find_value (charmap, &buf[startidx], > - bufact - startidx); > + seq = charmap_find_value (charmap, &lrb.buf[startidx], > + lrb.act - startidx); OK. > > if (seq == NULL) > { > /* This name is not in the charmap. */ > lr_error (lr, _("symbol `%.*s' not in charmap"), > - (int) (bufact - startidx), &buf[startidx]); > + (int) (lrb.act - startidx), &lrb.buf[startidx]); OK. > illegal_string = 1; > } > > @@ -816,8 +822,8 @@ get_string (struct linereader *lr, const struct charmap_t *charmap, > wch = seq->ucs4; > else > { > - wch = repertoire_find_value (repertoire, &buf[startidx], > - bufact - startidx); > + wch = repertoire_find_value (repertoire, &lrb.buf[startidx], > + lrb.act - startidx); > if (seq != NULL) > seq->ucs4 = wch; > } > @@ -826,7 +832,7 @@ get_string (struct linereader *lr, const struct charmap_t *charmap, > { > /* This name is not in the repertoire map. */ > lr_error (lr, _("symbol `%.*s' not in repertoire map"), > - (int) (bufact - startidx), &buf[startidx]); > + (int) (lrb.act - startidx), &lrb.buf[startidx]); > illegal_string = 1; > } > else > @@ -834,11 +840,11 @@ get_string (struct linereader *lr, const struct charmap_t *charmap, > } > > /* Now forget about the name we just added. */ > - bufact = startidx; > + lrb.act = startidx; > > /* And copy the bytes. */ > if (seq != NULL) > - ADDS (seq->bytes, seq->nbytes); > + adds (&lrb, seq->bytes, seq->nbytes); > } > > if (ch == '\n' || ch == EOF) > @@ -849,7 +855,7 @@ get_string (struct linereader *lr, const struct charmap_t *charmap, > > if (illegal_string) > { > - free (buf); > + free (lrb.buf); OK. > free (buf2); > lr->token.val.str.startmb = NULL; > lr->token.val.str.lenmb = 0; > @@ -859,7 +865,7 @@ get_string (struct linereader *lr, const struct charmap_t *charmap, > return &lr->token; > } > > - ADDC ('\0'); > + addc (&lrb, '\0'); > > if (return_widestr) > { > @@ -870,8 +876,7 @@ get_string (struct linereader *lr, const struct charmap_t *charmap, > } > } > > - lr->token.val.str.startmb = xrealloc (buf, bufact); > - lr->token.val.str.lenmb = bufact; > + lr_buffer_to_token (&lrb, lr); > > return &lr->token; > } -- Cheers, Carlos.