From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <carlos@redhat.com>
Received: from us-smtp-delivery-124.mimecast.com
 (us-smtp-delivery-124.mimecast.com [170.10.129.124])
 by sourceware.org (Postfix) with ESMTPS id 5BA0B385DC03
 for <libc-alpha@sourceware.org>; Mon,  4 Jul 2022 19:54:14 +0000 (GMT)
DMARC-Filter: OpenDMARC Filter v1.4.1 sourceware.org 5BA0B385DC03
Received: from mail-io1-f71.google.com (mail-io1-f71.google.com
 [209.85.166.71]) by relay.mimecast.com with ESMTP with STARTTLS
 (version=TLSv1.2, cipher=TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384) id
 us-mta-17-xM8IQXe2OQ6zIEVBgjitgA-1; Mon, 04 Jul 2022 15:54:12 -0400
X-MC-Unique: xM8IQXe2OQ6zIEVBgjitgA-1
Received: by mail-io1-f71.google.com with SMTP id
 h18-20020a5d9712000000b00674f83a60f0so5930151iol.4
 for <libc-alpha@sourceware.org>; Mon, 04 Jul 2022 12:54:12 -0700 (PDT)
X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
 d=1e100.net; s=20210112;
 h=x-gm-message-state:message-id:date:mime-version:user-agent:subject
 :content-language:to:references:from:organization:in-reply-to
 :content-transfer-encoding;
 bh=UmqTIGw+NKcWS0OBp1uJFvrrOOyTPoaxKgP6OQwVssQ=;
 b=fBLHqfh2h8EZNJ/oYT0FKE547Us10exb3KYRLqE4Jqncog7kQxSbSvypRhmr+yjExT
 ROl9llkBIJpfRh6FftpTbHetFe/DI/aTAUTOQG9FMUBvZhWKJq1iq+5NJFk1IGQstrTx
 VmkcqElhQEJpP+XmfXeah2qzwfAY0+9M/pAW5OzPaYuQ3qBarn4AXNGbx/n1WM4Vjpt4
 PqvKMBz4zWoHlEbRckK6y3s/vo1totOrqRaygw1gqVh+EkLciby8LKVDdjHOwSiOUiaV
 iSEqfU8Rjd8glNObIKvP3GJzZDLjCOsHFYJlypPoSTwP/vS3FWBhwlVKABvbp/cgIA0N
 CMYg==
X-Gm-Message-State: AJIora9RLXb0ZvRWrcXsrt210XI1aDVGZCq/ER7uY6CsgqkIFW3kng4t
 BqS808AvsxqVq2Fecubl0I7nSyLSYTmjMoYer8wcPI+pgCxXZ0jn/zUvm4WhccWq7M3M1pPlcUj
 2b+TEO7KXyN0eM9p4+F3m
X-Received: by 2002:a05:6e02:1ca3:b0:2da:b273:200d with SMTP id
 x3-20020a056e021ca300b002dab273200dmr18904931ill.31.1656964451906; 
 Mon, 04 Jul 2022 12:54:11 -0700 (PDT)
X-Google-Smtp-Source: AGRyM1tdDxp0MnIGA514YV7Mlal3JsZj6hdcFhG9UPDsBD2LHBMhY40DKZtA6DZmwAPIoNWzfVcucA==
X-Received: by 2002:a05:6e02:1ca3:b0:2da:b273:200d with SMTP id
 x3-20020a056e021ca300b002dab273200dmr18904922ill.31.1656964451588; 
 Mon, 04 Jul 2022 12:54:11 -0700 (PDT)
Received: from [192.168.0.241] (135-23-175-80.cpe.pppoe.ca. [135.23.175.80])
 by smtp.gmail.com with ESMTPSA id
 c18-20020a92c8d2000000b002d8f62d2e3bsm12181504ilq.86.2022.07.04.12.54.10
 (version=TLS1_3 cipher=TLS_AES_128_GCM_SHA256 bits=128/128);
 Mon, 04 Jul 2022 12:54:11 -0700 (PDT)
Message-ID: <6afe4672-4a35-4056-e4c1-292cafb2c2fe@redhat.com>
Date: Mon, 4 Jul 2022 15:54:10 -0400
MIME-Version: 1.0
User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:91.0) Gecko/20100101
 Thunderbird/91.9.0
Subject: Re: [PATCH 1/5] locale: Turn ADDC and ADDS into functions in
 linereader.c
To: Florian Weimer <fweimer@redhat.com>, libc-alpha@sourceware.org
References: <cover.1652994079.git.fweimer@redhat.com>
 <6e108376cb28f366ead22cab2347245bf43e4060.1652994079.git.fweimer@redhat.com>
From: Carlos O'Donell <carlos@redhat.com>
Organization: Red Hat
In-Reply-To: <6e108376cb28f366ead22cab2347245bf43e4060.1652994079.git.fweimer@redhat.com>
X-Mimecast-Spam-Score: 0
X-Mimecast-Originator: redhat.com
Content-Language: en-US
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 7bit
X-Spam-Status: No, score=-16.6 required=5.0 tests=BAYES_00, DKIMWL_WL_HIGH,
 DKIM_SIGNED, DKIM_VALID, DKIM_VALID_AU, DKIM_VALID_EF, GIT_PATCH_0,
 NICE_REPLY_A, RCVD_IN_DNSWL_LOW, SPF_HELO_NONE, SPF_NONE, TXREP,
 T_SCC_BODY_TEXT_LINE autolearn=ham autolearn_force=no version=3.4.6
X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on
 server2.sourceware.org
X-BeenThere: libc-alpha@sourceware.org
X-Mailman-Version: 2.1.29
Precedence: list
List-Id: Libc-alpha mailing list <libc-alpha.sourceware.org>
List-Unsubscribe: <https://sourceware.org/mailman/options/libc-alpha>,
 <mailto:libc-alpha-request@sourceware.org?subject=unsubscribe>
List-Archive: <https://sourceware.org/pipermail/libc-alpha/>
List-Post: <mailto:libc-alpha@sourceware.org>
List-Help: <mailto:libc-alpha-request@sourceware.org?subject=help>
List-Subscribe: <https://sourceware.org/mailman/listinfo/libc-alpha>,
 <mailto:libc-alpha-request@sourceware.org?subject=subscribe>
X-List-Received-Date: Mon, 04 Jul 2022 19:54:16 -0000

On 5/19/22 17:06, Florian Weimer via Libc-alpha wrote:
> And introduce struct lr_buffer.  The functions addc and adds can
> be called from functions, enabling subsequent refactoring.

LGTM.

Reviewed-by: Carlos O'Donell <carlos@redhat.com>
Tested-by: Carlos O'Donell <carlos@redhat.com>

> ---
>  locale/programs/linereader.c | 203 ++++++++++++++++++-----------------
>  1 file changed, 104 insertions(+), 99 deletions(-)
> 
> diff --git a/locale/programs/linereader.c b/locale/programs/linereader.c
> index 146d32e5e2..d5367e0a1e 100644
> --- a/locale/programs/linereader.c
> +++ b/locale/programs/linereader.c
> @@ -416,36 +416,60 @@ get_toplvl_escape (struct linereader *lr)
>    return &lr->token;
>  }
>  
> +/* Multibyte string buffer.  */
> +struct lr_buffer
> +{
> +  size_t act;
> +  size_t max;
> +  char *buf;
> +};
>  
> -#define ADDC(ch) \
> -  do									      \
> -    {									      \
> -      if (bufact == bufmax)						      \
> -	{								      \
> -	  bufmax *= 2;							      \
> -	  buf = xrealloc (buf, bufmax);					      \
> -	}								      \
> -      buf[bufact++] = (ch);						      \
> -    }									      \
> -  while (0)

OK. Removing macros is awesome.

> +/* Initialize *LRB with a default-sized buffer.  */
> +static void
> +lr_buffer_init (struct lr_buffer *lrb)
> +{
> + lrb->act = 0;
> + lrb->max = 56;

OK. Taken from original code for a 56 byte maximum.

> + lrb->buf = xmalloc (lrb->max);
> +}

OK.

>  
> +/* Transfers the buffer string from *LRB to LR->token.mbstr.  */
> +static void
> +lr_buffer_to_token (struct lr_buffer *lrb, struct linereader *lr)
> +{
> +  lr->token.val.str.startmb = xrealloc (lrb->buf, lrb->act + 1);
> +  lr->token.val.str.startmb[lrb->act] = '\0';
> +  lr->token.val.str.lenmb = lrb->act;
> +}

OK.

>  
> -#define ADDS(s, l) \
> -  do									      \
> -    {									      \
> -      size_t _l = (l);							      \
> -      if (bufact + _l > bufmax)						      \
> -	{								      \
> -	  if (bufact < _l)						      \
> -	    bufact = _l;						      \
> -	  bufmax *= 2;							      \
> -	  buf = xrealloc (buf, bufmax);					      \
> -	}								      \
> -      memcpy (&buf[bufact], s, _l);					      \
> -      bufact += _l;							      \
> -    }									      \
> -  while (0)

OK. Removing macros is awesome.

> +/* Adds CH to *LRB.  */
> +static void
> +addc (struct lr_buffer *lrb, char ch)
> +{
> +  if (lrb->act == lrb->max)
> +    {
> +      lrb->max *= 2;
> +      lrb->buf = xrealloc (lrb->buf, lrb->max);
> +    }
> +  lrb->buf[lrb->act++] = ch;
> +}

OK. Matches original addc macro.

>  
> +/* Adds L bytes at S to *LRB.  */
> +static void
> +adds (struct lr_buffer *lrb, const unsigned char *s, size_t l)
> +{
> +  if (lrb->max - lrb->act < l)
> +    {
> +      size_t required_size = lrb->act + l;
> +      size_t new_max = 2 * lrb->max;
> +      if (new_max < required_size)
> +	new_max = required_size;
> +      lrb->buf = xrealloc (lrb->buf, new_max);
> +      lrb->max = new_max;
> +    }
> +  memcpy (lrb->buf + lrb->act, s, l);
> +  lrb->act += l;
> +}

OK. Matches original adds macro.

>  
>  #define ADDWC(ch) \
>    do									      \
> @@ -467,13 +491,11 @@ get_symname (struct linereader *lr)
>       1. reserved words
>       2. ISO 10646 position values
>       3. all other.  */
> -  char *buf;
> -  size_t bufact = 0;
> -  size_t bufmax = 56;

OK.

>    const struct keyword_t *kw;
>    int ch;
> +  struct lr_buffer lrb;
>  
> -  buf = (char *) xmalloc (bufmax);
> +  lr_buffer_init (&lrb);

OK.

>  
>    do
>      {
> @@ -481,13 +503,13 @@ get_symname (struct linereader *lr)
>        if (ch == lr->escape_char)
>  	{
>  	  int c2 = lr_getc (lr);
> -	  ADDC (c2);
> +	  addc (&lrb, c2);

OK.

>  
>  	  if (c2 == '\n')
>  	    ch = '\n';
>  	}
>        else
> -	ADDC (ch);
> +	addc (&lrb, ch);

OK.

>      }
>    while (ch != '>' && ch != '\n');
>  
> @@ -495,39 +517,35 @@ get_symname (struct linereader *lr)
>      lr_error (lr, _("unterminated symbolic name"));
>  
>    /* Test for ISO 10646 position value.  */
> -  if (buf[0] == 'U' && (bufact == 6 || bufact == 10))
> +  if (lrb.buf[0] == 'U' && (lrb.act == 6 || lrb.act == 10))

OK.

>      {
> -      char *cp = buf + 1;
> -      while (cp < &buf[bufact - 1] && isxdigit (*cp))
> +      char *cp = lrb.buf + 1;
> +      while (cp < &lrb.buf[lrb.act - 1] && isxdigit (*cp))

OK.

>  	++cp;
>  
> -      if (cp == &buf[bufact - 1])
> +      if (cp == &lrb.buf[lrb.act - 1])

OK.

>  	{
>  	  /* Yes, it is.  */
>  	  lr->token.tok = tok_ucs4;
> -	  lr->token.val.ucs4 = strtoul (buf + 1, NULL, 16);
> +	  lr->token.val.ucs4 = strtoul (lrb.buf + 1, NULL, 16);

OK.

>  
>  	  return &lr->token;
>  	}
>      }
>  
>    /* It is a symbolic name.  Test for reserved words.  */
> -  kw = lr->hash_fct (buf, bufact - 1);
> +  kw = lr->hash_fct (lrb.buf, lrb.act - 1);

OK.

>  
>    if (kw != NULL && kw->symname_or_ident == 1)
>      {
>        lr->token.tok = kw->token;
> -      free (buf);
> +      free (lrb.buf);

OK. We have an init function but then directly free lrb.buf. This is OK, I wonder if
there would be any use for lr_buffer_free () with some checking.

>      }
>    else
>      {
>        lr->token.tok = tok_bsymbol;
> -
> -      buf = xrealloc (buf, bufact + 1);
> -      buf[bufact] = '\0';
> -
> -      lr->token.val.str.startmb = buf;
> -      lr->token.val.str.lenmb = bufact - 1;
> +      lr_buffer_to_token (&lrb, lr);
> +      --lr->token.val.str.lenmb;  /* Hide the training '>'.  */

OK.

>      }
>  
>    return &lr->token;
> @@ -537,16 +555,13 @@ get_symname (struct linereader *lr)
>  static struct token *
>  get_ident (struct linereader *lr)
>  {
> -  char *buf;
> -  size_t bufact;
> -  size_t bufmax = 56;
>    const struct keyword_t *kw;
>    int ch;
> +  struct lr_buffer lrb;
>  
> -  buf = xmalloc (bufmax);
> -  bufact = 0;
> +  lr_buffer_init (&lrb);
>  
> -  ADDC (lr->buf[lr->idx - 1]);
> +  addc (&lrb, lr->buf[lr->idx - 1]);
>  
>    while (!isspace ((ch = lr_getc (lr))) && ch != '"' && ch != ';'
>  	 && ch != '<' && ch != ',' && ch != EOF)
> @@ -560,27 +575,22 @@ get_ident (struct linereader *lr)
>  	      break;
>  	    }
>  	}
> -      ADDC (ch);
> +      addc (&lrb, ch);
>      }
>  
>    lr_ungetc (lr, ch);
>  
> -  kw = lr->hash_fct (buf, bufact);
> +  kw = lr->hash_fct (lrb.buf, lrb.act);
>  
>    if (kw != NULL && kw->symname_or_ident == 0)
>      {
>        lr->token.tok = kw->token;
> -      free (buf);
> +      free (lrb.buf);

OK.

>      }
>    else
>      {
>        lr->token.tok = tok_ident;
> -
> -      buf = xrealloc (buf, bufact + 1);
> -      buf[bufact] = '\0';
> -
> -      lr->token.val.str.startmb = buf;
> -      lr->token.val.str.lenmb = bufact;
> +      lr_buffer_to_token (&lrb, lr);

OK.

>      }
>  
>    return &lr->token;
> @@ -593,14 +603,10 @@ get_string (struct linereader *lr, const struct charmap_t *charmap,
>  	    int verbose)
>  {
>    int return_widestr = lr->return_widestr;
> -  char *buf;
> +  struct lr_buffer lrb;
>    wchar_t *buf2 = NULL;
> -  size_t bufact;
> -  size_t bufmax = 56;
>  
> -  /* We must return two different strings.  */
> -  buf = xmalloc (bufmax);
> -  bufact = 0;
> +  lr_buffer_init (&lrb);

OK.

>  
>    /* We know it'll be a string.  */
>    lr->token.tok = tok_string;
> @@ -613,19 +619,19 @@ get_string (struct linereader *lr, const struct charmap_t *charmap,
>  
>        buf2 = NULL;
>        while ((ch = lr_getc (lr)) != '"' && ch != '\n' && ch != EOF)
> -	ADDC (ch);
> +	addc (&lrb, ch);
>  
>        /* Catch errors with trailing escape character.  */
> -      if (bufact > 0 && buf[bufact - 1] == lr->escape_char
> -	  && (bufact == 1 || buf[bufact - 2] != lr->escape_char))
> +      if (lrb.act > 0 && lrb.buf[lrb.act - 1] == lr->escape_char
> +	  && (lrb.act == 1 || lrb.buf[lrb.act - 2] != lr->escape_char))
>  	{
>  	  lr_error (lr, _("illegal escape sequence at end of string"));
> -	  --bufact;
> +	  --lrb.act;
>  	}
>        else if (ch == '\n' || ch == EOF)
>  	lr_error (lr, _("unterminated string"));
>  
> -      ADDC ('\0');
> +      addc (&lrb, '\0');
>      }
>    else
>      {
> @@ -662,7 +668,7 @@ get_string (struct linereader *lr, const struct charmap_t *charmap,
>  		    break;
>  		}
>  
> -	      ADDC (ch);
> +	      addc (&lrb, ch);
>  	      if (return_widestr)
>  		ADDWC ((uint32_t) ch);
>  
> @@ -671,7 +677,7 @@ get_string (struct linereader *lr, const struct charmap_t *charmap,
>  
>  	  /* Now we have to search for the end of the symbolic name, i.e.,
>  	     the closing '>'.  */
> -	  startidx = bufact;
> +	  startidx = lrb.act;

OK.

>  	  while ((ch = lr_getc (lr)) != '>' && ch != '\n' && ch != EOF)
>  	    {
>  	      if (ch == lr->escape_char)
> @@ -680,12 +686,12 @@ get_string (struct linereader *lr, const struct charmap_t *charmap,
>  		  if (ch == '\n' || ch == EOF)
>  		    break;
>  		}
> -	      ADDC (ch);
> +	      addc (&lrb, ch);
>  	    }
>  	  if (ch == '\n' || ch == EOF)
>  	    /* Not a correct string.  */
>  	    break;
> -	  if (bufact == startidx)
> +	  if (lrb.act == startidx)
>  	    {
>  	      /* <> is no correct name.  Ignore it and also signal an
>  		 error.  */
> @@ -694,23 +700,23 @@ get_string (struct linereader *lr, const struct charmap_t *charmap,
>  	    }
>  
>  	  /* It might be a Uxxxx symbol.  */
> -	  if (buf[startidx] == 'U'
> -	      && (bufact - startidx == 5 || bufact - startidx == 9))
> +	  if (lrb.buf[startidx] == 'U'
> +	      && (lrb.act - startidx == 5 || lrb.act - startidx == 9))
>  	    {
> -	      char *cp = buf + startidx + 1;
> -	      while (cp < &buf[bufact] && isxdigit (*cp))
> +	      char *cp = lrb.buf + startidx + 1;
> +	      while (cp < &lrb.buf[lrb.act] && isxdigit (*cp))
>  		++cp;
>  
> -	      if (cp == &buf[bufact])
> +	      if (cp == &lrb.buf[lrb.act])
>  		{
>  		  char utmp[10];
>  
>  		  /* Yes, it is.  */
> -		  ADDC ('\0');
> -		  wch = strtoul (buf + startidx + 1, NULL, 16);
> +		  addc (&lrb, '\0');
> +		  wch = strtoul (lrb.buf + startidx + 1, NULL, 16);
>  
>  		  /* Now forget about the name we just added.  */
> -		  bufact = startidx;
> +		  lrb.act = startidx;
>  
>  		  if (return_widestr)
>  		    ADDWC (wch);
> @@ -774,7 +780,7 @@ get_string (struct linereader *lr, const struct charmap_t *charmap,
>  				      seq = charmap_find_value (charmap, utmp,
>  								9);
>  				      assert (seq != NULL);
> -				      ADDS (seq->bytes, seq->nbytes);
> +				      adds (&lrb, seq->bytes, seq->nbytes);
>  				    }
>  
>  				  continue;
> @@ -788,24 +794,24 @@ get_string (struct linereader *lr, const struct charmap_t *charmap,
>  		    }
>  
>  		  if (seq != NULL)
> -		    ADDS (seq->bytes, seq->nbytes);
> +		    adds (&lrb, seq->bytes, seq->nbytes);
>  
>  		  continue;
>  		}
>  	    }
>  
> -	  /* We now have the symbolic name in buf[startidx] to
> -	     buf[bufact-1].  Now find out the value for this character
> +	  /* We now have the symbolic name in lrb.buf[startidx] to
> +	     lrb.buf[lrb.act-1].  Now find out the value for this character
>  	     in the charmap as well as in the repertoire map (in this
>  	     order).  */
> -	  seq = charmap_find_value (charmap, &buf[startidx],
> -				    bufact - startidx);
> +	  seq = charmap_find_value (charmap, &lrb.buf[startidx],
> +				    lrb.act - startidx);

OK.

>  
>  	  if (seq == NULL)
>  	    {
>  	      /* This name is not in the charmap.  */
>  	      lr_error (lr, _("symbol `%.*s' not in charmap"),
> -			(int) (bufact - startidx), &buf[startidx]);
> +			(int) (lrb.act - startidx), &lrb.buf[startidx]);

OK.

>  	      illegal_string = 1;
>  	    }
>  
> @@ -816,8 +822,8 @@ get_string (struct linereader *lr, const struct charmap_t *charmap,
>  		wch = seq->ucs4;
>  	      else
>  		{
> -		  wch = repertoire_find_value (repertoire, &buf[startidx],
> -					       bufact - startidx);
> +		  wch = repertoire_find_value (repertoire, &lrb.buf[startidx],
> +					       lrb.act - startidx);
>  		  if (seq != NULL)
>  		    seq->ucs4 = wch;
>  		}
> @@ -826,7 +832,7 @@ get_string (struct linereader *lr, const struct charmap_t *charmap,
>  		{
>  		  /* This name is not in the repertoire map.  */
>  		  lr_error (lr, _("symbol `%.*s' not in repertoire map"),
> -			    (int) (bufact - startidx), &buf[startidx]);
> +			    (int) (lrb.act - startidx), &lrb.buf[startidx]);
>  		  illegal_string = 1;
>  		}
>  	      else
> @@ -834,11 +840,11 @@ get_string (struct linereader *lr, const struct charmap_t *charmap,
>  	    }
>  
>  	  /* Now forget about the name we just added.  */
> -	  bufact = startidx;
> +	  lrb.act = startidx;
>  
>  	  /* And copy the bytes.  */
>  	  if (seq != NULL)
> -	    ADDS (seq->bytes, seq->nbytes);
> +	    adds (&lrb, seq->bytes, seq->nbytes);
>  	}
>  
>        if (ch == '\n' || ch == EOF)
> @@ -849,7 +855,7 @@ get_string (struct linereader *lr, const struct charmap_t *charmap,
>  
>        if (illegal_string)
>  	{
> -	  free (buf);
> +	  free (lrb.buf);

OK.

>  	  free (buf2);
>  	  lr->token.val.str.startmb = NULL;
>  	  lr->token.val.str.lenmb = 0;
> @@ -859,7 +865,7 @@ get_string (struct linereader *lr, const struct charmap_t *charmap,
>  	  return &lr->token;
>  	}
>  
> -      ADDC ('\0');
> +      addc (&lrb, '\0');
>  
>        if (return_widestr)
>  	{
> @@ -870,8 +876,7 @@ get_string (struct linereader *lr, const struct charmap_t *charmap,
>  	}
>      }
>  
> -  lr->token.val.str.startmb = xrealloc (buf, bufact);
> -  lr->token.val.str.lenmb = bufact;
> +  lr_buffer_to_token (&lrb, lr);
>  
>    return &lr->token;
>  }


-- 
Cheers,
Carlos.