public inbox for libc-alpha@sourceware.org
 help / color / mirror / Atom feed
From: Adhemerval Zanella <adhemerval.zanella@linaro.org>
To: Florian Weimer <fweimer@redhat.com>, libc-alpha@sourceware.org
Subject: Re: [PATCH 15/26] locale: Add more cached data to LC_CTYPE
Date: Fri, 20 May 2022 15:29:28 -0300	[thread overview]
Message-ID: <5ea6ef24-5d02-ceeb-6908-746138652006@linaro.org> (raw)
In-Reply-To: <f891ee37374a8b0af60c93bff8353206d35426cd.1647544751.git.fweimer@redhat.com>



On 17/03/2022 16:30, Florian Weimer via Libc-alpha wrote:
> This data will be used in number formatting.

Looks ok, some minor comments below.

Reviewed-by: Adhemerval Zanella  <adhemerval.zanella@linaro.org>

> ---
>  locale/C-ctype.c    |  9 +++++-
>  locale/loadlocale.c | 74 ++++++++++++++++++++++++++++++++++++++++++---
>  locale/localeinfo.h | 23 +++++++++++++-
>  wcsmbs/wcsmbsload.c | 23 ++++++++------
>  wcsmbs/wcsmbsload.h | 13 +++-----
>  5 files changed, 117 insertions(+), 25 deletions(-)
> 
> diff --git a/locale/C-ctype.c b/locale/C-ctype.c
> index ef4b67029c..6253f7f413 100644
> --- a/locale/C-ctype.c
> +++ b/locale/C-ctype.c
> @@ -19,6 +19,7 @@
>  #include <endian.h>
>  #include <stdalign.h>
>  #include <stdint.h>
> +#include <wcsmbs/wcsmbsload.h>
>  
>  #include "C-translit.h"
>  
> @@ -538,11 +539,17 @@ _nl_C_LC_CTYPE_width attribute_hidden =
>     NR_FIXED == _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1). */
>  typedef int assertion1[1 - 2 * (NR_FIXED != _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1))];
>  
> +static const struct lc_ctype_data lc_ctype_data =
> +  {
> +    .fcts = &__wcsmbs_gconv_fcts_c,
> +    .outdigit_bytes_all_equal = 1,
> +  };
> +
>  const struct __locale_data _nl_C_LC_CTYPE attribute_hidden =
>  {
>    _nl_C_name,
>    NULL, 0, 0,			/* no file mapped */
> -  NULL,				/* No cached data.  */
> +  (void *) &lc_ctype_data,
>    UNDELETABLE,
>    1,		/* Enable transliteration by default.  */
>    NR_FIXED + NR_CLASSES + NR_MAPS,
> diff --git a/locale/loadlocale.c b/locale/loadlocale.c
> index 9069bafcd8..ce78dfd071 100644
> --- a/locale/loadlocale.c
> +++ b/locale/loadlocale.c
> @@ -31,7 +31,6 @@
>  #include <not-cancel.h>
>  #include "localeinfo.h"
>  
> -

Spurious line removal.

>  static const size_t _nl_category_num_items[] =
>  {
>  #define DEFINE_CATEGORY(category, category_name, items, a) \
> @@ -62,6 +61,61 @@ static const enum value_type *const _nl_value_types[] =
>  #undef DEFINE_CATEGORY
>  };
>  
> +/* Fill in LOCDATA->private for the LC_CTYPE category.  */
> +static void
> +_nl_intern_locale_data_fill_cache_ctype (struct __locale_data *locdata)
> +{
> +  struct lc_ctype_data *data = locdata->private;
> +
> +  /* Default to no translation.  Assumes zero initialization of *data.  */
> +  memset (data->outdigit_bytes, 1, 10);

Use sizeof data->outdigit_bytes here.

> +
> +  for (int i = 0; i <= 9; ++i)
> +    {
> +      const char *digit
> +	= locdata->values[_NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_MB + i)].string;
> +      unsigned char len;
> +      if (digit[0] != '0' + i || digit[1] != '\0')
> +	 {
> +	   data->outdigit_translation_needed = true;
> +	   len = strlen (locdata->values[_NL_ITEM_INDEX
> +					 (_NL_CTYPE_OUTDIGIT0_MB + i)].string);
> +	 }
> +      else
> +	len = 1;
> +      data->outdigit_bytes[i] = len;
> +      if (i == 0)
> +	data->outdigit_bytes_all_equal = len;
> +      else if (data->outdigit_bytes_all_equal != len)
> +	data->outdigit_bytes_all_equal = 0;
> +    }
> +}
> +
> +/* Updates data in LOCDATA->private for CATEGORY.  */
> +static void
> +_nl_intern_locale_data_fill_cache (int category, struct __locale_data *locdata)
> +{
> +  switch (category)
> +    {
> +    case LC_CTYPE:
> +      _nl_intern_locale_data_fill_cache_ctype (locdata);
> +      break;
> +    }
> +}
> +
> +/* Returns the number of bytes allocated of struct __locale_data for
> +   CATEGORY.  */
> +static size_t
> +_nl_intern_locale_data_extra_size (int category)
> +{
> +  switch (category)
> +    {
> +    case LC_CTYPE:
> +      return sizeof (struct lc_ctype_data);
> +    default:
> +      return 0;
> +    }
> +}
>  
>  struct __locale_data *
>  _nl_intern_locale_data (int category, const void *data, size_t datasize)
> @@ -94,14 +148,23 @@ _nl_intern_locale_data (int category, const void *data, size_t datasize)
>        return NULL;
>      }
>  
> -  newdata = malloc (sizeof *newdata
> -		    + filedata->nstrings * sizeof (union locale_data_value));
> +  size_t base_size = (sizeof *newdata
> +		      + filedata->nstrings * sizeof (union locale_data_value));
> +  size_t extra_size = _nl_intern_locale_data_extra_size (category);
> +
> +  newdata = malloc (base_size + extra_size);
>    if (newdata == NULL)
>      return NULL;
>  
>    newdata->filedata = (void *) filedata;
>    newdata->filesize = datasize;
> -  memset (&newdata->private, 0, sizeof (newdata->private));
> +  if (extra_size == 0)
> +    newdata->private = NULL;
> +  else
> +    {
> +      newdata->private = (char *) newdata + base_size;
> +      memset (newdata->private, 0, extra_size);
> +    }
>    newdata->usage_count = 0;
>    newdata->use_translit = 0;
>    newdata->nstrings = filedata->nstrings;
> @@ -157,6 +220,9 @@ _nl_intern_locale_data (int category, const void *data, size_t datasize)
>  	}
>      }
>  
> +  if (extra_size > 0)
> +    _nl_intern_locale_data_fill_cache (category, newdata);
> +
>    return newdata;
>  }
>  
> diff --git a/locale/localeinfo.h b/locale/localeinfo.h
> index 01ec5535bb..fd43033a19 100644
> --- a/locale/localeinfo.h
> +++ b/locale/localeinfo.h
> @@ -61,7 +61,7 @@ struct __locale_data
>    /* This provides a slot for category-specific code to cache data
>       computed about this locale.  Type of the data pointed to:
>  
> -     LC_CTYPE   struct gconv_fcts (get_gconv_fcts, __wcsmbs_load_conv)
> +     LC_CTYPE   struct lc_ctype_data (_nl_intern_locale_data)
>       LC_TIME    struct lc_time_data (_nl_init_alt_digit, _nl_init_era_entries)
>  
>       This data deallocated at the start of _nl_unload_locale.  */
> @@ -161,6 +161,27 @@ struct lc_time_data
>    int walt_digits_initialized;
>  };
>  
> +/* Ancillary data for LC_CTYPE.  Co-allocated after struct
> +   __locale_data by _nl_intern_locale_data.  */
> +struct lc_ctype_data
> +{
> +  /* See get_gconv_fcts and __wcsmbs_load_conv.  */
> +  const struct gconv_fcts *fcts;
> +
> +  /* If false, outdigit just maps to the ASCII digits.  */
> +  bool outdigit_translation_needed;
> +
> +  /* Cached multi-byte string lengths.  This could be added to the
> +     locale data itself if the format is changed (which impacts
> +     existing statically linked binaries).  */
> +
> +  /* For the outdigit decimal digits (copied from LC_CTYPE).  */
> +  unsigned char outdigit_bytes[10];
> +
> +  /* If all outdigit_bytes elements are equal, this is that value,
> +     otherwise it is 0.  */
> +  unsigned char outdigit_bytes_all_equal;

Why not _Bool?

> +};
>  
>  /* LC_CTYPE specific:
>     Hardwired indices for standard wide character translation mappings.  */
> diff --git a/wcsmbs/wcsmbsload.c b/wcsmbs/wcsmbsload.c
> index 2650834e29..0f0f55f9ed 100644
> --- a/wcsmbs/wcsmbsload.c
> +++ b/wcsmbs/wcsmbsload.c
> @@ -150,12 +150,14 @@ __libc_rwlock_define (extern, __libc_setlocale_lock attribute_hidden)
>  void
>  __wcsmbs_load_conv (struct __locale_data *new_category)
>  {
> +  struct lc_ctype_data *data = new_category->private;
> +
>    /* Acquire the lock.  */
>    __libc_rwlock_wrlock (__libc_setlocale_lock);
>  
>    /* We should repeat the test since while we waited some other thread
>       might have run this function.  */
> -  if (__glibc_likely (new_category->private == NULL))
> +  if (__glibc_likely (data->fcts == NULL))
>      {
>        /* We must find the real functions.  */
>        const char *charset_name;
> @@ -199,10 +201,10 @@ __wcsmbs_load_conv (struct __locale_data *new_category)
>  	  free (new_fcts);
>  
>  	failed:
> -	  new_category->private = (void *) &__wcsmbs_gconv_fcts_c;
> +	  data->fcts = (void *) &__wcsmbs_gconv_fcts_c;
>  	}
>        else
> -	new_category->private = new_fcts;
> +	data->fcts = new_fcts;
>      }
>  
>    __libc_rwlock_unlock (__libc_setlocale_lock);
> @@ -263,14 +265,15 @@ __wcsmbs_named_conv (struct gconv_fcts *copy, const char *name)
>  void
>  _nl_cleanup_ctype (struct __locale_data *locale)
>  {
> -  const struct gconv_fcts *const data = locale->private;
> -  if (data != NULL && data != &__wcsmbs_gconv_fcts_c)
> +  struct lc_ctype_data *data = locale->private;
> +  if (data->fcts != NULL && data->fcts != &__wcsmbs_gconv_fcts_c)
>      {
> -      locale->private = NULL;
> -
>        /* Free the old conversions.  */
> -      __gconv_close_transform (data->tomb, data->tomb_nsteps);
> -      __gconv_close_transform (data->towc, data->towc_nsteps);
> -      free ((char *) data);
> +      __gconv_close_transform (data->fcts->tomb, data->fcts->tomb_nsteps);
> +      __gconv_close_transform (data->fcts->towc, data->fcts->towc_nsteps);
> +
> +      free ((void *) data->fcts);
> +      data->fcts = NULL;
> +      /* data itself is allocated within locale.  */
>      }
>  }
> diff --git a/wcsmbs/wcsmbsload.h b/wcsmbs/wcsmbsload.h
> index 8bbd34ba02..876f8368b1 100644
> --- a/wcsmbs/wcsmbsload.h
> +++ b/wcsmbs/wcsmbsload.h
> @@ -66,15 +66,10 @@ extern const struct __locale_data _nl_C_LC_CTYPE attribute_hidden;
>  static inline const struct gconv_fcts *
>  get_gconv_fcts (struct __locale_data *data)
>  {
> -  struct gconv_fcts *private = data->private;
> -  if (private == NULL)
> -    {
> -      if (data == &_nl_C_LC_CTYPE)
> -	return &__wcsmbs_gconv_fcts_c;
> -      __wcsmbs_load_conv (data);
> -      private = data->private;
> -    }
> -  return private;
> +  struct lc_ctype_data *private = data->private;
> +  if (private->fcts == NULL)
> +    __wcsmbs_load_conv (data);
> +  return private->fcts;
>  }
>  
>  #endif	/* wcsmbsload.h */

  reply	other threads:[~2022-05-20 18:29 UTC|newest]

Thread overview: 49+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-03-17 19:28 [PATCH 00/26] vfprintf rework to remove vtables Florian Weimer
2022-03-17 19:28 ` [PATCH 01/26] libio: Convert tst_swprintf to the test framework Florian Weimer
2022-03-18 17:40   ` Adhemerval Zanella
2022-03-17 19:28 ` [PATCH 02/26] libio: Flush-only _IO_str_overflow must not return EOF (bug 28949) Florian Weimer
2022-03-18 18:11   ` Adhemerval Zanella
2022-03-17 19:28 ` [PATCH 03/26] stdio-common: Add wide stream coverage to tst-vfprintf-user-type Florian Weimer
2022-03-18 18:30   ` Adhemerval Zanella
2022-03-18 19:19     ` Florian Weimer
2022-03-17 19:28 ` [PATCH 04/26] stdio-common: Add tst-printf-width-i18n to cover numeric field width Florian Weimer
2022-05-20 13:22   ` Adhemerval Zanella
2022-05-20 13:33     ` Adhemerval Zanella
2022-05-23  6:39       ` Florian Weimer
2022-03-17 19:28 ` [PATCH 05/26] vfprintf: Move argument processing into vfprintf-process-arg.c Florian Weimer
2022-05-20 13:28   ` Adhemerval Zanella
2022-03-17 19:28 ` [PATCH 06/26] vfprintf: Consolidate some multibyte/wide character processing Florian Weimer
2022-05-20 14:16   ` Adhemerval Zanella
2022-03-17 19:29 ` [PATCH 07/26] __printf_fphex always uses LC_NUMERIC Florian Weimer
2022-05-20 14:21   ` Adhemerval Zanella
2022-05-23  6:55     ` Florian Weimer
2022-03-17 19:29 ` [PATCH 08/26] stdio-common: Add tst-memstream-string for open_memstream overflow Florian Weimer
2022-05-20 17:44   ` Adhemerval Zanella
2022-05-23  7:03     ` Florian Weimer
2022-03-17 19:29 ` [PATCH 09/26] stdio-common: Add printf specifier registry to <printf.h> Florian Weimer
2022-05-20 17:49   ` Adhemerval Zanella
2022-03-17 19:30 ` [PATCH 10/26] stdio-common: Move union printf_arg int <printf.h> Florian Weimer
2022-05-20 17:51   ` Adhemerval Zanella
2022-03-17 19:30 ` [PATCH 11/26] stdio-common: Simplify printf_unknown interface in vfprintf-internal.c Florian Weimer
2022-05-20 18:07   ` Adhemerval Zanella
2022-03-17 19:30 ` [PATCH 12/26] locale: Call _nl_unload_locale from _nl_archive_subfreeres Florian Weimer
2022-05-20 18:09   ` Adhemerval Zanella
2022-05-23  7:14     ` Florian Weimer
2022-03-17 19:30 ` [PATCH 13/26] locale: Remove cleanup function pointer from struct __localedata Florian Weimer
2022-05-20 18:16   ` Adhemerval Zanella
2022-03-17 19:30 ` [PATCH 14/26] locale: Remove private union from struct __locale_data Florian Weimer
2022-05-20 18:22   ` Adhemerval Zanella
2022-03-17 19:30 ` [PATCH 15/26] locale: Add more cached data to LC_CTYPE Florian Weimer
2022-05-20 18:29   ` Adhemerval Zanella [this message]
2022-05-23  7:20     ` Florian Weimer
2022-03-17 19:31 ` [PATCH 16/26] locale: Implement struct grouping_iterator Florian Weimer
2022-03-17 19:31 ` [PATCH 17/26] stdio-common: Introduce buffers for implementing printf Florian Weimer
2022-03-17 19:31 ` [PATCH 18/26] stdio-common: Add __printf_function_invoke Florian Weimer
2022-03-17 19:31 ` [PATCH 19/26] stdio-common: Add __translated_number_width Florian Weimer
2022-03-17 19:31 ` [PATCH 20/26] stdio-common: Convert vfprintf and related functions to buffers Florian Weimer
2022-03-17 19:31 ` [PATCH 21/26] stdio-common: Add lock optimization to vfprintf and vfwprintf Florian Weimer
2022-03-17 19:31 ` [PATCH 22/26] libio: Convert __vsprintf_internal to buffers Florian Weimer
2022-03-17 19:31 ` [PATCH 23/26] libio: Convert __vasprintf_internal " Florian Weimer
2022-03-17 19:31 ` [PATCH 24/26] libio: Convert __vdprintf_internal " Florian Weimer
2022-03-17 19:32 ` [PATCH 25/26] libio: Convert __obstack_vprintf_internal to buffers (bug 27124) Florian Weimer
2022-03-17 19:32 ` [PATCH 26/26] libio: Convert __vswprintf_internal to buffers (bug 27857) Florian Weimer

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=5ea6ef24-5d02-ceeb-6908-746138652006@linaro.org \
    --to=adhemerval.zanella@linaro.org \
    --cc=fweimer@redhat.com \
    --cc=libc-alpha@sourceware.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).