From: Adhemerval Zanella <adhemerval.zanella@linaro.org>
To: Florian Weimer <fweimer@redhat.com>, libc-alpha@sourceware.org
Subject: Re: [PATCH 15/26] locale: Add more cached data to LC_CTYPE
Date: Fri, 20 May 2022 15:29:28 -0300 [thread overview]
Message-ID: <5ea6ef24-5d02-ceeb-6908-746138652006@linaro.org> (raw)
In-Reply-To: <f891ee37374a8b0af60c93bff8353206d35426cd.1647544751.git.fweimer@redhat.com>
On 17/03/2022 16:30, Florian Weimer via Libc-alpha wrote:
> This data will be used in number formatting.
Looks ok, some minor comments below.
Reviewed-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>
> ---
> locale/C-ctype.c | 9 +++++-
> locale/loadlocale.c | 74 ++++++++++++++++++++++++++++++++++++++++++---
> locale/localeinfo.h | 23 +++++++++++++-
> wcsmbs/wcsmbsload.c | 23 ++++++++------
> wcsmbs/wcsmbsload.h | 13 +++-----
> 5 files changed, 117 insertions(+), 25 deletions(-)
>
> diff --git a/locale/C-ctype.c b/locale/C-ctype.c
> index ef4b67029c..6253f7f413 100644
> --- a/locale/C-ctype.c
> +++ b/locale/C-ctype.c
> @@ -19,6 +19,7 @@
> #include <endian.h>
> #include <stdalign.h>
> #include <stdint.h>
> +#include <wcsmbs/wcsmbsload.h>
>
> #include "C-translit.h"
>
> @@ -538,11 +539,17 @@ _nl_C_LC_CTYPE_width attribute_hidden =
> NR_FIXED == _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1). */
> typedef int assertion1[1 - 2 * (NR_FIXED != _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1))];
>
> +static const struct lc_ctype_data lc_ctype_data =
> + {
> + .fcts = &__wcsmbs_gconv_fcts_c,
> + .outdigit_bytes_all_equal = 1,
> + };
> +
> const struct __locale_data _nl_C_LC_CTYPE attribute_hidden =
> {
> _nl_C_name,
> NULL, 0, 0, /* no file mapped */
> - NULL, /* No cached data. */
> + (void *) &lc_ctype_data,
> UNDELETABLE,
> 1, /* Enable transliteration by default. */
> NR_FIXED + NR_CLASSES + NR_MAPS,
> diff --git a/locale/loadlocale.c b/locale/loadlocale.c
> index 9069bafcd8..ce78dfd071 100644
> --- a/locale/loadlocale.c
> +++ b/locale/loadlocale.c
> @@ -31,7 +31,6 @@
> #include <not-cancel.h>
> #include "localeinfo.h"
>
> -
Spurious line removal.
> static const size_t _nl_category_num_items[] =
> {
> #define DEFINE_CATEGORY(category, category_name, items, a) \
> @@ -62,6 +61,61 @@ static const enum value_type *const _nl_value_types[] =
> #undef DEFINE_CATEGORY
> };
>
> +/* Fill in LOCDATA->private for the LC_CTYPE category. */
> +static void
> +_nl_intern_locale_data_fill_cache_ctype (struct __locale_data *locdata)
> +{
> + struct lc_ctype_data *data = locdata->private;
> +
> + /* Default to no translation. Assumes zero initialization of *data. */
> + memset (data->outdigit_bytes, 1, 10);
Use sizeof data->outdigit_bytes here.
> +
> + for (int i = 0; i <= 9; ++i)
> + {
> + const char *digit
> + = locdata->values[_NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_MB + i)].string;
> + unsigned char len;
> + if (digit[0] != '0' + i || digit[1] != '\0')
> + {
> + data->outdigit_translation_needed = true;
> + len = strlen (locdata->values[_NL_ITEM_INDEX
> + (_NL_CTYPE_OUTDIGIT0_MB + i)].string);
> + }
> + else
> + len = 1;
> + data->outdigit_bytes[i] = len;
> + if (i == 0)
> + data->outdigit_bytes_all_equal = len;
> + else if (data->outdigit_bytes_all_equal != len)
> + data->outdigit_bytes_all_equal = 0;
> + }
> +}
> +
> +/* Updates data in LOCDATA->private for CATEGORY. */
> +static void
> +_nl_intern_locale_data_fill_cache (int category, struct __locale_data *locdata)
> +{
> + switch (category)
> + {
> + case LC_CTYPE:
> + _nl_intern_locale_data_fill_cache_ctype (locdata);
> + break;
> + }
> +}
> +
> +/* Returns the number of bytes allocated of struct __locale_data for
> + CATEGORY. */
> +static size_t
> +_nl_intern_locale_data_extra_size (int category)
> +{
> + switch (category)
> + {
> + case LC_CTYPE:
> + return sizeof (struct lc_ctype_data);
> + default:
> + return 0;
> + }
> +}
>
> struct __locale_data *
> _nl_intern_locale_data (int category, const void *data, size_t datasize)
> @@ -94,14 +148,23 @@ _nl_intern_locale_data (int category, const void *data, size_t datasize)
> return NULL;
> }
>
> - newdata = malloc (sizeof *newdata
> - + filedata->nstrings * sizeof (union locale_data_value));
> + size_t base_size = (sizeof *newdata
> + + filedata->nstrings * sizeof (union locale_data_value));
> + size_t extra_size = _nl_intern_locale_data_extra_size (category);
> +
> + newdata = malloc (base_size + extra_size);
> if (newdata == NULL)
> return NULL;
>
> newdata->filedata = (void *) filedata;
> newdata->filesize = datasize;
> - memset (&newdata->private, 0, sizeof (newdata->private));
> + if (extra_size == 0)
> + newdata->private = NULL;
> + else
> + {
> + newdata->private = (char *) newdata + base_size;
> + memset (newdata->private, 0, extra_size);
> + }
> newdata->usage_count = 0;
> newdata->use_translit = 0;
> newdata->nstrings = filedata->nstrings;
> @@ -157,6 +220,9 @@ _nl_intern_locale_data (int category, const void *data, size_t datasize)
> }
> }
>
> + if (extra_size > 0)
> + _nl_intern_locale_data_fill_cache (category, newdata);
> +
> return newdata;
> }
>
> diff --git a/locale/localeinfo.h b/locale/localeinfo.h
> index 01ec5535bb..fd43033a19 100644
> --- a/locale/localeinfo.h
> +++ b/locale/localeinfo.h
> @@ -61,7 +61,7 @@ struct __locale_data
> /* This provides a slot for category-specific code to cache data
> computed about this locale. Type of the data pointed to:
>
> - LC_CTYPE struct gconv_fcts (get_gconv_fcts, __wcsmbs_load_conv)
> + LC_CTYPE struct lc_ctype_data (_nl_intern_locale_data)
> LC_TIME struct lc_time_data (_nl_init_alt_digit, _nl_init_era_entries)
>
> This data deallocated at the start of _nl_unload_locale. */
> @@ -161,6 +161,27 @@ struct lc_time_data
> int walt_digits_initialized;
> };
>
> +/* Ancillary data for LC_CTYPE. Co-allocated after struct
> + __locale_data by _nl_intern_locale_data. */
> +struct lc_ctype_data
> +{
> + /* See get_gconv_fcts and __wcsmbs_load_conv. */
> + const struct gconv_fcts *fcts;
> +
> + /* If false, outdigit just maps to the ASCII digits. */
> + bool outdigit_translation_needed;
> +
> + /* Cached multi-byte string lengths. This could be added to the
> + locale data itself if the format is changed (which impacts
> + existing statically linked binaries). */
> +
> + /* For the outdigit decimal digits (copied from LC_CTYPE). */
> + unsigned char outdigit_bytes[10];
> +
> + /* If all outdigit_bytes elements are equal, this is that value,
> + otherwise it is 0. */
> + unsigned char outdigit_bytes_all_equal;
Why not _Bool?
> +};
>
> /* LC_CTYPE specific:
> Hardwired indices for standard wide character translation mappings. */
> diff --git a/wcsmbs/wcsmbsload.c b/wcsmbs/wcsmbsload.c
> index 2650834e29..0f0f55f9ed 100644
> --- a/wcsmbs/wcsmbsload.c
> +++ b/wcsmbs/wcsmbsload.c
> @@ -150,12 +150,14 @@ __libc_rwlock_define (extern, __libc_setlocale_lock attribute_hidden)
> void
> __wcsmbs_load_conv (struct __locale_data *new_category)
> {
> + struct lc_ctype_data *data = new_category->private;
> +
> /* Acquire the lock. */
> __libc_rwlock_wrlock (__libc_setlocale_lock);
>
> /* We should repeat the test since while we waited some other thread
> might have run this function. */
> - if (__glibc_likely (new_category->private == NULL))
> + if (__glibc_likely (data->fcts == NULL))
> {
> /* We must find the real functions. */
> const char *charset_name;
> @@ -199,10 +201,10 @@ __wcsmbs_load_conv (struct __locale_data *new_category)
> free (new_fcts);
>
> failed:
> - new_category->private = (void *) &__wcsmbs_gconv_fcts_c;
> + data->fcts = (void *) &__wcsmbs_gconv_fcts_c;
> }
> else
> - new_category->private = new_fcts;
> + data->fcts = new_fcts;
> }
>
> __libc_rwlock_unlock (__libc_setlocale_lock);
> @@ -263,14 +265,15 @@ __wcsmbs_named_conv (struct gconv_fcts *copy, const char *name)
> void
> _nl_cleanup_ctype (struct __locale_data *locale)
> {
> - const struct gconv_fcts *const data = locale->private;
> - if (data != NULL && data != &__wcsmbs_gconv_fcts_c)
> + struct lc_ctype_data *data = locale->private;
> + if (data->fcts != NULL && data->fcts != &__wcsmbs_gconv_fcts_c)
> {
> - locale->private = NULL;
> -
> /* Free the old conversions. */
> - __gconv_close_transform (data->tomb, data->tomb_nsteps);
> - __gconv_close_transform (data->towc, data->towc_nsteps);
> - free ((char *) data);
> + __gconv_close_transform (data->fcts->tomb, data->fcts->tomb_nsteps);
> + __gconv_close_transform (data->fcts->towc, data->fcts->towc_nsteps);
> +
> + free ((void *) data->fcts);
> + data->fcts = NULL;
> + /* data itself is allocated within locale. */
> }
> }
> diff --git a/wcsmbs/wcsmbsload.h b/wcsmbs/wcsmbsload.h
> index 8bbd34ba02..876f8368b1 100644
> --- a/wcsmbs/wcsmbsload.h
> +++ b/wcsmbs/wcsmbsload.h
> @@ -66,15 +66,10 @@ extern const struct __locale_data _nl_C_LC_CTYPE attribute_hidden;
> static inline const struct gconv_fcts *
> get_gconv_fcts (struct __locale_data *data)
> {
> - struct gconv_fcts *private = data->private;
> - if (private == NULL)
> - {
> - if (data == &_nl_C_LC_CTYPE)
> - return &__wcsmbs_gconv_fcts_c;
> - __wcsmbs_load_conv (data);
> - private = data->private;
> - }
> - return private;
> + struct lc_ctype_data *private = data->private;
> + if (private->fcts == NULL)
> + __wcsmbs_load_conv (data);
> + return private->fcts;
> }
>
> #endif /* wcsmbsload.h */
next prev parent reply other threads:[~2022-05-20 18:29 UTC|newest]
Thread overview: 49+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-03-17 19:28 [PATCH 00/26] vfprintf rework to remove vtables Florian Weimer
2022-03-17 19:28 ` [PATCH 01/26] libio: Convert tst_swprintf to the test framework Florian Weimer
2022-03-18 17:40 ` Adhemerval Zanella
2022-03-17 19:28 ` [PATCH 02/26] libio: Flush-only _IO_str_overflow must not return EOF (bug 28949) Florian Weimer
2022-03-18 18:11 ` Adhemerval Zanella
2022-03-17 19:28 ` [PATCH 03/26] stdio-common: Add wide stream coverage to tst-vfprintf-user-type Florian Weimer
2022-03-18 18:30 ` Adhemerval Zanella
2022-03-18 19:19 ` Florian Weimer
2022-03-17 19:28 ` [PATCH 04/26] stdio-common: Add tst-printf-width-i18n to cover numeric field width Florian Weimer
2022-05-20 13:22 ` Adhemerval Zanella
2022-05-20 13:33 ` Adhemerval Zanella
2022-05-23 6:39 ` Florian Weimer
2022-03-17 19:28 ` [PATCH 05/26] vfprintf: Move argument processing into vfprintf-process-arg.c Florian Weimer
2022-05-20 13:28 ` Adhemerval Zanella
2022-03-17 19:28 ` [PATCH 06/26] vfprintf: Consolidate some multibyte/wide character processing Florian Weimer
2022-05-20 14:16 ` Adhemerval Zanella
2022-03-17 19:29 ` [PATCH 07/26] __printf_fphex always uses LC_NUMERIC Florian Weimer
2022-05-20 14:21 ` Adhemerval Zanella
2022-05-23 6:55 ` Florian Weimer
2022-03-17 19:29 ` [PATCH 08/26] stdio-common: Add tst-memstream-string for open_memstream overflow Florian Weimer
2022-05-20 17:44 ` Adhemerval Zanella
2022-05-23 7:03 ` Florian Weimer
2022-03-17 19:29 ` [PATCH 09/26] stdio-common: Add printf specifier registry to <printf.h> Florian Weimer
2022-05-20 17:49 ` Adhemerval Zanella
2022-03-17 19:30 ` [PATCH 10/26] stdio-common: Move union printf_arg int <printf.h> Florian Weimer
2022-05-20 17:51 ` Adhemerval Zanella
2022-03-17 19:30 ` [PATCH 11/26] stdio-common: Simplify printf_unknown interface in vfprintf-internal.c Florian Weimer
2022-05-20 18:07 ` Adhemerval Zanella
2022-03-17 19:30 ` [PATCH 12/26] locale: Call _nl_unload_locale from _nl_archive_subfreeres Florian Weimer
2022-05-20 18:09 ` Adhemerval Zanella
2022-05-23 7:14 ` Florian Weimer
2022-03-17 19:30 ` [PATCH 13/26] locale: Remove cleanup function pointer from struct __localedata Florian Weimer
2022-05-20 18:16 ` Adhemerval Zanella
2022-03-17 19:30 ` [PATCH 14/26] locale: Remove private union from struct __locale_data Florian Weimer
2022-05-20 18:22 ` Adhemerval Zanella
2022-03-17 19:30 ` [PATCH 15/26] locale: Add more cached data to LC_CTYPE Florian Weimer
2022-05-20 18:29 ` Adhemerval Zanella [this message]
2022-05-23 7:20 ` Florian Weimer
2022-03-17 19:31 ` [PATCH 16/26] locale: Implement struct grouping_iterator Florian Weimer
2022-03-17 19:31 ` [PATCH 17/26] stdio-common: Introduce buffers for implementing printf Florian Weimer
2022-03-17 19:31 ` [PATCH 18/26] stdio-common: Add __printf_function_invoke Florian Weimer
2022-03-17 19:31 ` [PATCH 19/26] stdio-common: Add __translated_number_width Florian Weimer
2022-03-17 19:31 ` [PATCH 20/26] stdio-common: Convert vfprintf and related functions to buffers Florian Weimer
2022-03-17 19:31 ` [PATCH 21/26] stdio-common: Add lock optimization to vfprintf and vfwprintf Florian Weimer
2022-03-17 19:31 ` [PATCH 22/26] libio: Convert __vsprintf_internal to buffers Florian Weimer
2022-03-17 19:31 ` [PATCH 23/26] libio: Convert __vasprintf_internal " Florian Weimer
2022-03-17 19:31 ` [PATCH 24/26] libio: Convert __vdprintf_internal " Florian Weimer
2022-03-17 19:32 ` [PATCH 25/26] libio: Convert __obstack_vprintf_internal to buffers (bug 27124) Florian Weimer
2022-03-17 19:32 ` [PATCH 26/26] libio: Convert __vswprintf_internal to buffers (bug 27857) Florian Weimer
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=5ea6ef24-5d02-ceeb-6908-746138652006@linaro.org \
--to=adhemerval.zanella@linaro.org \
--cc=fweimer@redhat.com \
--cc=libc-alpha@sourceware.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).