From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail-oa1-x2b.google.com (mail-oa1-x2b.google.com [IPv6:2001:4860:4864:20::2b]) by sourceware.org (Postfix) with ESMTPS id 0532838300A6 for ; Fri, 20 May 2022 18:29:32 +0000 (GMT) DMARC-Filter: OpenDMARC Filter v1.4.1 sourceware.org 0532838300A6 Received: by mail-oa1-x2b.google.com with SMTP id 586e51a60fabf-f16a3e0529so11264741fac.2 for ; Fri, 20 May 2022 11:29:31 -0700 (PDT) X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=x-gm-message-state:message-id:date:mime-version:user-agent:subject :content-language:to:references:from:in-reply-to :content-transfer-encoding; bh=njWyiRYGDyBxSLp7ciTBaZAdXuW3k9wEwMNV+Hj1dAs=; b=Rl1HotnSFj4BudbooMaHITB5WLxhfO/5MQZn3IpUy/OfcF6qsm9gZS8pxQkeVoWsDo 0nooM+UJdvUcGWD/J1nhJnMSqBX8IKhbg8H7rrwZkmujs/2jbk8DxNrGKa7cY/3Y4zVx 17zxMaQNViOhY4zUfn73KfR6wvDfHDJS2eGb0ERjSc3AYBL9OLy7DsSTT1GCf+7O9wHS LQHUAQh/uo4GnCoiWjwtpkSBni1gCuTHlqlRx7Qukma10LCGyGBp254mfQNmyG+TXN5G d+yEuUGX48NBa7TV7FQpgU0lTVLVmQH9w6DgoYS9Ke2ohZkVU5fTT4OGtWM1jrSq1VYY YXuQ== X-Gm-Message-State: AOAM533FTH/atExp4ATqcT5Vd8khnZ5JCxWHICngeHRAcvOrWR7s85Y1 kM7QhacLo3S5A/lvRp5E4vAZbFNM1kKLQg== X-Google-Smtp-Source: ABdhPJwnHupoPeYfPnSDfohvImhGKrC4XgsfVKresR3ptzKviY0IXWs/yb/PvywaonLq26Ngz0q4BA== X-Received: by 2002:a05:6870:17a6:b0:f1:ccf4:ad53 with SMTP id r38-20020a05687017a600b000f1ccf4ad53mr6405913oae.48.1653071371266; Fri, 20 May 2022 11:29:31 -0700 (PDT) Received: from ?IPV6:2804:431:c7cb:cdd6:2f68:19ef:b907:1f26? ([2804:431:c7cb:cdd6:2f68:19ef:b907:1f26]) by smtp.gmail.com with ESMTPSA id r125-20020acada83000000b003289f51c2d7sm1376534oig.34.2022.05.20.11.29.29 (version=TLS1_3 cipher=TLS_AES_128_GCM_SHA256 bits=128/128); Fri, 20 May 2022 11:29:30 -0700 (PDT) Message-ID: <5ea6ef24-5d02-ceeb-6908-746138652006@linaro.org> Date: Fri, 20 May 2022 15:29:28 -0300 MIME-Version: 1.0 User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:91.0) Gecko/20100101 Thunderbird/91.9.0 Subject: Re: [PATCH 15/26] locale: Add more cached data to LC_CTYPE Content-Language: en-US To: Florian Weimer , libc-alpha@sourceware.org References: From: Adhemerval Zanella In-Reply-To: Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 7bit X-Spam-Status: No, score=-13.4 required=5.0 tests=BAYES_00, DKIM_SIGNED, DKIM_VALID, DKIM_VALID_AU, DKIM_VALID_EF, GIT_PATCH_0, NICE_REPLY_A, RCVD_IN_DNSWL_NONE, SPF_HELO_NONE, SPF_PASS, TXREP, T_SCC_BODY_TEXT_LINE autolearn=ham autolearn_force=no version=3.4.6 X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on server2.sourceware.org X-BeenThere: libc-alpha@sourceware.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Libc-alpha mailing list List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Fri, 20 May 2022 18:29:34 -0000 On 17/03/2022 16:30, Florian Weimer via Libc-alpha wrote: > This data will be used in number formatting. Looks ok, some minor comments below. Reviewed-by: Adhemerval Zanella > --- > locale/C-ctype.c | 9 +++++- > locale/loadlocale.c | 74 ++++++++++++++++++++++++++++++++++++++++++--- > locale/localeinfo.h | 23 +++++++++++++- > wcsmbs/wcsmbsload.c | 23 ++++++++------ > wcsmbs/wcsmbsload.h | 13 +++----- > 5 files changed, 117 insertions(+), 25 deletions(-) > > diff --git a/locale/C-ctype.c b/locale/C-ctype.c > index ef4b67029c..6253f7f413 100644 > --- a/locale/C-ctype.c > +++ b/locale/C-ctype.c > @@ -19,6 +19,7 @@ > #include > #include > #include > +#include > > #include "C-translit.h" > > @@ -538,11 +539,17 @@ _nl_C_LC_CTYPE_width attribute_hidden = > NR_FIXED == _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1). */ > typedef int assertion1[1 - 2 * (NR_FIXED != _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1))]; > > +static const struct lc_ctype_data lc_ctype_data = > + { > + .fcts = &__wcsmbs_gconv_fcts_c, > + .outdigit_bytes_all_equal = 1, > + }; > + > const struct __locale_data _nl_C_LC_CTYPE attribute_hidden = > { > _nl_C_name, > NULL, 0, 0, /* no file mapped */ > - NULL, /* No cached data. */ > + (void *) &lc_ctype_data, > UNDELETABLE, > 1, /* Enable transliteration by default. */ > NR_FIXED + NR_CLASSES + NR_MAPS, > diff --git a/locale/loadlocale.c b/locale/loadlocale.c > index 9069bafcd8..ce78dfd071 100644 > --- a/locale/loadlocale.c > +++ b/locale/loadlocale.c > @@ -31,7 +31,6 @@ > #include > #include "localeinfo.h" > > - Spurious line removal. > static const size_t _nl_category_num_items[] = > { > #define DEFINE_CATEGORY(category, category_name, items, a) \ > @@ -62,6 +61,61 @@ static const enum value_type *const _nl_value_types[] = > #undef DEFINE_CATEGORY > }; > > +/* Fill in LOCDATA->private for the LC_CTYPE category. */ > +static void > +_nl_intern_locale_data_fill_cache_ctype (struct __locale_data *locdata) > +{ > + struct lc_ctype_data *data = locdata->private; > + > + /* Default to no translation. Assumes zero initialization of *data. */ > + memset (data->outdigit_bytes, 1, 10); Use sizeof data->outdigit_bytes here. > + > + for (int i = 0; i <= 9; ++i) > + { > + const char *digit > + = locdata->values[_NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_MB + i)].string; > + unsigned char len; > + if (digit[0] != '0' + i || digit[1] != '\0') > + { > + data->outdigit_translation_needed = true; > + len = strlen (locdata->values[_NL_ITEM_INDEX > + (_NL_CTYPE_OUTDIGIT0_MB + i)].string); > + } > + else > + len = 1; > + data->outdigit_bytes[i] = len; > + if (i == 0) > + data->outdigit_bytes_all_equal = len; > + else if (data->outdigit_bytes_all_equal != len) > + data->outdigit_bytes_all_equal = 0; > + } > +} > + > +/* Updates data in LOCDATA->private for CATEGORY. */ > +static void > +_nl_intern_locale_data_fill_cache (int category, struct __locale_data *locdata) > +{ > + switch (category) > + { > + case LC_CTYPE: > + _nl_intern_locale_data_fill_cache_ctype (locdata); > + break; > + } > +} > + > +/* Returns the number of bytes allocated of struct __locale_data for > + CATEGORY. */ > +static size_t > +_nl_intern_locale_data_extra_size (int category) > +{ > + switch (category) > + { > + case LC_CTYPE: > + return sizeof (struct lc_ctype_data); > + default: > + return 0; > + } > +} > > struct __locale_data * > _nl_intern_locale_data (int category, const void *data, size_t datasize) > @@ -94,14 +148,23 @@ _nl_intern_locale_data (int category, const void *data, size_t datasize) > return NULL; > } > > - newdata = malloc (sizeof *newdata > - + filedata->nstrings * sizeof (union locale_data_value)); > + size_t base_size = (sizeof *newdata > + + filedata->nstrings * sizeof (union locale_data_value)); > + size_t extra_size = _nl_intern_locale_data_extra_size (category); > + > + newdata = malloc (base_size + extra_size); > if (newdata == NULL) > return NULL; > > newdata->filedata = (void *) filedata; > newdata->filesize = datasize; > - memset (&newdata->private, 0, sizeof (newdata->private)); > + if (extra_size == 0) > + newdata->private = NULL; > + else > + { > + newdata->private = (char *) newdata + base_size; > + memset (newdata->private, 0, extra_size); > + } > newdata->usage_count = 0; > newdata->use_translit = 0; > newdata->nstrings = filedata->nstrings; > @@ -157,6 +220,9 @@ _nl_intern_locale_data (int category, const void *data, size_t datasize) > } > } > > + if (extra_size > 0) > + _nl_intern_locale_data_fill_cache (category, newdata); > + > return newdata; > } > > diff --git a/locale/localeinfo.h b/locale/localeinfo.h > index 01ec5535bb..fd43033a19 100644 > --- a/locale/localeinfo.h > +++ b/locale/localeinfo.h > @@ -61,7 +61,7 @@ struct __locale_data > /* This provides a slot for category-specific code to cache data > computed about this locale. Type of the data pointed to: > > - LC_CTYPE struct gconv_fcts (get_gconv_fcts, __wcsmbs_load_conv) > + LC_CTYPE struct lc_ctype_data (_nl_intern_locale_data) > LC_TIME struct lc_time_data (_nl_init_alt_digit, _nl_init_era_entries) > > This data deallocated at the start of _nl_unload_locale. */ > @@ -161,6 +161,27 @@ struct lc_time_data > int walt_digits_initialized; > }; > > +/* Ancillary data for LC_CTYPE. Co-allocated after struct > + __locale_data by _nl_intern_locale_data. */ > +struct lc_ctype_data > +{ > + /* See get_gconv_fcts and __wcsmbs_load_conv. */ > + const struct gconv_fcts *fcts; > + > + /* If false, outdigit just maps to the ASCII digits. */ > + bool outdigit_translation_needed; > + > + /* Cached multi-byte string lengths. This could be added to the > + locale data itself if the format is changed (which impacts > + existing statically linked binaries). */ > + > + /* For the outdigit decimal digits (copied from LC_CTYPE). */ > + unsigned char outdigit_bytes[10]; > + > + /* If all outdigit_bytes elements are equal, this is that value, > + otherwise it is 0. */ > + unsigned char outdigit_bytes_all_equal; Why not _Bool? > +}; > > /* LC_CTYPE specific: > Hardwired indices for standard wide character translation mappings. */ > diff --git a/wcsmbs/wcsmbsload.c b/wcsmbs/wcsmbsload.c > index 2650834e29..0f0f55f9ed 100644 > --- a/wcsmbs/wcsmbsload.c > +++ b/wcsmbs/wcsmbsload.c > @@ -150,12 +150,14 @@ __libc_rwlock_define (extern, __libc_setlocale_lock attribute_hidden) > void > __wcsmbs_load_conv (struct __locale_data *new_category) > { > + struct lc_ctype_data *data = new_category->private; > + > /* Acquire the lock. */ > __libc_rwlock_wrlock (__libc_setlocale_lock); > > /* We should repeat the test since while we waited some other thread > might have run this function. */ > - if (__glibc_likely (new_category->private == NULL)) > + if (__glibc_likely (data->fcts == NULL)) > { > /* We must find the real functions. */ > const char *charset_name; > @@ -199,10 +201,10 @@ __wcsmbs_load_conv (struct __locale_data *new_category) > free (new_fcts); > > failed: > - new_category->private = (void *) &__wcsmbs_gconv_fcts_c; > + data->fcts = (void *) &__wcsmbs_gconv_fcts_c; > } > else > - new_category->private = new_fcts; > + data->fcts = new_fcts; > } > > __libc_rwlock_unlock (__libc_setlocale_lock); > @@ -263,14 +265,15 @@ __wcsmbs_named_conv (struct gconv_fcts *copy, const char *name) > void > _nl_cleanup_ctype (struct __locale_data *locale) > { > - const struct gconv_fcts *const data = locale->private; > - if (data != NULL && data != &__wcsmbs_gconv_fcts_c) > + struct lc_ctype_data *data = locale->private; > + if (data->fcts != NULL && data->fcts != &__wcsmbs_gconv_fcts_c) > { > - locale->private = NULL; > - > /* Free the old conversions. */ > - __gconv_close_transform (data->tomb, data->tomb_nsteps); > - __gconv_close_transform (data->towc, data->towc_nsteps); > - free ((char *) data); > + __gconv_close_transform (data->fcts->tomb, data->fcts->tomb_nsteps); > + __gconv_close_transform (data->fcts->towc, data->fcts->towc_nsteps); > + > + free ((void *) data->fcts); > + data->fcts = NULL; > + /* data itself is allocated within locale. */ > } > } > diff --git a/wcsmbs/wcsmbsload.h b/wcsmbs/wcsmbsload.h > index 8bbd34ba02..876f8368b1 100644 > --- a/wcsmbs/wcsmbsload.h > +++ b/wcsmbs/wcsmbsload.h > @@ -66,15 +66,10 @@ extern const struct __locale_data _nl_C_LC_CTYPE attribute_hidden; > static inline const struct gconv_fcts * > get_gconv_fcts (struct __locale_data *data) > { > - struct gconv_fcts *private = data->private; > - if (private == NULL) > - { > - if (data == &_nl_C_LC_CTYPE) > - return &__wcsmbs_gconv_fcts_c; > - __wcsmbs_load_conv (data); > - private = data->private; > - } > - return private; > + struct lc_ctype_data *private = data->private; > + if (private->fcts == NULL) > + __wcsmbs_load_conv (data); > + return private->fcts; > } > > #endif /* wcsmbsload.h */