public inbox for libc-alpha@sourceware.org
 help / color / mirror / Atom feed
From: Florian Weimer <fweimer@redhat.com>
To: libc-alpha@sourceware.org
Subject: [PATCH 15/26] locale: Add more cached data to LC_CTYPE
Date: Thu, 17 Mar 2022 20:30:58 +0100	[thread overview]
Message-ID: <f891ee37374a8b0af60c93bff8353206d35426cd.1647544751.git.fweimer@redhat.com> (raw)
In-Reply-To: <cover.1647544751.git.fweimer@redhat.com>

This data will be used in number formatting.
---
 locale/C-ctype.c    |  9 +++++-
 locale/loadlocale.c | 74 ++++++++++++++++++++++++++++++++++++++++++---
 locale/localeinfo.h | 23 +++++++++++++-
 wcsmbs/wcsmbsload.c | 23 ++++++++------
 wcsmbs/wcsmbsload.h | 13 +++-----
 5 files changed, 117 insertions(+), 25 deletions(-)

diff --git a/locale/C-ctype.c b/locale/C-ctype.c
index ef4b67029c..6253f7f413 100644
--- a/locale/C-ctype.c
+++ b/locale/C-ctype.c
@@ -19,6 +19,7 @@
 #include <endian.h>
 #include <stdalign.h>
 #include <stdint.h>
+#include <wcsmbs/wcsmbsload.h>
 
 #include "C-translit.h"
 
@@ -538,11 +539,17 @@ _nl_C_LC_CTYPE_width attribute_hidden =
    NR_FIXED == _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1). */
 typedef int assertion1[1 - 2 * (NR_FIXED != _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1))];
 
+static const struct lc_ctype_data lc_ctype_data =
+  {
+    .fcts = &__wcsmbs_gconv_fcts_c,
+    .outdigit_bytes_all_equal = 1,
+  };
+
 const struct __locale_data _nl_C_LC_CTYPE attribute_hidden =
 {
   _nl_C_name,
   NULL, 0, 0,			/* no file mapped */
-  NULL,				/* No cached data.  */
+  (void *) &lc_ctype_data,
   UNDELETABLE,
   1,		/* Enable transliteration by default.  */
   NR_FIXED + NR_CLASSES + NR_MAPS,
diff --git a/locale/loadlocale.c b/locale/loadlocale.c
index 9069bafcd8..ce78dfd071 100644
--- a/locale/loadlocale.c
+++ b/locale/loadlocale.c
@@ -31,7 +31,6 @@
 #include <not-cancel.h>
 #include "localeinfo.h"
 
-
 static const size_t _nl_category_num_items[] =
 {
 #define DEFINE_CATEGORY(category, category_name, items, a) \
@@ -62,6 +61,61 @@ static const enum value_type *const _nl_value_types[] =
 #undef DEFINE_CATEGORY
 };
 
+/* Fill in LOCDATA->private for the LC_CTYPE category.  */
+static void
+_nl_intern_locale_data_fill_cache_ctype (struct __locale_data *locdata)
+{
+  struct lc_ctype_data *data = locdata->private;
+
+  /* Default to no translation.  Assumes zero initialization of *data.  */
+  memset (data->outdigit_bytes, 1, 10);
+
+  for (int i = 0; i <= 9; ++i)
+    {
+      const char *digit
+	= locdata->values[_NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_MB + i)].string;
+      unsigned char len;
+      if (digit[0] != '0' + i || digit[1] != '\0')
+	 {
+	   data->outdigit_translation_needed = true;
+	   len = strlen (locdata->values[_NL_ITEM_INDEX
+					 (_NL_CTYPE_OUTDIGIT0_MB + i)].string);
+	 }
+      else
+	len = 1;
+      data->outdigit_bytes[i] = len;
+      if (i == 0)
+	data->outdigit_bytes_all_equal = len;
+      else if (data->outdigit_bytes_all_equal != len)
+	data->outdigit_bytes_all_equal = 0;
+    }
+}
+
+/* Updates data in LOCDATA->private for CATEGORY.  */
+static void
+_nl_intern_locale_data_fill_cache (int category, struct __locale_data *locdata)
+{
+  switch (category)
+    {
+    case LC_CTYPE:
+      _nl_intern_locale_data_fill_cache_ctype (locdata);
+      break;
+    }
+}
+
+/* Returns the number of bytes allocated of struct __locale_data for
+   CATEGORY.  */
+static size_t
+_nl_intern_locale_data_extra_size (int category)
+{
+  switch (category)
+    {
+    case LC_CTYPE:
+      return sizeof (struct lc_ctype_data);
+    default:
+      return 0;
+    }
+}
 
 struct __locale_data *
 _nl_intern_locale_data (int category, const void *data, size_t datasize)
@@ -94,14 +148,23 @@ _nl_intern_locale_data (int category, const void *data, size_t datasize)
       return NULL;
     }
 
-  newdata = malloc (sizeof *newdata
-		    + filedata->nstrings * sizeof (union locale_data_value));
+  size_t base_size = (sizeof *newdata
+		      + filedata->nstrings * sizeof (union locale_data_value));
+  size_t extra_size = _nl_intern_locale_data_extra_size (category);
+
+  newdata = malloc (base_size + extra_size);
   if (newdata == NULL)
     return NULL;
 
   newdata->filedata = (void *) filedata;
   newdata->filesize = datasize;
-  memset (&newdata->private, 0, sizeof (newdata->private));
+  if (extra_size == 0)
+    newdata->private = NULL;
+  else
+    {
+      newdata->private = (char *) newdata + base_size;
+      memset (newdata->private, 0, extra_size);
+    }
   newdata->usage_count = 0;
   newdata->use_translit = 0;
   newdata->nstrings = filedata->nstrings;
@@ -157,6 +220,9 @@ _nl_intern_locale_data (int category, const void *data, size_t datasize)
 	}
     }
 
+  if (extra_size > 0)
+    _nl_intern_locale_data_fill_cache (category, newdata);
+
   return newdata;
 }
 
diff --git a/locale/localeinfo.h b/locale/localeinfo.h
index 01ec5535bb..fd43033a19 100644
--- a/locale/localeinfo.h
+++ b/locale/localeinfo.h
@@ -61,7 +61,7 @@ struct __locale_data
   /* This provides a slot for category-specific code to cache data
      computed about this locale.  Type of the data pointed to:
 
-     LC_CTYPE   struct gconv_fcts (get_gconv_fcts, __wcsmbs_load_conv)
+     LC_CTYPE   struct lc_ctype_data (_nl_intern_locale_data)
      LC_TIME    struct lc_time_data (_nl_init_alt_digit, _nl_init_era_entries)
 
      This data deallocated at the start of _nl_unload_locale.  */
@@ -161,6 +161,27 @@ struct lc_time_data
   int walt_digits_initialized;
 };
 
+/* Ancillary data for LC_CTYPE.  Co-allocated after struct
+   __locale_data by _nl_intern_locale_data.  */
+struct lc_ctype_data
+{
+  /* See get_gconv_fcts and __wcsmbs_load_conv.  */
+  const struct gconv_fcts *fcts;
+
+  /* If false, outdigit just maps to the ASCII digits.  */
+  bool outdigit_translation_needed;
+
+  /* Cached multi-byte string lengths.  This could be added to the
+     locale data itself if the format is changed (which impacts
+     existing statically linked binaries).  */
+
+  /* For the outdigit decimal digits (copied from LC_CTYPE).  */
+  unsigned char outdigit_bytes[10];
+
+  /* If all outdigit_bytes elements are equal, this is that value,
+     otherwise it is 0.  */
+  unsigned char outdigit_bytes_all_equal;
+};
 
 /* LC_CTYPE specific:
    Hardwired indices for standard wide character translation mappings.  */
diff --git a/wcsmbs/wcsmbsload.c b/wcsmbs/wcsmbsload.c
index 2650834e29..0f0f55f9ed 100644
--- a/wcsmbs/wcsmbsload.c
+++ b/wcsmbs/wcsmbsload.c
@@ -150,12 +150,14 @@ __libc_rwlock_define (extern, __libc_setlocale_lock attribute_hidden)
 void
 __wcsmbs_load_conv (struct __locale_data *new_category)
 {
+  struct lc_ctype_data *data = new_category->private;
+
   /* Acquire the lock.  */
   __libc_rwlock_wrlock (__libc_setlocale_lock);
 
   /* We should repeat the test since while we waited some other thread
      might have run this function.  */
-  if (__glibc_likely (new_category->private == NULL))
+  if (__glibc_likely (data->fcts == NULL))
     {
       /* We must find the real functions.  */
       const char *charset_name;
@@ -199,10 +201,10 @@ __wcsmbs_load_conv (struct __locale_data *new_category)
 	  free (new_fcts);
 
 	failed:
-	  new_category->private = (void *) &__wcsmbs_gconv_fcts_c;
+	  data->fcts = (void *) &__wcsmbs_gconv_fcts_c;
 	}
       else
-	new_category->private = new_fcts;
+	data->fcts = new_fcts;
     }
 
   __libc_rwlock_unlock (__libc_setlocale_lock);
@@ -263,14 +265,15 @@ __wcsmbs_named_conv (struct gconv_fcts *copy, const char *name)
 void
 _nl_cleanup_ctype (struct __locale_data *locale)
 {
-  const struct gconv_fcts *const data = locale->private;
-  if (data != NULL && data != &__wcsmbs_gconv_fcts_c)
+  struct lc_ctype_data *data = locale->private;
+  if (data->fcts != NULL && data->fcts != &__wcsmbs_gconv_fcts_c)
     {
-      locale->private = NULL;
-
       /* Free the old conversions.  */
-      __gconv_close_transform (data->tomb, data->tomb_nsteps);
-      __gconv_close_transform (data->towc, data->towc_nsteps);
-      free ((char *) data);
+      __gconv_close_transform (data->fcts->tomb, data->fcts->tomb_nsteps);
+      __gconv_close_transform (data->fcts->towc, data->fcts->towc_nsteps);
+
+      free ((void *) data->fcts);
+      data->fcts = NULL;
+      /* data itself is allocated within locale.  */
     }
 }
diff --git a/wcsmbs/wcsmbsload.h b/wcsmbs/wcsmbsload.h
index 8bbd34ba02..876f8368b1 100644
--- a/wcsmbs/wcsmbsload.h
+++ b/wcsmbs/wcsmbsload.h
@@ -66,15 +66,10 @@ extern const struct __locale_data _nl_C_LC_CTYPE attribute_hidden;
 static inline const struct gconv_fcts *
 get_gconv_fcts (struct __locale_data *data)
 {
-  struct gconv_fcts *private = data->private;
-  if (private == NULL)
-    {
-      if (data == &_nl_C_LC_CTYPE)
-	return &__wcsmbs_gconv_fcts_c;
-      __wcsmbs_load_conv (data);
-      private = data->private;
-    }
-  return private;
+  struct lc_ctype_data *private = data->private;
+  if (private->fcts == NULL)
+    __wcsmbs_load_conv (data);
+  return private->fcts;
 }
 
 #endif	/* wcsmbsload.h */
-- 
2.35.1



  parent reply	other threads:[~2022-03-17 19:31 UTC|newest]

Thread overview: 49+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-03-17 19:28 [PATCH 00/26] vfprintf rework to remove vtables Florian Weimer
2022-03-17 19:28 ` [PATCH 01/26] libio: Convert tst_swprintf to the test framework Florian Weimer
2022-03-18 17:40   ` Adhemerval Zanella
2022-03-17 19:28 ` [PATCH 02/26] libio: Flush-only _IO_str_overflow must not return EOF (bug 28949) Florian Weimer
2022-03-18 18:11   ` Adhemerval Zanella
2022-03-17 19:28 ` [PATCH 03/26] stdio-common: Add wide stream coverage to tst-vfprintf-user-type Florian Weimer
2022-03-18 18:30   ` Adhemerval Zanella
2022-03-18 19:19     ` Florian Weimer
2022-03-17 19:28 ` [PATCH 04/26] stdio-common: Add tst-printf-width-i18n to cover numeric field width Florian Weimer
2022-05-20 13:22   ` Adhemerval Zanella
2022-05-20 13:33     ` Adhemerval Zanella
2022-05-23  6:39       ` Florian Weimer
2022-03-17 19:28 ` [PATCH 05/26] vfprintf: Move argument processing into vfprintf-process-arg.c Florian Weimer
2022-05-20 13:28   ` Adhemerval Zanella
2022-03-17 19:28 ` [PATCH 06/26] vfprintf: Consolidate some multibyte/wide character processing Florian Weimer
2022-05-20 14:16   ` Adhemerval Zanella
2022-03-17 19:29 ` [PATCH 07/26] __printf_fphex always uses LC_NUMERIC Florian Weimer
2022-05-20 14:21   ` Adhemerval Zanella
2022-05-23  6:55     ` Florian Weimer
2022-03-17 19:29 ` [PATCH 08/26] stdio-common: Add tst-memstream-string for open_memstream overflow Florian Weimer
2022-05-20 17:44   ` Adhemerval Zanella
2022-05-23  7:03     ` Florian Weimer
2022-03-17 19:29 ` [PATCH 09/26] stdio-common: Add printf specifier registry to <printf.h> Florian Weimer
2022-05-20 17:49   ` Adhemerval Zanella
2022-03-17 19:30 ` [PATCH 10/26] stdio-common: Move union printf_arg int <printf.h> Florian Weimer
2022-05-20 17:51   ` Adhemerval Zanella
2022-03-17 19:30 ` [PATCH 11/26] stdio-common: Simplify printf_unknown interface in vfprintf-internal.c Florian Weimer
2022-05-20 18:07   ` Adhemerval Zanella
2022-03-17 19:30 ` [PATCH 12/26] locale: Call _nl_unload_locale from _nl_archive_subfreeres Florian Weimer
2022-05-20 18:09   ` Adhemerval Zanella
2022-05-23  7:14     ` Florian Weimer
2022-03-17 19:30 ` [PATCH 13/26] locale: Remove cleanup function pointer from struct __localedata Florian Weimer
2022-05-20 18:16   ` Adhemerval Zanella
2022-03-17 19:30 ` [PATCH 14/26] locale: Remove private union from struct __locale_data Florian Weimer
2022-05-20 18:22   ` Adhemerval Zanella
2022-03-17 19:30 ` Florian Weimer [this message]
2022-05-20 18:29   ` [PATCH 15/26] locale: Add more cached data to LC_CTYPE Adhemerval Zanella
2022-05-23  7:20     ` Florian Weimer
2022-03-17 19:31 ` [PATCH 16/26] locale: Implement struct grouping_iterator Florian Weimer
2022-03-17 19:31 ` [PATCH 17/26] stdio-common: Introduce buffers for implementing printf Florian Weimer
2022-03-17 19:31 ` [PATCH 18/26] stdio-common: Add __printf_function_invoke Florian Weimer
2022-03-17 19:31 ` [PATCH 19/26] stdio-common: Add __translated_number_width Florian Weimer
2022-03-17 19:31 ` [PATCH 20/26] stdio-common: Convert vfprintf and related functions to buffers Florian Weimer
2022-03-17 19:31 ` [PATCH 21/26] stdio-common: Add lock optimization to vfprintf and vfwprintf Florian Weimer
2022-03-17 19:31 ` [PATCH 22/26] libio: Convert __vsprintf_internal to buffers Florian Weimer
2022-03-17 19:31 ` [PATCH 23/26] libio: Convert __vasprintf_internal " Florian Weimer
2022-03-17 19:31 ` [PATCH 24/26] libio: Convert __vdprintf_internal " Florian Weimer
2022-03-17 19:32 ` [PATCH 25/26] libio: Convert __obstack_vprintf_internal to buffers (bug 27124) Florian Weimer
2022-03-17 19:32 ` [PATCH 26/26] libio: Convert __vswprintf_internal to buffers (bug 27857) Florian Weimer

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=f891ee37374a8b0af60c93bff8353206d35426cd.1647544751.git.fweimer@redhat.com \
    --to=fweimer@redhat.com \
    --cc=libc-alpha@sourceware.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).