From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <siddhesh@sourceware.org>
Received: by sourceware.org (Postfix, from userid 2206)
 id 5ECB3395442B; Fri,  6 May 2022 12:51:31 +0000 (GMT)
DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 5ECB3395442B
MIME-Version: 1.0
Content-Transfer-Encoding: 8bit
Content-Type: text/plain; charset="utf-8"
From: Siddhesh Poyarekar <siddhesh@sourceware.org>
To: glibc-cvs@sourceware.org
Subject: [glibc] benchtests: Add wcrtomb microbenchmark
X-Act-Checkin: glibc
X-Git-Author: Siddhesh Poyarekar <siddhesh@sourceware.org>
X-Git-Refname: refs/heads/master
X-Git-Oldrev: cf73acb596e39af4bd9f32846552cd41d9e17a78
X-Git-Newrev: 050cc5f7c1fc30ae2a071282faed9b6a68ca95ba
Message-Id: <20220506125131.5ECB3395442B@sourceware.org>
Date: Fri,  6 May 2022 12:51:31 +0000 (GMT)
X-BeenThere: glibc-cvs@sourceware.org
X-Mailman-Version: 2.1.29
Precedence: list
List-Id: Glibc-cvs mailing list <glibc-cvs.sourceware.org>
List-Unsubscribe: <https://sourceware.org/mailman/options/glibc-cvs>,
 <mailto:glibc-cvs-request@sourceware.org?subject=unsubscribe>
List-Archive: <https://sourceware.org/pipermail/glibc-cvs/>
List-Help: <mailto:glibc-cvs-request@sourceware.org?subject=help>
List-Subscribe: <https://sourceware.org/mailman/listinfo/glibc-cvs>,
 <mailto:glibc-cvs-request@sourceware.org?subject=subscribe>
X-List-Received-Date: Fri, 06 May 2022 12:51:31 -0000

https://sourceware.org/git/gitweb.cgi?p=glibc.git;h=050cc5f7c1fc30ae2a071282faed9b6a68ca95ba

commit 050cc5f7c1fc30ae2a071282faed9b6a68ca95ba
Author: Siddhesh Poyarekar <siddhesh@sourceware.org>
Date:   Fri May 6 18:16:43 2022 +0530

    benchtests: Add wcrtomb microbenchmark
    
    Add a simple benchmark that measures wcrtomb performance with various
    locales with 1-4 byte characters.
    
    Signed-off-by: Siddhesh Poyarekar <siddhesh@sourceware.org>
    Reviewed-by: Florian Weimer <fweimer@redhat.com>

Diff:
---
 benchtests/Makefile        |   1 +
 benchtests/bench-wcrtomb.c | 139 +++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 140 insertions(+)

diff --git a/benchtests/Makefile b/benchtests/Makefile
index 149d87e22e..de9de5cf58 100644
--- a/benchtests/Makefile
+++ b/benchtests/Makefile
@@ -171,6 +171,7 @@ ifeq (no,$(cross-compiling))
 wcsmbs-benchset := \
   wcpcpy \
   wcpncpy \
+  wcrtomb \
   wcscat \
   wcschr \
   wcschrnul \
diff --git a/benchtests/bench-wcrtomb.c b/benchtests/bench-wcrtomb.c
new file mode 100644
index 0000000000..232a7d59de
--- /dev/null
+++ b/benchtests/bench-wcrtomb.c
@@ -0,0 +1,139 @@
+/* Measure wcrtomb function.
+   Copyright The GNU Toolchain Authors.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <array_length.h>
+#include <limits.h>
+#include <locale.h>
+#include <string.h>
+#include <wchar.h>
+
+#include "bench-timing.h"
+#include "json-lib.h"
+
+#define NITERS 100000
+
+struct test_inputs
+{
+  const char *locale;           /* Locale name handed to setlocale.  */
+  const wchar_t *input_chars;   /* Characters converted one at a time.  */
+};
+
+/* Sample characters for each benchmarked locale, chosen to cover RTL
+   scripts and 1-, 2-, 3- and 4-byte UTF-8 sequences.  The exact number of
+   inputs per locale doesn't really matter because we're not looking to
+   compare performance between locales.  */
+struct test_inputs inputs[] =
+{
+  /* RTL (Arabic), 1 and 2 byte chars.  */
+  {"ar_SA.UTF-8",
+   L",-.،؟ـًُّ٠٢٣٤ءآأؤإئابةتثجحخدذرزسشصضطظعغفقكلمنهوىي"},
+
+  /* Various mixes of 1 and 2 byte chars.  */
+  {"cs_CZ.UTF-8",
+   L",.aAábcCčdDďeEéÉěĚfFghHiIíJlLmMnNňŇoóÓpPqQrřsSšŠTťuUúÚůŮvVWxyýz"},
+
+  {"el_GR.UTF-8",
+   L",.αΑβγδΔεΕζηΗθΘιΙκΚλμΜνΝξοΟπΠρΡσΣςτυΥφΦχψω"},
+
+  {"en_GB.UTF-8",
+   L",.aAāĀæÆǽǣǢbcCċdDðÐeEēĒfFgGġhHiIīĪlLmMnNoōpPqQrsSTuUūŪvVwxyȝzþÞƿǷ"},
+
+  {"fr_FR.UTF-8",
+   L",.aAàâbcCçdDeEéèêëfFghHiIîïjlLmMnNoOôœpPqQrRsSTuUùûvVwxyz"},
+
+  {"he_IL.UTF-8",
+   L"',.ִאבגדהוזחטיכךלמםנןסעפףצץקרשת"},
+
+  /* Devanagari and Japanese, mostly 3-byte chars.  */
+  {"hi_IN.UTF-8",
+   L"(।ं०४५७अआइईउऎएओऔकखगघचछजञटडढणतथदधनपफ़बभमयरलवशषसहािीुूृेैोौ्"},
+
+  {"ja_JP.UTF-8",
+   L".ー0123456789あアいイうウえエおオかカがきキぎくクぐけケげこコごさサざ"},
+
+  /* More mixtures of 1 and 2 byte chars.  */
+  {"ru_RU.UTF-8",
+   L",.аАбвВгдДеЕёЁжЖзЗийЙкКлЛмМнНоОпПрстТуУфФхХЦчшШщъыЫьэЭюЮя"},
+
+  {"sr_RS.UTF-8",
+   L",.aAbcCćčdDđĐeEfgGhHiIlLmMnNoOpPqQrsSšŠTuUvVxyzZž"},
+
+  {"sv_SE.UTF-8",
+   L",.aAåÅäÄæÆbBcCdDeEfFghHiIjlLmMnNoOöÖpPqQrsSTuUvVwxyz"},
+
+  /* Chinese, 3-byte chars.  */
+  {"zh_CN.UTF-8",
+   L"一七三下不与世両並中串主乱予事二五亡京人今仕付以任企伎会伸住佐体作使"},
+
+  /* 4-byte chars, because smileys are the universal language and we want to
+     ensure optimal performance with them 😊.  */
+  {"en_US.UTF-8",
+   L"😀😁😂😃😄😅😆😇😈😉😊😋😌😍😎😏😐😑😒😓😔😕😖😗😘😙😚😛😜😝😞😟😠😡"}
+};
+
+char buf[MB_LEN_MAX];  /* Output buffer for each wcrtomb call.  */
+size_t ret;            /* Last wcrtomb result; never read in main.  */
+
+/* Time NITERS full passes of wcrtomb over each locale's sample characters
+   and emit the max, min and mean pass time per locale as JSON on stdout.  */
+int
+main (int argc, char **argv)
+{
+  json_ctx_t json_ctx;
+  json_init (&json_ctx, 0, stdout);
+  json_document_begin (&json_ctx);
+  json_attr_string (&json_ctx, "timing_type", TIMING_TYPE);
+  json_attr_object_begin (&json_ctx, "functions");
+  json_attr_object_begin (&json_ctx, "wcrtomb");
+
+  for (size_t i = 0; i < array_length (inputs); i++)
+    {
+      json_attr_object_begin (&json_ctx, inputs[i].locale);
+      setlocale (LC_ALL, inputs[i].locale);  /* NOTE: result unchecked.  */
+
+      timing_t min = 0x7fffffffffffffff, max = 0, total = 0;
+      const wchar_t *inp = inputs[i].input_chars;
+      const size_t len = wcslen (inp);
+      mbstate_t s;
+      memset (&s, '\0', sizeof (s));
+
+      for (size_t n = 0; n < NITERS; n++)
+	{
+	  timing_t start, end, elapsed;
+	  TIMING_NOW (start);
+	  for (size_t j = 0; j < len; j++)
+	    ret = wcrtomb (buf, inp[j], &s);
+	  TIMING_NOW (end);
+	  TIMING_DIFF (elapsed, start, end);
+	  if (min > elapsed)
+	    min = elapsed;
+	  if (max < elapsed)
+	    max = elapsed;
+	  TIMING_ACCUM (total, elapsed);
+	}
+      json_attr_double (&json_ctx, "max", max);
+      json_attr_double (&json_ctx, "min", min);
+      /* Divide in floating point so the mean is not truncated.  */
+      json_attr_double (&json_ctx, "mean", (double) total / NITERS);
+      json_attr_object_end (&json_ctx);
+    }
+
+  json_attr_object_end (&json_ctx);
+  json_attr_object_end (&json_ctx);
+  json_document_end (&json_ctx);
+}