From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 2206) id 5ECB3395442B; Fri, 6 May 2022 12:51:31 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 5ECB3395442B MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Content-Type: text/plain; charset="utf-8" From: Siddhesh Poyarekar To: glibc-cvs@sourceware.org Subject: [glibc] benchtests: Add wcrtomb microbenchmark X-Act-Checkin: glibc X-Git-Author: Siddhesh Poyarekar X-Git-Refname: refs/heads/master X-Git-Oldrev: cf73acb596e39af4bd9f32846552cd41d9e17a78 X-Git-Newrev: 050cc5f7c1fc30ae2a071282faed9b6a68ca95ba Message-Id: <20220506125131.5ECB3395442B@sourceware.org> Date: Fri, 6 May 2022 12:51:31 +0000 (GMT) X-BeenThere: glibc-cvs@sourceware.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Glibc-cvs mailing list List-Unsubscribe: , List-Archive: List-Help: List-Subscribe: , X-List-Received-Date: Fri, 06 May 2022 12:51:31 -0000 https://sourceware.org/git/gitweb.cgi?p=glibc.git;h=050cc5f7c1fc30ae2a071282faed9b6a68ca95ba commit 050cc5f7c1fc30ae2a071282faed9b6a68ca95ba Author: Siddhesh Poyarekar Date: Fri May 6 18:16:43 2022 +0530 benchtests: Add wcrtomb microbenchmark Add a simple benchmark that measures wcrtomb performance with various locales with 1-4 byte characters. 
Signed-off-by: Siddhesh Poyarekar Reviewed-by: Florian Weimer Diff: --- benchtests/Makefile | 1 + benchtests/bench-wcrtomb.c | 139 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 140 insertions(+) diff --git a/benchtests/Makefile b/benchtests/Makefile index 149d87e22e..de9de5cf58 100644 --- a/benchtests/Makefile +++ b/benchtests/Makefile @@ -171,6 +171,7 @@ ifeq (no,$(cross-compiling)) wcsmbs-benchset := \ wcpcpy \ wcpncpy \ + wcrtomb \ wcscat \ wcschr \ wcschrnul \ diff --git a/benchtests/bench-wcrtomb.c b/benchtests/bench-wcrtomb.c new file mode 100644 index 0000000000..232a7d59de --- /dev/null +++ b/benchtests/bench-wcrtomb.c @@ -0,0 +1,139 @@ +/* Measure wcrtomb function. + Copyright The GNU Toolchain Authors. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#include <array_length.h> +#include <limits.h> +#include <locale.h> +#include <string.h> +#include <wchar.h> + +#include "bench-timing.h" +#include "json-lib.h" + +#define NITERS 100000 + +struct test_inputs +{ + const char *locale; + const wchar_t *input_chars; +}; + +/* The inputs represent different types of characters, e.g. RTL, 1 byte, 2 + byte, 3 byte and 4 byte chars. The exact number of inputs per locale + doesn't really matter because we're not looking to compare performance + between locales. */ +struct test_inputs inputs[] = +{ + /* RTL. 
*/ + {"ar_SA.UTF-8", + L",-.،؟ـًُّ٠٢٣٤ءآأؤإئابةتثجحخدذرزسشصضطظعغفقكلمنهوىي"}, + + /* Various mixes of 1 and 2 byte chars. */ + {"cs_CZ.UTF-8", + L",.aAábcCčdDďeEéÉěĚfFghHiIíJlLmMnNňŇoóÓpPqQrřsSšŠTťuUúÚůŮvVWxyýz"}, + + {"el_GR.UTF-8", + L",.αΑβγδΔεΕζηΗθΘιΙκΚλμΜνΝξοΟπΠρΡσΣςτυΥφΦχψω"}, + + {"en_GB.UTF-8", + L",.aAāĀæÆǽǣǢbcCċdDðÐeEēĒfFgGġhHiIīĪlLmMnNoōpPqQrsSTuUūŪvVwxyȝzþÞƿǷ"}, + + {"fr_FR.UTF-8", + L",.aAàâbcCçdDeEéèêëfFghHiIîïjlLmMnNoOôœpPqQrRsSTuUùûvVwxyz"}, + + {"he_IL.UTF-8", + L"',.ִאבגדהוזחטיכךלמםנןסעפףצץקרשת"}, + + /* Devanagari, Japanese, 3-byte chars. */ + {"hi_IN.UTF-8", + L"(।ं०४५७अआइईउऎएओऔकखगघचछजञटडढणतथदधनपफ़बभमयरलवशषसहािीुूृेैोौ्"}, + + {"ja_JP.UTF-8", + L".ー0123456789あアいイうウえエおオかカがきキぎくクぐけケげこコごさサざ"}, + + /* More mixtures of 1 and 2 byte chars. */ + {"ru_RU.UTF-8", + L",.аАбвВгдДеЕёЁжЖзЗийЙкКлЛмМнНоОпПрстТуУфФхХЦчшШщъыЫьэЭюЮя"}, + + {"sr_RS.UTF-8", + L",.aAbcCćčdDđĐeEfgGhHiIlLmMnNoOpPqQrsSšŠTuUvVxyzZž"}, + + {"sv_SE.UTF-8", + L",.aAåÅäÄæÆbBcCdDeEfFghHiIjlLmMnNoOöÖpPqQrsSTuUvVwxyz"}, + + /* Chinese, 3-byte chars */ + {"zh_CN.UTF-8", + L"一七三下不与世両並中串主乱予事二五亡京人今仕付以任企伎会伸住佐体作使"}, + + /* 4-byte chars, because smileys are the universal language and we want to + ensure optimal performance with them 😊. 
*/ + {"en_US.UTF-8", + L"😀😁😂😃😄😅😆😇😈😉😊😋😌😍😎😏😐😑😒😓😔😕😖😗😘😙😚😛😜😝😞😟😠😡"} +}; + +char buf[MB_LEN_MAX]; +size_t ret; + +int +main (int argc, char **argv) +{ + json_ctx_t json_ctx; + json_init (&json_ctx, 0, stdout); + json_document_begin (&json_ctx); + + json_attr_string (&json_ctx, "timing_type", TIMING_TYPE); + json_attr_object_begin (&json_ctx, "functions"); + json_attr_object_begin (&json_ctx, "wcrtomb"); + + for (size_t i = 0; i < array_length (inputs); i++) + { + json_attr_object_begin (&json_ctx, inputs[i].locale); + setlocale (LC_ALL, inputs[i].locale); + + timing_t min = 0x7fffffffffffffff, max = 0, total = 0; + const wchar_t *inp = inputs[i].input_chars; + const size_t len = wcslen (inp); + mbstate_t s; + + memset (&s, '\0', sizeof (s)); + + for (size_t n = 0; n < NITERS; n++) + { + timing_t start, end, elapsed; + + TIMING_NOW (start); + for (size_t j = 0; j < len; j++) + ret = wcrtomb (buf, inp[j], &s); + TIMING_NOW (end); + TIMING_DIFF (elapsed, start, end); + if (min > elapsed) + min = elapsed; + if (max < elapsed) + max = elapsed; + TIMING_ACCUM (total, elapsed); + } + json_attr_double (&json_ctx, "max", max); + json_attr_double (&json_ctx, "min", min); + json_attr_double (&json_ctx, "mean", total / NITERS); + json_attr_object_end (&json_ctx); + } + + json_attr_object_end (&json_ctx); + json_attr_object_end (&json_ctx); + json_document_end (&json_ctx); +}