From: "H.J. Lu" <hjl.tools@gmail.com>
To: Noah Goldstein <goldstein.w.n@gmail.com>
Cc: GNU C Library <libc-alpha@sourceware.org>,
"Carlos O'Donell" <carlos@systemhalted.org>
Subject: Re: [PATCH v1 02/23] benchtests: Add random benchmark in bench-strchr.c
Date: Thu, 24 Mar 2022 11:44:15 -0700 [thread overview]
Message-ID: <CAMe9rOo8q0HT4txQumG-wdVhTWLqqfDWd5ZGz+H1_igh857TXQ@mail.gmail.com> (raw)
In-Reply-To: <20220323215734.3927131-2-goldstein.w.n@gmail.com>
On Wed, Mar 23, 2022 at 2:58 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
>
> Add a benchmark that randomizes whether the return should be NULL or a
> pointer to CHAR. The rationale is that on many architectures there is a
> choice between a predicated-execution option (i.e., cmovcc on x86) and
> a branch.
>
> On x86 the results for cmovcc vs branch are something along the lines
> of the following:
>
> perc-zero, Br On Result, Time Br / Time cmov
> 0.10, 1, ,0.983
> 0.10, 0, ,1.246
> 0.25, 1, ,1.035
> 0.25, 0, ,1.49
> 0.33, 1, ,1.016
> 0.33, 0, ,1.579
> 0.50, 1, ,1.228
> 0.50, 0, ,1.739
> 0.66, 1, ,1.039
> 0.66, 0, ,1.764
> 0.75, 1, ,0.996
> 0.75, 0, ,1.642
> 0.90, 1, ,1.071
> 0.90, 0, ,1.409
> 1.00, 1, ,0.937
> 1.00, 0, ,0.999
> ---
> benchtests/bench-strchr.c | 143 ++++++++++++++++++++++++++++++++++++++
> 1 file changed, 143 insertions(+)
>
> diff --git a/benchtests/bench-strchr.c b/benchtests/bench-strchr.c
> index 203900d4ad..54640bde7e 100644
> --- a/benchtests/bench-strchr.c
> +++ b/benchtests/bench-strchr.c
> @@ -53,6 +53,11 @@
> # define SMALL_CHAR 851
> #endif /* WIDE */
>
> +#ifdef USE_FOR_STRCHRNUL
> +# define DO_RAND_TEST(...)
> +#else
> +# define DO_RAND_TEST(...) do_rand_test(__VA_ARGS__)
> +#endif
> #ifdef USE_FOR_STRCHRNUL
> # define NULLRET(endptr) endptr
> #else
> @@ -74,6 +79,133 @@ simple_STRCHR (const CHAR *s, int c)
> IMPL (simple_STRCHR, 0)
> IMPL (STRCHR, 1)
>
> +#ifndef USE_FOR_STRCHRNUL
> +/* Random benchmarks for strchr (if return is CHAR or NULL). The
> + rationale for the benchmark is that returning null/char can be done
> + with predicate execution (i.e., cmovcc on x86) or a branch. */
> +
> +
> +/* Large enough that full history can't be stored in BHT. */
> +#define NUM_SEARCH_CHARS 2048
> +
> +/* The expectation is that use cases of strchr check the return value;
> + otherwise strchrnul would almost always be better. Since there is
> + another branch coming, we want to test the case where a potential
> + branch in strchr can be used to skip a later mispredict because of
> + the relationship between the two branches. */
> +static void __attribute__ ((noinline, noclone))
> +do_one_rand_plus_branch_test (json_ctx_t *json_ctx, impl_t *impl,
> + const CHAR *s, const CHAR *c)
> +{
> + size_t i, iters = INNER_LOOP_ITERS_LARGE;
> + int must_execute = 0;
> + timing_t start, stop, cur;
> + TIMING_NOW (start);
> + for (i = 0; i < iters; ++i)
> + {
> + if (CALL (impl, s, c[i % NUM_SEARCH_CHARS]))
> + {
> + /* We just need something that will force compiler to emit
> + a branch instead of conditional execution. */
> + ++must_execute;
> + asm volatile("" : : :);
> + }
> + }
> + TIMING_NOW (stop);
> +
> + TIMING_DIFF (cur, start, stop);
> +
> + json_element_double (json_ctx, (double)cur / (double)iters);
> +}
> +
> +static void __attribute__ ((noinline, noclone))
> +do_one_rand_test (json_ctx_t *json_ctx, impl_t *impl, const CHAR *s,
> + const CHAR *c)
> +{
> + size_t i, iters = INNER_LOOP_ITERS_LARGE;
> + timing_t start, stop, cur;
> + TIMING_NOW (start);
> + for (i = 0; i < iters; ++i)
> + {
> + CALL (impl, s, c[i % NUM_SEARCH_CHARS]);
> + }
> + TIMING_NOW (stop);
> +
> + TIMING_DIFF (cur, start, stop);
> +
> + json_element_double (json_ctx, (double)cur / (double)iters);
> +}
> +
> +static void
> +do_rand_test (json_ctx_t *json_ctx, size_t align, size_t pos, size_t len,
> + float perc_zero)
> +{
> + size_t i;
> + int perc_zero_int;
> + CHAR *buf = (CHAR *)buf1;
> + CHAR *c = (CHAR *)buf2;
> + align &= 127;
> + if ((align + len) * sizeof (CHAR) >= page_size)
> + return;
> +
> + /* Test is only interesting if we can hit both cases. */
> + if (pos >= len)
> + return;
> +
> + /* Segfault if we run the test. */
> + if (NUM_SEARCH_CHARS * sizeof (CHAR) > page_size)
> + return;
> +
> + for (i = 0; i < len; ++i)
> + {
> + buf[align + i] = 2;
> + }
> + buf[align + len] = 0;
> + buf[align + pos] = 1;
> +
> + perc_zero_int = perc_zero * RAND_MAX;
> + for (i = 0; i < NUM_SEARCH_CHARS; ++i)
> + {
> + if (rand () > perc_zero_int)
> + c[i] = 0;
> + else
> + c[i] = 1;
> + }
> + {
> + json_element_object_begin (json_ctx);
> + json_attr_uint (json_ctx, "rand", 1);
> + json_attr_uint (json_ctx, "branch", 1);
> + json_attr_double (json_ctx, "perc-zero", perc_zero);
> + json_attr_uint (json_ctx, "length", len);
> + json_attr_uint (json_ctx, "pos", pos);
> + json_attr_uint (json_ctx, "alignment", align);
> + json_array_begin (json_ctx, "timings");
> +
> + FOR_EACH_IMPL (impl, 0)
> + do_one_rand_plus_branch_test (json_ctx, impl, buf + align, c);
> +
> + json_array_end (json_ctx);
> + json_element_object_end (json_ctx);
> + }
> + {
> + json_element_object_begin (json_ctx);
> + json_attr_uint (json_ctx, "rand", 1);
> + json_attr_uint (json_ctx, "branch", 0);
> + json_attr_double (json_ctx, "perc-zero", perc_zero);
> + json_attr_uint (json_ctx, "length", len);
> + json_attr_uint (json_ctx, "pos", pos);
> + json_attr_uint (json_ctx, "alignment", align);
> + json_array_begin (json_ctx, "timings");
> +
> + FOR_EACH_IMPL (impl, 0)
> + do_one_rand_test (json_ctx, impl, buf + align, c);
> +
> + json_array_end (json_ctx);
> + json_element_object_end (json_ctx);
> + }
> +}
> +#endif
> +
> static void
> do_one_test (json_ctx_t *json_ctx, impl_t *impl, const CHAR *s, int c,
> const CHAR *exp_res)
> @@ -136,6 +268,7 @@ do_test (json_ctx_t *json_ctx, size_t align, size_t pos, size_t len,
> result = NULLRET (buf + align + len);
>
> json_element_object_begin (json_ctx);
> + json_attr_uint (json_ctx, "rand", 0);
> json_attr_uint (json_ctx, "length", len);
> json_attr_uint (json_ctx, "pos", pos);
> json_attr_uint (json_ctx, "seek_char", seek_char);
> @@ -234,6 +367,16 @@ test_main (void)
> do_test (&json_ctx, 0, i, i + 1, 0, BIG_CHAR);
> }
>
> + DO_RAND_TEST(&json_ctx, 0, 15, 16, 0.0);
> + DO_RAND_TEST(&json_ctx, 0, 15, 16, 0.1);
> + DO_RAND_TEST(&json_ctx, 0, 15, 16, 0.25);
> + DO_RAND_TEST(&json_ctx, 0, 15, 16, 0.33);
> + DO_RAND_TEST(&json_ctx, 0, 15, 16, 0.5);
> + DO_RAND_TEST(&json_ctx, 0, 15, 16, 0.66);
> + DO_RAND_TEST(&json_ctx, 0, 15, 16, 0.75);
> + DO_RAND_TEST(&json_ctx, 0, 15, 16, 0.9);
> + DO_RAND_TEST(&json_ctx, 0, 15, 16, 1.0);
> +
> json_array_end (&json_ctx);
> json_attr_object_end (&json_ctx);
> json_attr_object_end (&json_ctx);
> --
> 2.25.1
>
LGTM.
Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
Thanks.
--
H.J.
next prev parent reply other threads:[~2022-03-24 18:44 UTC|newest]
Thread overview: 76+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-03-23 21:57 [PATCH v1 01/23] benchtests: Use json-lib " Noah Goldstein
2022-03-23 21:57 ` [PATCH v1 02/23] benchtests: Add random benchmark " Noah Goldstein
2022-03-24 18:44 ` H.J. Lu [this message]
2022-03-23 21:57 ` [PATCH v1 03/23] x86: Code cleanup in strchr-avx2 and comment justifying branch Noah Goldstein
2022-03-24 18:53 ` H.J. Lu
2022-03-24 19:20 ` Noah Goldstein
2022-03-24 19:36 ` H.J. Lu
2022-05-12 19:31 ` Sunil Pandey
2022-03-23 21:57 ` [PATCH v1 04/23] x86: Code cleanup in strchr-evex " Noah Goldstein
2022-03-24 18:54 ` H.J. Lu
2022-05-12 19:32 ` Sunil Pandey
2022-03-23 21:57 ` [PATCH v1 05/23] benchtests: Use json-lib in bench-strpbrk.c Noah Goldstein
2022-03-24 18:54 ` H.J. Lu
2022-03-23 21:57 ` [PATCH v1 06/23] benchtests: Use json-lib in bench-strspn.c Noah Goldstein
2022-03-24 18:54 ` H.J. Lu
2022-03-23 21:57 ` [PATCH v1 07/23] x86: Optimize strcspn and strpbrk in strcspn-c.c Noah Goldstein
2022-03-24 18:55 ` H.J. Lu
2022-05-12 19:34 ` Sunil Pandey
2022-03-23 21:57 ` [PATCH v1 08/23] x86: Optimize strspn in strspn-c.c Noah Goldstein
2022-03-24 18:56 ` H.J. Lu
2022-05-12 19:39 ` Sunil Pandey
2022-03-23 21:57 ` [PATCH v1 09/23] x86: Remove strcspn-sse2.S and use the generic implementation Noah Goldstein
2022-03-24 18:57 ` H.J. Lu
2022-05-12 19:40 ` Sunil Pandey
2022-03-23 21:57 ` [PATCH v1 10/23] x86: Remove strpbrk-sse2.S " Noah Goldstein
2022-03-24 18:57 ` H.J. Lu
2022-05-12 19:41 ` Sunil Pandey
2022-03-23 21:57 ` [PATCH v1 11/23] x86: Remove strspn-sse2.S " Noah Goldstein
2022-03-24 18:57 ` H.J. Lu
2022-05-12 19:42 ` Sunil Pandey
2022-03-23 21:57 ` [PATCH v1 12/23] x86: Fix fallback for wcsncmp_avx2 in strcmp-avx2.S [BZ #28896] Noah Goldstein
2022-03-24 18:59 ` H.J. Lu
2022-03-24 19:18 ` Noah Goldstein
2022-03-24 19:34 ` H.J. Lu
2022-03-24 19:39 ` Noah Goldstein
2022-03-24 20:50 ` [PATCH v2 12/31] " Noah Goldstein
2022-03-24 21:26 ` H.J. Lu
2022-03-24 21:43 ` Noah Goldstein
2022-03-24 21:58 ` H.J. Lu
2022-05-04 6:05 ` Sunil Pandey
2022-03-23 21:57 ` [PATCH v1 13/23] benchtests: Use json-lib in bench-strcasecmp.c Noah Goldstein
2022-03-24 19:00 ` H.J. Lu
2022-03-23 21:57 ` [PATCH v1 14/23] benchtests: Use json-lib in bench-strncasecmp.c Noah Goldstein
2022-03-24 19:00 ` H.J. Lu
2022-03-23 21:57 ` [PATCH v1 15/23] string: Expand page cross tests in test-strcasecmp.c Noah Goldstein
2022-03-24 19:01 ` H.J. Lu
2022-03-23 21:57 ` [PATCH v1 16/23] string: Expand page cross tests in test-strncasecmp.c Noah Goldstein
2022-03-24 19:01 ` H.J. Lu
2022-03-23 21:57 ` [PATCH v1 17/23] x86: Optimize str{n}casecmp TOLOWER logic in strcmp.S Noah Goldstein
2022-03-24 19:02 ` H.J. Lu
2022-05-12 19:44 ` Sunil Pandey
2022-03-23 21:57 ` [PATCH v1 18/23] x86: Optimize str{n}casecmp TOLOWER logic in strcmp-sse42.S Noah Goldstein
2022-03-24 19:02 ` H.J. Lu
2022-05-12 19:45 ` Sunil Pandey
2022-03-23 21:57 ` [PATCH v1 19/23] string: Expand page cross test cases in test-strcmp.c Noah Goldstein
2022-03-24 19:02 ` H.J. Lu
2022-03-23 21:57 ` [PATCH v1 20/23] string: Expand page cross test cases in test-strncmp.c Noah Goldstein
2022-03-24 19:02 ` H.J. Lu
2022-03-23 21:57 ` [PATCH v1 21/23] x86: Add AVX2 optimized str{n}casecmp Noah Goldstein
2022-03-24 19:03 ` H.J. Lu
2022-03-24 22:41 ` [PATCH v3 " Noah Goldstein
2022-03-24 22:41 ` [PATCH v3 22/23] x86: Add EVEX " Noah Goldstein
2022-03-24 23:56 ` [PATCH v4 21/23] x86: Add AVX2 " Noah Goldstein
2022-03-24 23:56 ` [PATCH v4 22/23] x86: Add EVEX " Noah Goldstein
2022-03-25 18:15 ` H.J. Lu
2022-03-25 18:18 ` Noah Goldstein
2022-05-12 19:47 ` Sunil Pandey
2022-05-12 19:52 ` Sunil Pandey
2022-03-25 18:14 ` [PATCH v4 21/23] x86: Add AVX2 " H.J. Lu
2022-05-12 19:52 ` Sunil Pandey
2022-03-23 21:57 ` [PATCH v1 22/23] x86: Add EVEX " Noah Goldstein
2022-03-24 19:04 ` H.J. Lu
2022-03-23 21:57 ` [PATCH v1 23/23] x86: Remove AVX str{n}casecmp Noah Goldstein
2022-03-24 19:04 ` H.J. Lu
2022-05-12 19:54 ` Sunil Pandey
2022-03-24 18:43 ` [PATCH v1 01/23] benchtests: Use json-lib in bench-strchr.c H.J. Lu
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=CAMe9rOo8q0HT4txQumG-wdVhTWLqqfDWd5ZGz+H1_igh857TXQ@mail.gmail.com \
--to=hjl.tools@gmail.com \
--cc=carlos@systemhalted.org \
--cc=goldstein.w.n@gmail.com \
--cc=libc-alpha@sourceware.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).