From: "Guo, Wangyang" <wangyang.guo@intel.com>
To: Noah Goldstein <goldstein.w.n@gmail.com>
Cc: GNU C Library <libc-alpha@sourceware.org>
Subject: Re: [PATCH] benchtests: Add pthread-mutex-locks bench
Date: Thu, 21 Apr 2022 09:47:57 +0800 [thread overview]
Message-ID: <0b43186d-420e-8fe7-13fd-dde71c364e1c@intel.com> (raw)
In-Reply-To: <CAFUsyfKbvyoNW6OijQGh+bK+TwSSwUbWoOkVzmJ9=hhoF5BJrg@mail.gmail.com>
On 4/21/2022 1:57 AM, Noah Goldstein via Libc-alpha wrote:
> On Wed, Apr 20, 2022 at 12:49 AM Wangyang Guo <wangyang.guo@intel.com> wrote:
>>
>> Benchmark for testing pthread mutex locks performance with different
>> threads and critical sections.
>>
>> The test configuration consists of 3 parts:
>> 1. thread number
>> 2. critical-section length
>> 3. non-critical-section length
>>
>> The thread number starts at 1 and is doubled until it reaches the number
>> of CPU cores (nprocs). An additional over-saturation case (1.25 * nprocs)
>> is also included.
>> Critical-section is represented by a loop of shared do_filler(),
>> length can be determined by the loop iters.
>> The non-critical-section is similar to the critical-section, except that
>> it is based on the non-shared do_filler().
>>
>> Currently, adaptive pthread_mutex lock is tested.
>> ---
>> benchtests/Makefile | 2 +
>> benchtests/bench-pthread-mutex-locks.c | 297 +++++++++++++++++++++++++
>> 2 files changed, 299 insertions(+)
>> create mode 100644 benchtests/bench-pthread-mutex-locks.c
>>
>> diff --git a/benchtests/Makefile b/benchtests/Makefile
>> index 8dfca592fd..b477042e6c 100644
>> --- a/benchtests/Makefile
>> +++ b/benchtests/Makefile
>> @@ -102,6 +102,7 @@ endif
>>
>> bench-pthread := \
>> pthread-locks \
>> + pthread-mutex-locks \
>> pthread_once \
>> thread_create \
>> # bench-pthread
>> @@ -281,6 +282,7 @@ $(addprefix $(objpfx)bench-,$(math-benchset)): $(libm-benchtests)
>> $(addprefix $(objpfx)bench-,$(bench-pthread)): $(thread-library-benchtests)
>> $(addprefix $(objpfx)bench-,$(bench-malloc)): $(thread-library-benchtests)
>> $(addprefix $(objpfx)bench-,pthread-locks): $(libm-benchtests)
>> +$(addprefix $(objpfx)bench-,pthread-mutex-locks): $(libm-benchtests)
>>
>>
>>
>> diff --git a/benchtests/bench-pthread-mutex-locks.c b/benchtests/bench-pthread-mutex-locks.c
>> new file mode 100644
>> index 0000000000..76f7b43635
>> --- /dev/null
>> +++ b/benchtests/bench-pthread-mutex-locks.c
>> @@ -0,0 +1,297 @@
>> +/* Measure mutex_lock for different threads and critical sections.
>> + Copyright (C) 2020-2022 Free Software Foundation, Inc.
>> + This file is part of the GNU C Library.
>> +
>> + The GNU C Library is free software; you can redistribute it and/or
>> + modify it under the terms of the GNU Lesser General Public
>> + License as published by the Free Software Foundation; either
>> + version 2.1 of the License, or (at your option) any later version.
>> +
>> + The GNU C Library is distributed in the hope that it will be useful,
>> + but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
>> + Lesser General Public License for more details.
>> +
>> + You should have received a copy of the GNU Lesser General Public
>> + License along with the GNU C Library; if not, see
>> + <https://www.gnu.org/licenses/>. */
>> +
>> +#define TEST_MAIN
>> +#define TEST_NAME "pthread-mutex-locks"
>> +#define TIMEOUT (20 * 60)
>> +
>> +#include <stdio.h>
>> +#include <stdlib.h>
>> +#include <string.h>
>> +#include <unistd.h>
>> +#include <math.h>
>> +#include <pthread.h>
>> +#include <sys/time.h>
>> +#include <sys/sysinfo.h>
>> +#include "bench-timing.h"
>> +#include "json-lib.h"
>> +
>> +static pthread_mutex_t lock;
>> +static pthread_mutexattr_t attr;
>> +static pthread_barrier_t barrier;
>> +
>> +#define START_ITERS 1000
>> +
>> +#pragma GCC push_options
>> +#pragma GCC optimize(1)
>> +
>> +static int __attribute__ ((noinline)) fibonacci (int i)
>> +{
>> + asm("");
>> + if (i > 2)
>> + return fibonacci (i - 1) + fibonacci (i - 2);
>> + return 10 + i;
>> +}
>> +
>> +static void
>> +do_filler (void)
>> +{
>> + char buf1[512], buf2[512];
>> + int f = fibonacci (4);
>> + memcpy (buf1, buf2, f);
>> +}
>> +
>> +static void
>> +do_filler_shared (void)
>> +{
>> + static char buf1[512], buf2[512];
>> + int f = fibonacci (4);
>> + memcpy (buf1, buf2, f);
>> +}
>> +
>> +#pragma GCC pop_options
>> +
>> +#define UNIT_WORK_CRT do_filler_shared ()
>> +#define UNIT_WORK_NON_CRT do_filler ()
>> +
>> +static inline void
>> +critical_section (int length)
>> +{
>> + for (int i = length; i >= 0; i--)
>> + UNIT_WORK_CRT;
>> +}
>> +
>> +static inline void
>> +non_critical_section (int length)
>> +{
>> + for (int i = length; i >= 0; i--)
>> + UNIT_WORK_NON_CRT;
>> +}
>> +
>> +typedef struct Worker_Params
>> +{
>> + long iters;
>> + int crt_len;
>> + int non_crt_len;
>> + timing_t duration;
>> +} Worker_Params;
>> +
>> +static void *
>> +worker (void *v)
>> +{
>> + timing_t start, stop;
>> + Worker_Params *p = (Worker_Params *) v;
>> + long iters = p->iters;
>> + int crt_len = p->crt_len;
>> + int non_crt_len = p->non_crt_len;
>> +
>> + pthread_barrier_wait (&barrier);
>> + TIMING_NOW (start);
>> + while (iters--)
>> + {
>> + pthread_mutex_lock (&lock);
>> + critical_section (crt_len);
>> + pthread_mutex_unlock (&lock);
>> + non_critical_section (non_crt_len);
>> + }
>> + TIMING_NOW (stop);
>> +
>> + TIMING_DIFF (p->duration, start, stop);
>> + return NULL;
>> +}
>> +
>> +static double
>> +do_one_test (int num_threads, int crt_len, int non_crt_len, long iters)
>> +{
>> + int i;
>> + timing_t mean;
>> + Worker_Params *p, params[num_threads];
>> + pthread_t threads[num_threads];
>> +
>> + pthread_mutex_init (&lock, &attr);
>> + pthread_barrier_init (&barrier, NULL, num_threads);
>> +
>> + for (i = 0; i < num_threads; i++)
>> + {
>> + p = ¶ms[i];
>> + p->iters = iters;
>> + p->crt_len = crt_len;
>> + p->non_crt_len = non_crt_len;
>> + pthread_create (&threads[i], NULL, worker, (void *) p);
>> + }
>> + for (i = 0; i < num_threads; i++)
>> + pthread_join (threads[i], NULL);
>> +
>> + pthread_mutex_destroy (&lock);
>> + pthread_barrier_destroy (&barrier);
>> +
>> + mean = 0;
>> + for (i = 0; i < num_threads; i++)
>> + mean += params[i].duration;
>> + mean /= num_threads;
>> + return mean;
>> +}
>> +
>> +#define RUN_COUNT 10
>> +#define MIN_TEST_SEC 0.01
>> +
>> +static void
>> +do_bench_1 (int num_threads, int crt_len, int non_crt_len, json_ctx_t *js)
>> +{
>> + timing_t cur;
>> + struct timeval ts, te;
>> + double tsd, ted, td;
>> + long iters, iters_limit, total_iters;
>> + timing_t curs[RUN_COUNT + 2];
>> + int i, j;
>> + double mean, stdev;
>> +
>> + iters = START_ITERS;
>> + iters_limit = LONG_MAX / 100;
>> +
>> + while (1)
>> + {
>> + gettimeofday (&ts, NULL);
>> + cur = do_one_test (num_threads, crt_len, non_crt_len, iters);
>> + gettimeofday (&te, NULL);
>> + /* Make sure the test to run at least MIN_TEST_SEC. */
>> + tsd = ts.tv_sec + ts.tv_usec / 1000000.0;
>> + ted = te.tv_sec + te.tv_usec / 1000000.0;
>> + td = ted - tsd;
>> + if (td >= MIN_TEST_SEC || iters >= iters_limit)
>> + break;
>> +
>> + iters *= 10;
>> + }
>> +
>> + curs[0] = cur;
>> + for (i = 1; i < RUN_COUNT + 2; i++)
>> + curs[i] = do_one_test (num_threads, crt_len, non_crt_len, iters);
>> +
>> + /* Sort the results so we can discard the fastest and slowest
>> + times as outliers. */
>> + for (i = 0; i < RUN_COUNT + 1; i++)
>> + for (j = i + 1; j < RUN_COUNT + 2; j++)
>> + if (curs[i] > curs[j])
>> + {
>> + timing_t temp = curs[i];
>> + curs[i] = curs[j];
>> + curs[j] = temp;
>> + }
>> +
>> + /* Calculate mean and standard deviation. */
>> + mean = 0.0;
>> + total_iters = iters * num_threads;
>> + for (i = 1; i < RUN_COUNT + 1; i++)
>> + mean += (double) curs[i] / (double) total_iters;
>> + mean /= RUN_COUNT;
>> +
>> + stdev = 0.0;
>> + for (i = 1; i < RUN_COUNT + 1; i++)
>> + {
>> + double s = (double) curs[i] / (double) total_iters - mean;
>> + stdev += s * s;
>> + }
>> + stdev = sqrt (stdev / (RUN_COUNT - 1));
>> +
>> + json_element_object_begin (js);
>> + json_attr_uint (js, "thread", num_threads);
>> + json_attr_double (js, "mean", mean);
>> + json_attr_double (js, "stdev", stdev);
>> + json_attr_double (js, "min-outlier",
>> + (double) curs[0] / (double) total_iters);
>> + json_attr_double (js, "min", (double) curs[1] / (double) total_iters);
>> + json_attr_double (js, "max",
>> + (double) curs[RUN_COUNT] / (double) total_iters);
>> + json_attr_double (js, "max-outlier",
>> + (double) curs[RUN_COUNT + 1] / (double) total_iters);
>> + json_element_object_end (js);
>> +}
>> +
>> +#define TH_CONF_MAX 10
>> +
>> +int
>> +do_bench (void)
>> +{
>> + int rv = 0;
>> + json_ctx_t json_ctx;
>> + int i, j, k;
>> + int th_num, th_conf, nprocs;
>> + int threads[TH_CONF_MAX];
>> + int crt_lens[] = { 0, 1, 2, 4, 8, 16, 32, 64, 128 };
>> + int non_crt_lens[] = { 1, 32, 128 };
>> +
>> + json_init (&json_ctx, 2, stdout);
>> + json_document_begin (&json_ctx);
>> + json_attr_string (&json_ctx, "timing_type", TIMING_TYPE);
>> +
>> + /* The thread config begins from 1, and increases by 2x until nprocs.
>> + We also wants to test over-saturation case (1.25*nprocs). */
>> + nprocs = get_nprocs ();
>> + th_num = 1;
>> + for (th_conf = 0; th_conf < (TH_CONF_MAX - 2) && th_num < nprocs; th_conf++)
>> + {
>> + threads[th_conf] = th_num;
>> + th_num <<= 1;
>> + }
>> + threads[th_conf++] = nprocs;
>> + threads[th_conf++] = nprocs + nprocs / 4;
>> +
>> + json_array_begin (&json_ctx, "threads");
>> + for (i = 0; i < th_conf; i++)
>> + json_element_int (&json_ctx, threads[i]);
>> + json_array_end (&json_ctx);
>> +
>> + pthread_mutexattr_init (&attr);
>> + pthread_mutexattr_settype (&attr, PTHREAD_MUTEX_ADAPTIVE_NP);
>> + json_attr_string (&json_ctx, "lock-type", "adaptive-mutex");
>> +
>> + json_array_begin (&json_ctx, "non-critical-sections");
>> + for (k = 0; k < (sizeof (non_crt_lens) / sizeof (int)); k++)
>> + {
>> + int non_crt_len = non_crt_lens[k];
>> + json_element_object_begin (&json_ctx);
>> + json_attr_uint (&json_ctx, "non-critical-length", non_crt_len);
>> + json_array_begin (&json_ctx, "critical-sections");
>> + for (j = 0; j < (sizeof (crt_lens) / sizeof (int)); j++)
>> + {
>> + int crt_len = crt_lens[j];
>> + json_element_object_begin (&json_ctx);
>> + json_attr_uint (&json_ctx, "critical-length", crt_len);
>> + json_array_begin (&json_ctx, "results");
>> + for (i = 0; i < th_conf; i++)
>> + {
>> + th_num = threads[i];
>> + do_bench_1 (th_num, crt_len, non_crt_len, &json_ctx);
>> + }
>> + json_array_end (&json_ctx);
>> + json_element_object_end (&json_ctx);
>> + }
>> + json_array_end (&json_ctx);
>> + json_element_object_end (&json_ctx);
>> + }
>> + json_array_end (&json_ctx);
>> +
>> + json_document_end (&json_ctx);
>> +
>> + return rv;
>> +}
>> +
>> +#define TEST_FUNCTION do_bench ()
>> +
>> +#include "../test-skeleton.c"
>> --
>> 2.35.1
>>
>
> When I run this I get the following error:
>
> $> (cd /home/noah/programs/opensource/glibc-dev/build/glibc/; unset
> LD_LIBRARY_PATH; make --silent; make bench
> BENCHSET="bench-pthread";);
>
> Running /home/noah/programs/opensource/glibc-dev/build/glibc/benchtests/bench-pthread-locks
> Running /home/noah/programs/opensource/glibc-dev/build/glibc/benchtests/bench-pthread-mutex-locks
> Running /home/noah/programs/opensource/glibc-dev/build/glibc/benchtests/bench-pthread_once
> Running /home/noah/programs/opensource/glibc-dev/build/glibc/benchtests/bench-thread_create
> Benchmark output in
> /home/noah/programs/opensource/glibc-dev/build/glibc/benchtests/bench.out
> is not JSON.
> make[1]: *** [Makefile:412: bench-func] Error 65
> rm /home/noah/programs/opensource/glibc-dev/build/glibc/benchtests/bench-thread_create.c
> /home/noah/programs/opensource/glibc-dev/build/glibc/benchtests/bench-pthread_once.c
> make[1]: Leaving directory
> '/home/noah/programs/opensource/glibc-dev/src/glibc/benchtests'
> make: *** [Makefile:16: bench] Error 2
>
I am able to reproduce this after installing the Python jsonschema module.
The JSON output needs to be in a "key": {...} format in order to fit into
bench.out.
I will fix it.
next prev parent reply other threads:[~2022-04-21 1:48 UTC|newest]
Thread overview: 8+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-04-20 5:48 Wangyang Guo
2022-04-20 17:57 ` Noah Goldstein
2022-04-21 1:47 ` Guo, Wangyang [this message]
2022-04-21 3:28 ` [PATCH v2] " Wangyang Guo
2022-04-21 13:13 ` Noah Goldstein
2022-04-22 0:58 ` Guo, Wangyang
2022-04-24 3:04 ` Noah Goldstein
2022-04-27 20:38 ` H.J. Lu
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=0b43186d-420e-8fe7-13fd-dde71c364e1c@intel.com \
--to=wangyang.guo@intel.com \
--cc=goldstein.w.n@gmail.com \
--cc=libc-alpha@sourceware.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).