From: Sunil Pandey <skpgkp2@gmail.com>
To: Noah Goldstein <goldstein.w.n@gmail.com>
Cc: GNU C Library <libc-alpha@sourceware.org>
Subject: Re: [PATCH v2 1/6] x86-64: Create microbenchmark infrastructure for libmvec
Date: Fri, 12 Nov 2021 14:49:43 -0800 [thread overview]
Message-ID: <CAMAf5_dBK1msQ+tUcJiNE45n7ZzOR8C53y=E9iLK1NrVrSCFsw@mail.gmail.com> (raw)
In-Reply-To: <CAFUsyf+syaV9Vk6WhPdJ+OSbDtCaU3LwWvXB9JqKoAjkg1u4HA@mail.gmail.com>
On Fri, Nov 12, 2021 at 1:02 PM Noah Goldstein <goldstein.w.n@gmail.com>
wrote:
> On Fri, Nov 12, 2021 at 1:19 PM Sunil K Pandey via Libc-alpha
> <libc-alpha@sourceware.org> wrote:
> >
> > Add python script to generate libmvec microbenchmark from the input
> > values for each libmvec function using skeleton benchmark template.
> >
> > Creates double and float benchmarks with vector length 1, 2, 4, 8,
> > and 16 for each libmvec function. Vector length 1 corresponds to
> > scalar version of function and is included for vector function perf
> > comparison.
> > ---
> > sysdeps/x86_64/fpu/Makeconfig | 35 ++
> > sysdeps/x86_64/fpu/Makefile | 40 ++
> > sysdeps/x86_64/fpu/bench-libmvec-skeleton.c | 104 +++++
> > sysdeps/x86_64/fpu/scripts/bench_libmvec.py | 464 ++++++++++++++++++++
> > 4 files changed, 643 insertions(+)
> > create mode 100644 sysdeps/x86_64/fpu/bench-libmvec-skeleton.c
> > create mode 100755 sysdeps/x86_64/fpu/scripts/bench_libmvec.py
> >
> > diff --git a/sysdeps/x86_64/fpu/Makeconfig
> b/sysdeps/x86_64/fpu/Makeconfig
> > index 24aaee1a43..503e9b5ffa 100644
> > --- a/sysdeps/x86_64/fpu/Makeconfig
> > +++ b/sysdeps/x86_64/fpu/Makeconfig
> > @@ -29,6 +29,23 @@ libmvec-funcs = \
> > sin \
> > sincos \
> >
> > +# Define libmvec function for benchtests directory.
> > +libmvec-bench-funcs = \
> > +
> > +bench-libmvec-double = \
> > + $(addprefix double-vlen1-, $(libmvec-bench-funcs)) \
> > + $(addprefix double-vlen2-, $(libmvec-bench-funcs)) \
> > + $(addprefix double-vlen4-, $(libmvec-bench-funcs)) \
> > + $(addprefix double-vlen4-avx2-, $(libmvec-bench-funcs)) \
> > + $(addprefix double-vlen8-, $(libmvec-bench-funcs)) \
> > +
> > +bench-libmvec-float = \
> > + $(addsuffix f, $(addprefix float-vlen1-, $(libmvec-bench-funcs))) \
> > + $(addsuffix f, $(addprefix float-vlen4-, $(libmvec-bench-funcs))) \
> > + $(addsuffix f, $(addprefix float-vlen8-, $(libmvec-bench-funcs))) \
> > + $(addsuffix f, $(addprefix float-vlen8-avx2-,
> $(libmvec-bench-funcs))) \
> > + $(addsuffix f, $(addprefix float-vlen16-, $(libmvec-bench-funcs))) \
> > +
> > # The base libmvec ABI tests.
> > libmvec-abi-func-tests = \
> > $(addprefix test-double-libmvec-,$(libmvec-funcs)) \
> > @@ -83,5 +100,23 @@ $(common-objpfx)libmvec.mk:
> $(common-objpfx)config.make
> > echo " \$$(float-vlen16-arch-ext-cflags)"; \
> > echo; \
> > done; \
> > + echo "endif"; \
> > + echo "ifeq (\$$(subdir),benchtests)"; \
> > + for t in $(libmvec-bench-funcs); do \
> > + echo "CFLAGS-bench-double-vlen4-$$t.c = \\"; \
> > + echo " \$$(double-vlen4-arch-ext-cflags)"; \
> > + echo "CFLAGS-bench-double-vlen4-avx2-$$t.c = \\"; \
> > + echo " \$$(double-vlen4-arch-ext2-cflags)"; \
> > + echo "CFLAGS-bench-double-vlen8-$$t.c = \\"; \
> > + echo " \$$(double-vlen8-arch-ext-cflags)"; \
> > + echo; \
> > + echo "CFLAGS-bench-float-vlen8-$${t}f.c = \\"; \
> > + echo " \$$(float-vlen8-arch-ext-cflags)"; \
> > + echo "CFLAGS-bench-float-vlen8-avx2-$${t}f.c = \\"; \
> > + echo " \$$(float-vlen8-arch-ext2-cflags)"; \
> > + echo "CFLAGS-bench-float-vlen16-$${t}f.c = \\"; \
> > + echo " \$$(float-vlen16-arch-ext-cflags)"; \
> > + echo; \
> > + done; \
> > echo "endif") > $@T
> > mv -f $@T $@
> > diff --git a/sysdeps/x86_64/fpu/Makefile b/sysdeps/x86_64/fpu/Makefile
> > index d172ae815d..9fb587cf8f 100644
> > --- a/sysdeps/x86_64/fpu/Makefile
> > +++ b/sysdeps/x86_64/fpu/Makefile
> > @@ -72,3 +72,43 @@ ifeq
> ($(subdir)$(config-cflags-mprefer-vector-width),mathyes)
> > # performance of sin and cos by more than 40% on Skylake.
> > CFLAGS-branred.c = -mprefer-vector-width=128
> > endif
> > +
> > +ifeq ($(subdir),benchtests)
> > +double-vlen4-arch-ext-cflags = -mavx
> > +double-vlen4-arch-ext2-cflags = -mavx2
> > +double-vlen8-arch-ext-cflags = -mavx512f
> > +
> > +float-vlen8-arch-ext-cflags = -mavx
> > +float-vlen8-arch-ext2-cflags = -mavx2
> > +float-vlen16-arch-ext-cflags = -mavx512f
> > +
> > +bench-libmvec := $(bench-libmvec-double) $(bench-libmvec-float)
> > +
> > +ifeq (${BENCHSET},)
> > +bench += $(bench-libmvec)
> > +endif
> > +
> > +ifeq (${STATIC-BENCHTESTS},yes)
> > +libmvec-benchtests = $(common-objpfx)mathvec/libmvec.a
> $(common-objpfx)math/libm.a
> > +else
> > +libmvec-benchtests = $(libmvec) $(libm)
> > +endif
> > +
> > +$(addprefix $(objpfx)bench-,$(bench-libmvec-double)):
> $(libmvec-benchtests)
> > +$(addprefix $(objpfx)bench-,$(bench-libmvec-float)):
> $(libmvec-benchtests)
> > +bench-libmvec-deps = $(..)sysdeps/x86_64/fpu/bench-libmvec-skeleton.c
> bench-timing.h Makefile
> > +
> > +$(objpfx)bench-float-%.c: $(bench-libmvec-deps)
> > + { if [ -n "$($*-INCLUDE)" ]; then \
> > + cat $($*-INCLUDE); \
> > + fi; \
> > + $(PYTHON) $(..)sysdeps/x86_64/fpu/scripts/bench_libmvec.py
> $(basename $(@F)); } > $@-tmp
> > + mv -f $@-tmp $@
> > +
> > +$(objpfx)bench-double-%.c: $(bench-libmvec-deps)
> > + { if [ -n "$($*-INCLUDE)" ]; then \
> > + cat $($*-INCLUDE); \
> > + fi; \
> > + $(PYTHON) $(..)sysdeps/x86_64/fpu/scripts/bench_libmvec.py
> $(basename $(@F)); } > $@-tmp
> > + mv -f $@-tmp $@
> > +endif
> > diff --git a/sysdeps/x86_64/fpu/bench-libmvec-skeleton.c
> b/sysdeps/x86_64/fpu/bench-libmvec-skeleton.c
> > new file mode 100644
> > index 0000000000..d56a0c4462
> > --- /dev/null
> > +++ b/sysdeps/x86_64/fpu/bench-libmvec-skeleton.c
> > @@ -0,0 +1,104 @@
> > +/* Skeleton for libmvec benchmark programs.
> > + Copyright (C) 2021 Free Software Foundation, Inc.
> > + This file is part of the GNU C Library.
> > +
> > + The GNU C Library is free software; you can redistribute it and/or
> > + modify it under the terms of the GNU Lesser General Public
> > + License as published by the Free Software Foundation; either
> > + version 2.1 of the License, or (at your option) any later version.
> > +
> > + The GNU C Library is distributed in the hope that it will be useful,
> > + but WITHOUT ANY WARRANTY; without even the implied warranty of
> > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> > + Lesser General Public License for more details.
> > +
> > + You should have received a copy of the GNU Lesser General Public
> > + License along with the GNU C Library; if not, see
> > + <https://www.gnu.org/licenses/>. */
> > +
> > +#include <string.h>
> > +#include <stdint.h>
> > +#include <stdbool.h>
> > +#include <stdio.h>
> > +#include <time.h>
> > +#include <inttypes.h>
> > +#include <bench-timing.h>
> > +#include <json-lib.h>
> > +#include <bench-util.h>
> > +
> > +#include <bench-util.c>
> > +#include <math-tests-arch.h>
> > +#define D_ITERS 10000
> > +
> > +int
> > +main (int argc, char **argv)
> > +{
> > + unsigned long i, k;
> > + timing_t start, end;
> > + json_ctx_t json_ctx;
> > +
> > +#if defined REQUIRE_AVX
> > + if (!CPU_FEATURE_ACTIVE (AVX))
> > + {
> > + printf ("AVX not supported.\n");
> > + return 0;
> > + }
> > +#elif defined REQUIRE_AVX2
> > + if (!CPU_FEATURE_ACTIVE (AVX2))
> > + {
> > + printf ("AVX2 not supported.\n");
> > + return 0;
> > + }
> > +#elif defined REQUIRE_AVX512F
> > + if (!CPU_FEATURE_ACTIVE (AVX512F))
> > + {
> > + printf ("AVX512F not supported.\n");
> > + return 0;
> > + }
> > +#endif
> > +
> > + bench_start ();
> > +
> > +#ifdef BENCH_INIT
> > + BENCH_INIT ();
> > +#endif
> > +
> > + json_init (&json_ctx, 2, stdout);
> > +
> > + /* Begin function. */
> > + json_attr_object_begin (&json_ctx, FUNCNAME);
> > +
> > + for (int v = 0; v < NUM_VARIANTS; v++)
> > + {
> > + double d_total_time = 0;
> > + uint64_t cur;
>
> Think these should also be type `timing_t`
>
I do not see any difference between using timing_t and uint64_t. In any
case, the variable cur stores the difference between the start and end
times, not a point in time.
>
> > + for (k = 0; k < D_ITERS; k++)
> > + {
> > + TIMING_NOW (start);
> > + for (i = 0; i < NUM_SAMPLES (v); i++)
>
> What is the rationale for both `D_ITERS` and `NUM_SAMPLES (v)`? Why not
> one loop that iterates for `D_ITERS * NUM_SAMPLES (v)`?
>
D_ITERS defines how many times the full data set of each variant is run.
NUM_SAMPLES (v) is the number of data sets in variant v. The indices v
and i select the i'th data set of variant v, and the vector function is
called on it. Having two loops simplifies the logic.
> > + BENCH_FUNC (v, i);
> > + TIMING_NOW (end);
> > +
> > + TIMING_DIFF (cur, start, end);
> > +
> > + d_total_time += cur;
>
> Think this should be `TIMING_ACCUM(d_total_time, cur)`.
>
There is not much difference between using TIMING_ACCUM and simply adding
cur to d_total_time.
> > +
> > + }
> > + double d_total_data_set = D_ITERS * NUM_SAMPLES (v) * STRIDE;
> > +
> > + /* Begin variant. */
> > + json_attr_object_begin (&json_ctx, VARIANT (v));
> > +
> > + json_attr_double (&json_ctx, "duration", d_total_time);
> > + json_attr_double (&json_ctx, "iterations", d_total_data_set);
> > + json_attr_double (&json_ctx, "mean", d_total_time /
> d_total_data_set);
> > +
> > + /* End variant. */
> > + json_attr_object_end (&json_ctx);
> > + }
> > +
> > + /* End function. */
> > + json_attr_object_end (&json_ctx);
> > +
> > + return 0;
> > +}
> > diff --git a/sysdeps/x86_64/fpu/scripts/bench_libmvec.py
> b/sysdeps/x86_64/fpu/scripts/bench_libmvec.py
> > new file mode 100755
> > index 0000000000..762865de8f
> > --- /dev/null
> > +++ b/sysdeps/x86_64/fpu/scripts/bench_libmvec.py
> > @@ -0,0 +1,464 @@
> > +#!/usr/bin/python3
> > +# Copyright (C) 2021 Free Software Foundation, Inc.
> > +# This file is part of the GNU C Library.
> > +#
> > +# The GNU C Library is free software; you can redistribute it and/or
> > +# modify it under the terms of the GNU Lesser General Public
> > +# License as published by the Free Software Foundation; either
> > +# version 2.1 of the License, or (at your option) any later version.
> > +#
> > +# The GNU C Library is distributed in the hope that it will be useful,
> > +# but WITHOUT ANY WARRANTY; without even the implied warranty of
> > +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> > +# Lesser General Public License for more details.
> > +#
> > +# You should have received a copy of the GNU Lesser General Public
> > +# License along with the GNU C Library; if not, see
> > +# <https://www.gnu.org/licenses/>.
> > +
> > +"""Benchmark program generator script
> > +
> > +This script takes a function name as input and generates a program using
> > +an libmvec input file located in the sysdeps/x86_64/fpu directory. The
> > +name of the input file should be of the form libmvec-foo-inputs where
> > +'foo' is the name of the function.
> > +"""
> > +
> > +from __future__ import print_function
> > +import sys
> > +import os
> > +import itertools
> > +import re
> > +
> > +# Macro definitions for functions that take no arguments. For functions
> > +# that take arguments, the STRUCT_TEMPLATE, ARGS_TEMPLATE and
> > +# VARIANTS_TEMPLATE are used instead.
> > +DEFINES_TEMPLATE = '''
> > +#define CALL_BENCH_FUNC(v, i) %(func)s ()
> > +#define NUM_VARIANTS (1)
> > +#define NUM_SAMPLES(v) (1)
> > +#define VARIANT(v) FUNCNAME "()"
> > +'''
> > +
> > +# Structures to store arguments for the function call. A function may
> > +# have its inputs partitioned to represent distinct performance
> > +# characteristics or distinct flavors of the function. Each such
> > +# variant is represented by the _VARIANT structure. The ARGS structure
> > +# represents a single set of arguments.
> > +BENCH_VEC_TEMPLATE = '''
> > +#define CALL_BENCH_FUNC(v, i) (__extension__ ({ \\
> > + %(defs)s mx0 = %(func)s (%(func_args)s); \\
> > + mx0; }))
> > +'''
> > +
> > +BENCH_SCALAR_TEMPLATE = '''
> > +#define CALL_BENCH_FUNC(v, i) %(func)s (%(func_args)s)
> > +'''
> > +
> > +STRUCT_TEMPLATE = '''struct args
> > +{
> > +%(args)s
> > + double timing;
> > +};
> > +
> > +struct _variants
> > +{
> > + const char *name;
> > + int count;
> > + struct args *in;
> > +};
> > +'''
> > +
> > +# The actual input arguments.
> > +ARGS_TEMPLATE = '''struct args in%(argnum)d[%(num_args)d] = {
> > +%(args)s
> > +};
> > +'''
> > +
> > +# The actual variants, along with macros defined to access the variants.
> > +VARIANTS_TEMPLATE = '''struct _variants variants[%(num_variants)d] = {
> > +%(variants)s
> > +};
> > +
> > +#define NUM_VARIANTS %(num_variants)d
> > +#define NUM_SAMPLES(i) (variants[i].count)
> > +#define VARIANT(i) (variants[i].name)
> > +'''
> > +
> > +# Epilogue for the generated source file.
> > +EPILOGUE = '''
> > +#define BENCH_FUNC(i, j) ({%(getret)s CALL_BENCH_FUNC (i, j);})
> > +#define FUNCNAME "%(func)s"
> > +#include <bench-libmvec-skeleton.c>'''
> > +
> > +
> > +def gen_source(func_types, directives, all_vals):
> > + """Generate source for the function
> > +
> > + Generate the C source for the function from the values and
> > + directives.
> > +
> > + Args:
> > + func: The function name
> > + directives: A dictionary of directives applicable to this function
> > + all_vals: A dictionary input values
> > + """
> > + # The includes go in first.
> > + for header in directives['includes']:
> > + print('#include <%s>' % header)
> > +
> > + for header in directives['include-sources']:
> > + print('#include "%s"' % header)
> > +
> > + argtype_vtable = {
> > + 2: '128',
> > + 4: '256',
> > + 8: '512'
> > + }
> > + prefix_vtable = {
> > + 2: 'b',
> > + 4: 'c',
> > + 8: 'e'
> > + }
> > +
> > + # Get all the function properties
> > + funcname_argtype = ''
> > + float_flag = False
> > + if func_types[1] == 'float':
> > + float_flag = True
> > + avx_flag = False
> > + if func_types[3] == 'avx2':
> > + avx_flag = True
> > + funcname_stride = int(func_types[2][4:])
> > + funcname_origin = func_types[-1]
> > + if float_flag:
> > + funcname_origin = funcname_origin[:-1]
> > +
> > + if funcname_stride == 1:
> > + # Prepare for scalar functions file generation
> > + funcname_prefix = ''
> > + funcname_prefix_1 = ''
> > + funcname_argtype = 'double'
> > + if float_flag:
> > + funcname_argtype = 'float'
> > + else:
> > + # Prepare for libmvec functions file generation
> > + funcname_prefix_1 = len(directives['args']) * 'v' + '_'
> > + aligned_stride = funcname_stride
> > + if float_flag:
> > + aligned_stride /= 2
> > + funcname_prefix = '_ZGV'
> > + if (avx_flag and (aligned_stride == 4)):
> > + funcname_prefix += 'd'
> > + else:
> > + funcname_prefix += prefix_vtable[aligned_stride]
> > + funcname_prefix = funcname_prefix + 'N' + func_types[2][4:]
> > + funcname_argtype = '__m' + argtype_vtable[aligned_stride]
> > + if not float_flag:
> > + funcname_argtype += 'd'
> > +
> > + # Include x86intrin.h for vector functions
> > + if not funcname_stride == 1:
> > + print('#include <x86intrin.h>')
> > + if (avx_flag and (aligned_stride == 4)):
> > + # For bench-float-vlen8-avx2* and bench-double-vlen4-avx2*
> > + print('#define REQUIRE_AVX2')
> > + elif aligned_stride == 8:
> > + # For bench-float-vlen16* and bench-double-vlen8*
> > + print('#define REQUIRE_AVX512F')
> > + elif aligned_stride == 4:
> > + # For bench-float-vlen8* and bench-double-vlen4* without avx2
> > + print('#define REQUIRE_AVX')
> > + else:
> > + print('#define FUNCTYPE %s' % funcname_argtype)
> > +
> > + print('#define STRIDE %d ' % funcname_stride)
> > +
> > + funcname = funcname_prefix + funcname_prefix_1 + funcname_origin
> > + if float_flag:
> > + funcname += 'f'
> > +
> > + funcname_rettype = funcname_argtype
> > + if directives['ret'] == '':
> > + funcname_rettype = 'void'
> > +
> > + funcname_inputtype = []
> > + for arg, i in zip(directives['args'], itertools.count()):
> > + funcname_inputtype.append(funcname_argtype)
> > + if arg[0] == '<' and arg[-1] == '>':
> > + pos = arg.rfind('*')
> > + if pos == -1:
> > + die('Output argument must be a pointer type')
> > + funcname_inputtype[i] += ' *'
> > +
> > + if not funcname_stride == 1:
> > + if len(directives['args']) == 2:
> > + print('extern %s %s (%s, %s);' % (funcname_rettype, funcname,
> funcname_inputtype[0], funcname_inputtype[1]))
> > + elif len(directives['args']) == 3:
> > + print('extern %s %s (%s, %s, %s);' % (funcname_rettype, funcname,
> funcname_inputtype[0], funcname_inputtype[1], funcname_inputtype[2]))
> > + else:
> > + print('extern %s %s (%s);' % (funcname_rettype, funcname,
> funcname_inputtype[0]))
> > +
> > + # Print macros. This branches out to a separate routine if
> > + # the function takes arguments.
> > + if not directives['args']:
> > + print(DEFINES_TEMPLATE % {'funcname': funcname})
> > + outargs = []
> > + else:
> > + outargs = _print_arg_data(funcname, float_flag, funcname_argtype,
> funcname_stride, directives, all_vals)
> > +
> > + # Print the output variable definitions if necessary.
> > + for out in outargs:
> > + print(out)
> > +
> > + # If we have a return value from the function, make sure it is
> > + # assigned to prevent the compiler from optimizing out the
> > + # call.
> > + getret = ''
> > +
> > + if directives['ret']:
> > + if funcname_argtype != '':
> > + print('static %s volatile ret;' % funcname_argtype)
> > + getret = 'ret ='
> > + else:
> > + print('static %s volatile ret;' % directives['ret'])
> > + getret = 'ret ='
> > +
> > + # Test initialization.
> > + if directives['init']:
> > + print('#define BENCH_INIT %s' % directives['init'])
> > +
> > + print(EPILOGUE % {'getret': getret, 'func': funcname})
> > +
> > +
> > +def _print_arg_data(func, float_flag, funcname_argtype,
> funcname_stride, directives, all_vals):
> > + """Print argument data
> > +
> > + This is a helper function for gen_source that prints structure and
> > + values for arguments and their variants and returns output arguments
> > + if any are found.
> > +
> > + Args:
> > + func: Function name
> > + float_flag: True if function is float type
> > + funcname_argtype: Type for vector variants
> > + funcname_stride: Vector Length
> > + directives: A dictionary of directives applicable to this function
> > + all_vals: A dictionary input values
> > +
> > + Returns:
> > + Returns a list of definitions for function arguments that act as
> > + output parameters.
> > + """
> > + # First, all of the definitions. We process writing of
> > + # CALL_BENCH_FUNC, struct args and also the output arguments
> > + # together in a single traversal of the arguments list.
> > + func_args = []
> > + _func_args = []
> > + arg_struct = []
> > + outargs = []
> > + # Conversion function for each type
> > + vtable = {
> > + '__m128d': '_mm_loadu_pd',
> > + '__m256d': '_mm256_loadu_pd',
> > + '__m512d': '_mm512_loadu_pd',
> > + '__m128': '_mm_loadu_ps',
> > + '__m256': '_mm256_loadu_ps',
> > + '__m512': '_mm512_loadu_ps',
> > + 'double': '',
> > + 'float': ''
> > + }
> > +
> > + # For double max_vlen=8, for float max_vlen=16.
> > + if float_flag == True:
> > + max_vlen = 16
> > + else:
> > + max_vlen = 8
> > +
> > + for arg, i in zip(directives['args'], itertools.count()):
> > + if arg[0] == '<' and arg[-1] == '>':
> > + outargs.append('static %s out%d __attribute__((used));' %
> (funcname_argtype, i))
> > + func_args.append('&out%d' % i)
> > + _func_args.append('&out%d' % i)
> > + else:
> > + arg_struct.append(' %s arg%d[STRIDE];' % (arg, i))
> > + func_args.append('%s (variants[v].in[i].arg%d)' %
> > + (vtable[funcname_argtype], i))
> > + _func_args.append('variants[v].in[i].arg%d[0]' % i)
> > +
> > + if funcname_stride == 1:
> > + print(BENCH_SCALAR_TEMPLATE % {'func': func,
> > + 'func_args': ', '.join(_func_args)})
> > + elif directives['ret'] == '':
> > + print(BENCH_SCALAR_TEMPLATE % {'func': func,
> > + 'func_args': ', '.join(func_args)})
> > + else:
> > + print(BENCH_VEC_TEMPLATE % {'func': func, 'func_args': ',
> '.join(func_args),
> > + 'defs': funcname_argtype})
> > + print(STRUCT_TEMPLATE % {'args': '\n'.join(arg_struct)})
> > +
> > + # Now print the values.
> > + variants = []
> > + for (k, _vals), i in zip(all_vals.items(), itertools.count()):
> > + vals = []
> > + temp_vals = []
> > + j = 0
> > + temp_j = 0
> > + result_v = ['', '', '']
> > + for _v in _vals:
> > + nums = _v.split(',')
> > + for l in range(0, len(nums)):
> > + result_v[l] = result_v[l] + nums[l].strip() + ','
> > + j += 1
> > + temp_j += 1
> > +
> > + if temp_j == funcname_stride:
> > + final_result = ''
> > + for l in range(0, len(nums)):
> > + final_result = final_result + '{' + result_v[l][:-1] + '},'
> > + temp_vals.append(final_result[:-1])
> > + temp_j = 0
> > + result_v = ['', '', '']
> > +
> > + # Make sure amount of test data is multiple of max_vlen
> > + # to keep data size same for all vector length.
> > + if j == max_vlen:
> > + vals.extend(temp_vals)
> > + temp_vals = []
> > + j = 0
> > +
> > + out = [' {%s, 0},' % v for v in vals]
> > +
> > + # Members for the variants structure list that we will
> > + # print later.
> > + variants.append(' {"%s", %d, in%d},' % (k, len(vals), i))
> > + print(ARGS_TEMPLATE % {'argnum': i, 'num_args': len(vals),
> > + 'args': '\n'.join(out)})
> > +
> > + # Print the variants and the last set of macros.
> > + print(VARIANTS_TEMPLATE % {'num_variants': len(all_vals),
> > + 'variants': '\n'.join(variants)})
> > + return outargs
> > +
> > +
> > +def _process_directive(d_name, d_val, func_args):
> > + """Process a directive.
> > +
> > + Evaluate the directive name and value passed and return the
> > + processed value. This is a helper function for parse_file.
> > +
> > + Args:
> > + d_name: Name of the directive
> > + d_val: The string value to process
> > +
> > + Returns:
> > + The processed value, which may be the string as it is or an object
> > + that describes the directive.
> > + """
> > + # Process the directive values if necessary. name and ret don't
> > + # need any processing.
> > + if d_name.startswith('include'):
> > + d_val = d_val.split(',')
> > + elif d_name == 'args':
> > + d_val = d_val.split(':')
> > + # Check if args type match
> > + if not d_val[0] == func_args:
> > + die("Args mismatch, should be %s, but get %s" % (d_val[0],
> func_args))
> > +
> > + # Return the values.
> > + return d_val
> > +
> > +
> > +def parse_file(func_types):
> > + """Parse an input file
> > +
> > + Given a function name, open and parse an input file for the function
> > + and get the necessary parameters for the generated code and the list
> > + of inputs.
> > +
> > + Args:
> > + func: The function name
> > +
> > + Returns:
> > + A tuple of two elements, one a dictionary of directives and the
> > + other a dictionary of all input values.
> > + """
> > + all_vals = {}
> > + # Valid directives.
> > + directives = {
> > + 'name': '',
> > + 'args': [],
> > + 'includes': [],
> > + 'include-sources': [],
> > + 'ret': '',
> > + 'init': ''
> > + }
> > +
> > + func = func_types[-1]
> > + try:
> > + with open('../sysdeps/x86_64/fpu/libmvec-%s-inputs' % func) as f:
> > + for line in f:
> > + # Look for directives and parse it if found.
> > + if line.startswith('##'):
> > + try:
> > + d_name, d_val = line[2:].split(':', 1)
> > + d_name = d_name.strip()
> > + d_val = d_val.strip()
> > + directives[d_name] = _process_directive(d_name, d_val,
> func_types[1])
> > + except (IndexError, KeyError):
> > + die('Invalid directive: %s' % line[2:])
> > +
> > + # Skip blank lines and comments.
> > + line = line.split('#', 1)[0].rstrip()
> > + if not line:
> > + continue
> > +
> > + # Otherwise, we're an input. Add to the appropriate
> > + # input set.
> > + cur_name = directives['name']
> > + all_vals.setdefault(cur_name, [])
> > + all_vals[cur_name].append(line)
> > + except IOError as ex:
> > + die("Failed to open input file (%s): %s" % (ex.filename,
> ex.strerror))
> > +
> > + return directives, all_vals
> > +
> > +
> > +def die(msg):
> > + """Exit with an error
> > +
> > + Prints an error message to the standard error stream and exits with
> > + a non-zero status.
> > +
> > + Args:
> > + msg: The error message to print to standard error
> > + """
> > + print('%s\n' % msg, file=sys.stderr)
> > + sys.exit(os.EX_DATAERR)
> > +
> > +
> > +def main(args):
> > + """Main function
> > +
> > + Use the first command line argument as function name and parse its
> > + input file to generate C source that calls the function repeatedly
> > + for the input.
> > +
> > + Args:
> > + args: The command line arguments with the program name dropped
> > +
> > + Returns:
> > + os.EX_USAGE on error and os.EX_OK on success.
> > + """
> > + if len(args) != 1:
> > + print('Usage: %s <function>' % sys.argv[0])
> > + return os.EX_USAGE
> > +
> > + func_types = args[0].split('-')
> > + directives, all_vals = parse_file(func_types)
> > + gen_source(func_types, directives, all_vals)
> > + return os.EX_OK
> > +
> > +
> > +if __name__ == '__main__':
> > + sys.exit(main(sys.argv[1:]))
> > --
> > 2.31.1
> >
>
next prev parent reply other threads:[~2021-11-12 22:50 UTC|newest]
Thread overview: 53+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-11-10 3:07 [PATCH 0/6] Implement microbenchmark " Sunil K Pandey
2021-11-10 3:07 ` [PATCH 1/6] x86-64: Create microbenchmark infrastructure " Sunil K Pandey
2021-11-10 3:53 ` Noah Goldstein
2021-11-11 18:34 ` Sunil Pandey
2021-11-12 19:17 ` [PATCH v2 0/6] Implement microbenchmark " Sunil K Pandey
2021-11-12 19:17 ` [PATCH v2 1/6] x86-64: Create microbenchmark infrastructure " Sunil K Pandey
2021-11-12 21:02 ` Noah Goldstein
2021-11-12 22:49 ` Sunil Pandey [this message]
2021-11-13 19:47 ` H.J. Lu
2021-11-14 2:59 ` Sunil Pandey
2021-11-15 21:06 ` [PATCH v3 0/6] Implement microbenchmark " Sunil K Pandey
2021-11-15 21:06 ` [PATCH v3 1/6] x86-64: Create microbenchmark infrastructure " Sunil K Pandey
2021-11-16 17:21 ` H.J. Lu
2021-11-16 18:37 ` [PATCH] " Sunil K Pandey
2021-11-16 18:40 ` H.J. Lu
2021-11-15 21:06 ` [PATCH v3 2/6] x86-64: Add vector cos/cosf to libmvec microbenchmark Sunil K Pandey
2021-11-15 21:06 ` [PATCH v3 3/6] x86-64: Add vector exp/expf " Sunil K Pandey
2021-11-15 21:06 ` [PATCH v3 4/6] x86-64: Add vector log/logf " Sunil K Pandey
2021-11-15 21:06 ` [PATCH v3 5/6] x86-64: Add vector pow/powf " Sunil K Pandey
2021-11-15 21:06 ` [PATCH v3 6/6] x86-64: Add vector sin/sinf " Sunil K Pandey
2021-11-12 19:17 ` [PATCH v2 2/6] x86-64: Add vector cos/cosf " Sunil K Pandey
2021-11-12 19:17 ` [PATCH v2 3/6] x86-64: Add vector exp/expf " Sunil K Pandey
2021-11-12 19:17 ` [PATCH v2 4/6] x86-64: Add vector log/logf " Sunil K Pandey
2021-11-12 23:18 ` Joseph Myers
2021-11-13 1:37 ` Sunil Pandey
2021-11-13 1:44 ` Joseph Myers
2021-11-13 6:14 ` Sunil Pandey
2021-11-16 0:12 ` Joseph Myers
2021-11-23 17:40 ` [PATCH v4 0/5] Add vector math functions to microbenchmark Sunil K Pandey
2021-11-23 17:40 ` [PATCH v4 1/5] x86-64: Add vector cos/cosf to libmvec microbenchmark Sunil K Pandey
2021-11-24 12:22 ` H.J. Lu
2021-11-23 17:40 ` [PATCH v4 2/5] x86-64: Add vector exp/expf " Sunil K Pandey
2021-11-24 12:24 ` H.J. Lu
2021-11-23 17:40 ` [PATCH v4 3/5] x86-64: Add vector log/logf " Sunil K Pandey
2021-11-24 12:25 ` H.J. Lu
2021-11-23 17:40 ` [PATCH v4 4/5] x86-64: Add vector pow/powf " Sunil K Pandey
2021-11-24 12:26 ` H.J. Lu
2021-11-23 17:40 ` [PATCH v4 5/5] x86-64: Add vector sin/sinf " Sunil K Pandey
2021-11-24 12:30 ` H.J. Lu
2021-11-23 18:30 ` [PATCH v2 4/6] x86-64: Add vector log/logf " Sunil Pandey
2021-11-23 22:13 ` Joseph Myers
2021-11-12 19:17 ` [PATCH v2 5/6] x86-64: Add vector pow/powf " Sunil K Pandey
2021-11-12 19:18 ` [PATCH v2 6/6] x86-64: Add vector sin/sinf " Sunil K Pandey
2021-11-10 3:07 ` [PATCH 2/6] x86-64: Add cos/cosf " Sunil K Pandey
2021-11-10 3:07 ` [PATCH 3/6] x86-64: Add exp/expf " Sunil K Pandey
2021-11-10 3:07 ` [PATCH 4/6] x86-64: Add log/logf " Sunil K Pandey
2021-11-10 3:07 ` [PATCH 5/6] x86-64: Add pow/powf " Sunil K Pandey
2021-11-10 20:27 ` Joseph Myers
2021-11-11 3:31 ` Sunil Pandey
2021-11-11 10:39 ` Szabolcs Nagy
2021-11-11 17:40 ` Sunil Pandey
2021-11-11 18:51 ` Sunil Pandey
2021-11-10 3:07 ` [PATCH 6/6] x86-64: Add sin/sinf " Sunil K Pandey
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to='CAMAf5_dBK1msQ+tUcJiNE45n7ZzOR8C53y=E9iLK1NrVrSCFsw@mail.gmail.com' \
--to=skpgkp2@gmail.com \
--cc=goldstein.w.n@gmail.com \
--cc=libc-alpha@sourceware.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).