From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail-qt1-x830.google.com (mail-qt1-x830.google.com [IPv6:2607:f8b0:4864:20::830]) by sourceware.org (Postfix) with ESMTPS id 310ED3857C58 for ; Thu, 11 Nov 2021 18:35:31 +0000 (GMT) DMARC-Filter: OpenDMARC Filter v1.4.1 sourceware.org 310ED3857C58 Received: by mail-qt1-x830.google.com with SMTP id v22so1526081qtx.8 for ; Thu, 11 Nov 2021 10:35:31 -0800 (PST) X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=x-gm-message-state:mime-version:references:in-reply-to:from:date :message-id:subject:to:cc; bh=G+Jl1pJSJuC2chImRztaM0+pEbSkOGaFYLGWxKoZso4=; b=VZofbuER3p8lwqepE0ohVoSRG3zL85cHDQ1l5y10Eh4Mel8RP29TIyQA2OQPeZVYU2 lsY3bCGVbLKlkOsmWjoLfr6jXqGhKA+uQlpNkTZEZS7LF7xH0sD8pvEgPXe82F6cjywO PZlmC+sWZLuTVidjeBjqB+GlapApx5gRlYHiu3t1wmaEOOEEgUr/7pyeMWZWr4nohaAN TNU4IXdjx2lkbL8S5nFvaUrW4F+YH+jmB0H9eFFrEh4ipMSQeHHvh8yCf7C3e0FVC6oj pJI3CcyuDD8N6sAJbm9PvscExrQOl+comH7OVkqkjJgU/3Li6URem9C1+MMZAL41Au29 1D1w== X-Gm-Message-State: AOAM531BLIa3xJpRJuKpZltsGe5JdYChUzEi5MQF12g9+WdjYaASyjaa UOcDcfAzaFp4gAIc9iH3h1N95/LeGBFWbkvkmRYN+Td6 X-Google-Smtp-Source: ABdhPJz8xVX9M1rznvelcBge0hkKMP9eNMCLb2+50dI5ui+qksfMfSjg9ytYoZe4xU6YvWZ6Mdz7ErwWbn51BTLtzgU= X-Received: by 2002:ac8:610b:: with SMTP id a11mr9765914qtm.182.1636655730473; Thu, 11 Nov 2021 10:35:30 -0800 (PST) MIME-Version: 1.0 References: <20211110030726.2468302-1-skpgkp2@gmail.com> <20211110030726.2468302-2-skpgkp2@gmail.com> In-Reply-To: From: Sunil Pandey Date: Thu, 11 Nov 2021 10:34:54 -0800 Message-ID: Subject: Re: [PATCH 1/6] x86-64: Create microbenchmark infrastructure for libmvec To: Noah Goldstein Cc: GNU C Library X-Spam-Status: No, score=-8.3 required=5.0 tests=BAYES_00, DKIM_SIGNED, DKIM_VALID, DKIM_VALID_AU, DKIM_VALID_EF, FREEMAIL_ENVFROM_END_DIGIT, FREEMAIL_FROM, GIT_PATCH_0, HK_RANDOM_ENVFROM, HK_RANDOM_FROM, HTML_MESSAGE, KAM_SHORT, RCVD_IN_DNSWL_NONE, SPF_HELO_NONE, SPF_PASS, TXREP autolearn=ham autolearn_force=no 
version=3.4.4 X-Spam-Checker-Version: SpamAssassin 3.4.4 (2020-01-24) on server2.sourceware.org Content-Type: text/plain; charset="UTF-8" X-Content-Filtered-By: Mailman/MimeDel 2.1.29 X-BeenThere: libc-alpha@sourceware.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Libc-alpha mailing list List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Thu, 11 Nov 2021 18:35:36 -0000 On Tue, Nov 9, 2021 at 7:54 PM Noah Goldstein wrote: > On Tue, Nov 9, 2021 at 9:08 PM Sunil K Pandey via Libc-alpha > wrote: > > > > Add python script to generate libmvec microbenchmark from the input > > values for each libmvec function using skeleton benchmark template. > > > > Creates double and float benchmarks with vector length 1, 2, 4, 8, > > and 16 for each libmvec function. Vector length 1 corresponds to > > scalar version of function and is included for vector function perf > > comparison. > > --- > > sysdeps/x86_64/fpu/Makeconfig | 35 ++ > > sysdeps/x86_64/fpu/Makefile | 40 ++ > > sysdeps/x86_64/fpu/bench-libmvec-skeleton.c | 105 +++++ > > sysdeps/x86_64/fpu/scripts/bench_libmvec.py | 464 ++++++++++++++++++++ > > 4 files changed, 644 insertions(+) > > create mode 100644 sysdeps/x86_64/fpu/bench-libmvec-skeleton.c > > create mode 100755 sysdeps/x86_64/fpu/scripts/bench_libmvec.py > > > > diff --git a/sysdeps/x86_64/fpu/Makeconfig > b/sysdeps/x86_64/fpu/Makeconfig > > index 24aaee1a43..503e9b5ffa 100644 > > --- a/sysdeps/x86_64/fpu/Makeconfig > > +++ b/sysdeps/x86_64/fpu/Makeconfig > > @@ -29,6 +29,23 @@ libmvec-funcs = \ > > sin \ > > sincos \ > > > > +# Define libmvec function for benchtests directory. 
> > +libmvec-bench-funcs = \ > > + > > +bench-libmvec-double = \ > > + $(addprefix double-vlen1-, $(libmvec-bench-funcs)) \ > > + $(addprefix double-vlen2-, $(libmvec-bench-funcs)) \ > > + $(addprefix double-vlen4-, $(libmvec-bench-funcs)) \ > > + $(addprefix double-vlen4-avx2-, $(libmvec-bench-funcs)) \ > > + $(addprefix double-vlen8-, $(libmvec-bench-funcs)) \ > > + > > +bench-libmvec-float = \ > > + $(addsuffix f, $(addprefix float-vlen1-, $(libmvec-bench-funcs))) \ > > + $(addsuffix f, $(addprefix float-vlen4-, $(libmvec-bench-funcs))) \ > > + $(addsuffix f, $(addprefix float-vlen8-, $(libmvec-bench-funcs))) \ > > + $(addsuffix f, $(addprefix float-vlen8-avx2-, > $(libmvec-bench-funcs))) \ > > + $(addsuffix f, $(addprefix float-vlen16-, $(libmvec-bench-funcs))) \ > > + > > # The base libmvec ABI tests. > > libmvec-abi-func-tests = \ > > $(addprefix test-double-libmvec-,$(libmvec-funcs)) \ > > @@ -83,5 +100,23 @@ $(common-objpfx)libmvec.mk: > $(common-objpfx)config.make > > echo " \$$(float-vlen16-arch-ext-cflags)"; \ > > echo; \ > > done; \ > > + echo "endif"; \ > > + echo "ifeq (\$$(subdir),benchtests)"; \ > > + for t in $(libmvec-bench-funcs); do \ > > + echo "CFLAGS-bench-double-vlen4-$$t.c = \\"; \ > > + echo " \$$(double-vlen4-arch-ext-cflags)"; \ > > + echo "CFLAGS-bench-double-vlen4-avx2-$$t.c = \\"; \ > > + echo " \$$(double-vlen4-arch-ext2-cflags)"; \ > > + echo "CFLAGS-bench-double-vlen8-$$t.c = \\"; \ > > + echo " \$$(double-vlen8-arch-ext-cflags)"; \ > > + echo; \ > > + echo "CFLAGS-bench-float-vlen8-$${t}f.c = \\"; \ > > + echo " \$$(float-vlen8-arch-ext-cflags)"; \ > > + echo "CFLAGS-bench-float-vlen8-avx2-$${t}f.c = \\"; \ > > + echo " \$$(float-vlen8-arch-ext2-cflags)"; \ > > + echo "CFLAGS-bench-float-vlen16-$${t}f.c = \\"; \ > > + echo " \$$(float-vlen16-arch-ext-cflags)"; \ > > + echo; \ > > + done; \ > > echo "endif") > $@T > > mv -f $@T $@ > > diff --git a/sysdeps/x86_64/fpu/Makefile b/sysdeps/x86_64/fpu/Makefile > > index 
d172ae815d..9fb587cf8f 100644 > > --- a/sysdeps/x86_64/fpu/Makefile > > +++ b/sysdeps/x86_64/fpu/Makefile > > @@ -72,3 +72,43 @@ ifeq > ($(subdir)$(config-cflags-mprefer-vector-width),mathyes) > > # performance of sin and cos by more than 40% on Skylake. > > CFLAGS-branred.c = -mprefer-vector-width=128 > > endif > > + > > +ifeq ($(subdir),benchtests) > > +double-vlen4-arch-ext-cflags = -mavx > > +double-vlen4-arch-ext2-cflags = -mavx2 > > +double-vlen8-arch-ext-cflags = -mavx512f > > + > > +float-vlen8-arch-ext-cflags = -mavx > > +float-vlen8-arch-ext2-cflags = -mavx2 > > +float-vlen16-arch-ext-cflags = -mavx512f > > + > > +bench-libmvec := $(bench-libmvec-double) $(bench-libmvec-float) > > + > > +ifeq (${BENCHSET},) > > +bench += $(bench-libmvec) > > +endif > > + > > +ifeq (${STATIC-BENCHTESTS},yes) > > +libmvec-benchtests = $(common-objpfx)mathvec/libmvec.a > $(common-objpfx)math/libm.a > > +else > > +libmvec-benchtests = $(libmvec) $(libm) > > +endif > > + > > +$(addprefix $(objpfx)bench-,$(bench-libmvec-double)): > $(libmvec-benchtests) > > +$(addprefix $(objpfx)bench-,$(bench-libmvec-float)): > $(libmvec-benchtests) > > +bench-libmvec-deps = $(..)sysdeps/x86_64/fpu/bench-libmvec-skeleton.c > bench-timing.h Makefile > > + > > +$(objpfx)bench-float-%.c: $(bench-libmvec-deps) > > + { if [ -n "$($*-INCLUDE)" ]; then \ > > + cat $($*-INCLUDE); \ > > + fi; \ > > + $(PYTHON) $(..)sysdeps/x86_64/fpu/scripts/bench_libmvec.py > $(basename $(@F)); } > $@-tmp > > + mv -f $@-tmp $@ > > + > > +$(objpfx)bench-double-%.c: $(bench-libmvec-deps) > > + { if [ -n "$($*-INCLUDE)" ]; then \ > > + cat $($*-INCLUDE); \ > > + fi; \ > > + $(PYTHON) $(..)sysdeps/x86_64/fpu/scripts/bench_libmvec.py > $(basename $(@F)); } > $@-tmp > > + mv -f $@-tmp $@ > > +endif > > diff --git a/sysdeps/x86_64/fpu/bench-libmvec-skeleton.c > b/sysdeps/x86_64/fpu/bench-libmvec-skeleton.c > > new file mode 100644 > > index 0000000000..6df1b221b8 > > --- /dev/null > > +++ 
b/sysdeps/x86_64/fpu/bench-libmvec-skeleton.c > > @@ -0,0 +1,105 @@ > > +/* Skeleton for libmvec benchmark programs. > > + Copyright (C) 2021 Free Software Foundation, Inc. > > + This file is part of the GNU C Library. > > + > > + The GNU C Library is free software; you can redistribute it and/or > > + modify it under the terms of the GNU Lesser General Public > > + License as published by the Free Software Foundation; either > > + version 2.1 of the License, or (at your option) any later version. > > + > > + The GNU C Library is distributed in the hope that it will be useful, > > + but WITHOUT ANY WARRANTY; without even the implied warranty of > > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > + Lesser General Public License for more details. > > + > > + You should have received a copy of the GNU Lesser General Public > > + License along with the GNU C Library; if not, see > > + <https://www.gnu.org/licenses/>. */ > > + > > +#include > > +#include > > +#include > > +#include > > +#include > > +#include > > +#include > > +#include > > +#include > > + > > +#include > > +#include > > + > > +int > > +main (int argc, char **argv) > > +{ > > + unsigned long i, k; > > + timing_t start, end; > > + json_ctx_t json_ctx; > > + > > +#if defined REQUIRE_AVX > > + if (!CPU_FEATURE_ACTIVE (AVX)) > > + { > > + printf ("AVX not supported.\n"); > > + return 0; > > + } > > +#elif defined REQUIRE_AVX2 > > + if (!CPU_FEATURE_ACTIVE (AVX2)) > > + { > > + printf ("AVX2 not supported.\n"); > > + return 0; > > + } > > +#elif defined REQUIRE_AVX512F > > + if (!CPU_FEATURE_ACTIVE (AVX512F)) > > + { > > + printf ("AVX512F not supported.\n"); > > + return 0; > > + } > > +#endif > > + > > + bench_start (); > > + > > + unsigned long d_iters = 10000; > > Think this might be best off as a defined constant. > Agree, will fix it in v2. > > + > > +#ifdef BENCH_INIT > > + BENCH_INIT (); > > +#endif > > + > > + json_init (&json_ctx, 2, stdout); > > + > > + /* Begin function. 
*/ > > + json_attr_object_begin (&json_ctx, FUNCNAME); > > + > > + for (int v = 0; v < NUM_VARIANTS; v++) > > + { > > + double d_total_time = 0; > > + uint64_t cur; > > + for (k = 0; k < d_iters; k++) > > + { > > + TIMING_NOW (start); > > + for (i = 0; i < NUM_SAMPLES (v); i++) > > + BENCH_FUNC (v, i); > > + TIMING_NOW (end); > > + > > + TIMING_DIFF (cur, start, end); > > + > > + d_total_time += cur; > > + > > + } > > + double d_total_data_set = d_iters * NUM_SAMPLES (v) * STRIDE; > > + > > + /* Begin variant. */ > > + json_attr_object_begin (&json_ctx, VARIANT (v)); > > + > > + json_attr_double (&json_ctx, "duration", d_total_time); > > + json_attr_double (&json_ctx, "iterations", d_total_data_set); > > + json_attr_double (&json_ctx, "mean", d_total_time / > d_total_data_set); > > + > > + /* End variant. */ > > + json_attr_object_end (&json_ctx); > > + } > > + > > + /* End function. */ > > + json_attr_object_end (&json_ctx); > > + > > + return 0; > > +} > > diff --git a/sysdeps/x86_64/fpu/scripts/bench_libmvec.py > b/sysdeps/x86_64/fpu/scripts/bench_libmvec.py > > new file mode 100755 > > index 0000000000..762865de8f > > --- /dev/null > > +++ b/sysdeps/x86_64/fpu/scripts/bench_libmvec.py > > @@ -0,0 +1,464 @@ > > +#!/usr/bin/python3 > > +# Copyright (C) 2021 Free Software Foundation, Inc. > > +# This file is part of the GNU C Library. > > +# > > +# The GNU C Library is free software; you can redistribute it and/or > > +# modify it under the terms of the GNU Lesser General Public > > +# License as published by the Free Software Foundation; either > > +# version 2.1 of the License, or (at your option) any later version. > > +# > > +# The GNU C Library is distributed in the hope that it will be useful, > > +# but WITHOUT ANY WARRANTY; without even the implied warranty of > > +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > +# Lesser General Public License for more details. 
> > +# > > +# You should have received a copy of the GNU Lesser General Public > > +# License along with the GNU C Library; if not, see > > +# <https://www.gnu.org/licenses/>. > > + > > +"""Benchmark program generator script > > + > > +This script takes a function name as input and generates a program using > > +a libmvec input file located in the sysdeps/x86_64/fpu directory. The > > +name of the input file should be of the form libmvec-foo-inputs where > > +'foo' is the name of the function. > > +""" > > + > > +from __future__ import print_function > > +import sys > > +import os > > +import itertools > > +import re > > + > > +# Macro definitions for functions that take no arguments. For functions > > +# that take arguments, the STRUCT_TEMPLATE, ARGS_TEMPLATE and > > +# VARIANTS_TEMPLATE are used instead. > > +DEFINES_TEMPLATE = ''' > > +#define CALL_BENCH_FUNC(v, i) %(func)s () > > +#define NUM_VARIANTS (1) > > +#define NUM_SAMPLES(v) (1) > > +#define VARIANT(v) FUNCNAME "()" > > +''' > > + > > +# Structures to store arguments for the function call. A function may > > +# have its inputs partitioned to represent distinct performance > > +# characteristics or distinct flavors of the function. Each such > > +# variant is represented by the _VARIANT structure. The ARGS structure > > +# represents a single set of arguments. > > +BENCH_VEC_TEMPLATE = ''' > > +#define CALL_BENCH_FUNC(v, i) (__extension__ ({ \\ > > + %(defs)s mx0 = %(func)s (%(func_args)s); \\ > > + mx0; })) > > +''' > > + > > +BENCH_SCALAR_TEMPLATE = ''' > > +#define CALL_BENCH_FUNC(v, i) %(func)s (%(func_args)s) > > +''' > > + > > +STRUCT_TEMPLATE = '''struct args > > +{ > > +%(args)s > > + double timing; > > +}; > > + > > +struct _variants > > +{ > > + const char *name; > > + int count; > > + struct args *in; > > +}; > > +''' > > + > > +# The actual input arguments. 
> > +ARGS_TEMPLATE = '''struct args in%(argnum)d[%(num_args)d] = { > > +%(args)s > > +}; > > +''' > > + > > +# The actual variants, along with macros defined to access the variants. > > +VARIANTS_TEMPLATE = '''struct _variants variants[%(num_variants)d] = { > > +%(variants)s > > +}; > > + > > +#define NUM_VARIANTS %(num_variants)d > > +#define NUM_SAMPLES(i) (variants[i].count) > > +#define VARIANT(i) (variants[i].name) > > +''' > > + > > +# Epilogue for the generated source file. > > +EPILOGUE = ''' > > +#define BENCH_FUNC(i, j) ({%(getret)s CALL_BENCH_FUNC (i, j);}) > > +#define FUNCNAME "%(func)s" > > +#include ''' > > + > > + > > +def gen_source(func_types, directives, all_vals): > > + """Generate source for the function > > + > > + Generate the C source for the function from the values and > > + directives. > > + > > + Args: > > + func: The function name > > + directives: A dictionary of directives applicable to this function > > + all_vals: A dictionary input values > > + """ > > + # The includes go in first. 
> > + for header in directives['includes']: > > + print('#include <%s>' % header) > > + > > + for header in directives['include-sources']: > > + print('#include "%s"' % header) > > + > > + argtype_vtable = { > > + 2: '128', > > + 4: '256', > > + 8: '512' > > + } > > + prefix_vtable = { > > + 2: 'b', > > + 4: 'c', > > + 8: 'e' > > + } > > + > > + # Get all the function properties > > + funcname_argtype = '' > > + float_flag = False > > + if func_types[1] == 'float': > > + float_flag = True > > + avx_flag = False > > + if func_types[3] == 'avx2': > > + avx_flag = True > > + funcname_stride = int(func_types[2][4:]) > > + funcname_origin = func_types[-1] > > + if float_flag: > > + funcname_origin = funcname_origin[:-1] > > + > > + if funcname_stride == 1: > > + # Prepare for scalar functions file generation > > + funcname_prefix = '' > > + funcname_prefix_1 = '' > > + funcname_argtype = 'double' > > + if float_flag: > > + funcname_argtype = 'float' > > + else: > > + # Prepare for libmvec functions file generation > > + funcname_prefix_1 = len(directives['args']) * 'v' + '_' > > + aligned_stride = funcname_stride > > + if float_flag: > > + aligned_stride /= 2 > > + funcname_prefix = '_ZGV' > > + if (avx_flag and (aligned_stride == 4)): > > + funcname_prefix += 'd' > > + else: > > + funcname_prefix += prefix_vtable[aligned_stride] > > + funcname_prefix = funcname_prefix + 'N' + func_types[2][4:] > > + funcname_argtype = '__m' + argtype_vtable[aligned_stride] > > + if not float_flag: > > + funcname_argtype += 'd' > > + > > + # Include x86intrin.h for vector functions > > + if not funcname_stride == 1: > > + print('#include ') > > + if (avx_flag and (aligned_stride == 4)): > > + # For bench-float-vlen8-avx2* and bench-double-vlen4-avx2* > > + print('#define REQUIRE_AVX2') > > + elif aligned_stride == 8: > > + # For bench-float-vlen16* and bench-double-vlen8* > > + print('#define REQUIRE_AVX512F') > > + elif aligned_stride == 4: > > + # For bench-float-vlen8* and 
bench-double-vlen4* without avx2 > > + print('#define REQUIRE_AVX') > > + else: > > + print('#define FUNCTYPE %s' % funcname_argtype) > > + > > + print('#define STRIDE %d ' % funcname_stride) > > + > > + funcname = funcname_prefix + funcname_prefix_1 + funcname_origin > > + if float_flag: > > + funcname += 'f' > > + > > + funcname_rettype = funcname_argtype > > + if directives['ret'] == '': > > + funcname_rettype = 'void' > > + > > + funcname_inputtype = [] > > + for arg, i in zip(directives['args'], itertools.count()): > > + funcname_inputtype.append(funcname_argtype) > > + if arg[0] == '<' and arg[-1] == '>': > > + pos = arg.rfind('*') > > + if pos == -1: > > + die('Output argument must be a pointer type') > > + funcname_inputtype[i] += ' *' > > + > > + if not funcname_stride == 1: > > + if len(directives['args']) == 2: > > + print('extern %s %s (%s, %s);' % (funcname_rettype, funcname, > funcname_inputtype[0], funcname_inputtype[1])) > > + elif len(directives['args']) == 3: > > + print('extern %s %s (%s, %s, %s);' % (funcname_rettype, funcname, > funcname_inputtype[0], funcname_inputtype[1], funcname_inputtype[2])) > > + else: > > + print('extern %s %s (%s);' % (funcname_rettype, funcname, > funcname_inputtype[0])) > > + > > + # Print macros. This branches out to a separate routine if > > + # the function takes arguments. > > + if not directives['args']: > > + print(DEFINES_TEMPLATE % {'funcname': funcname}) > > + outargs = [] > > + else: > > + outargs = _print_arg_data(funcname, float_flag, funcname_argtype, > funcname_stride, directives, all_vals) > > + > > + # Print the output variable definitions if necessary. > > + for out in outargs: > > + print(out) > > + > > + # If we have a return value from the function, make sure it is > > + # assigned to prevent the compiler from optimizing out the > > + # call. 
> > + getret = '' > > + > > + if directives['ret']: > > + if funcname_argtype != '': > > + print('static %s volatile ret;' % funcname_argtype) > > + getret = 'ret =' > > + else: > > + print('static %s volatile ret;' % directives['ret']) > > + getret = 'ret =' > > + > > + # Test initialization. > > + if directives['init']: > > + print('#define BENCH_INIT %s' % directives['init']) > > + > > + print(EPILOGUE % {'getret': getret, 'func': funcname}) > > + > > + > > +def _print_arg_data(func, float_flag, funcname_argtype, > funcname_stride, directives, all_vals): > > + """Print argument data > > + > > + This is a helper function for gen_source that prints structure and > > + values for arguments and their variants and returns output arguments > > + if any are found. > > + > > + Args: > > + func: Function name > > + float_flag: True if function is float type > > + funcname_argtype: Type for vector variants > > + funcname_stride: Vector Length > > + directives: A dictionary of directives applicable to this function > > + all_vals: A dictionary input values > > + > > + Returns: > > + Returns a list of definitions for function arguments that act as > > + output parameters. > > + """ > > + # First, all of the definitions. We process writing of > > + # CALL_BENCH_FUNC, struct args and also the output arguments > > + # together in a single traversal of the arguments list. > > + func_args = [] > > + _func_args = [] > > + arg_struct = [] > > + outargs = [] > > + # Conversion function for each type > > + vtable = { > > + '__m128d': '_mm_loadu_pd', > > + '__m256d': '_mm256_loadu_pd', > > + '__m512d': '_mm512_loadu_pd', > > + '__m128': '_mm_loadu_ps', > > + '__m256': '_mm256_loadu_ps', > > + '__m512': '_mm512_loadu_ps', > > + 'double': '', > > + 'float': '' > > + } > > + > > + # For double max_vlen=8, for float max_vlen=16. 
> > + if float_flag == True: > > + max_vlen = 16 > > + else: > > + max_vlen = 8 > > + > > + for arg, i in zip(directives['args'], itertools.count()): > > + if arg[0] == '<' and arg[-1] == '>': > > + outargs.append('static %s out%d __attribute__((used));' % > (funcname_argtype, i)) > > + func_args.append('&out%d' % i) > > + _func_args.append('&out%d' % i) > > + else: > > + arg_struct.append(' %s arg%d[STRIDE];' % (arg, i)) > > + func_args.append('%s (variants[v].in[i].arg%d)' % > > + (vtable[funcname_argtype], i)) > > + _func_args.append('variants[v].in[i].arg%d[0]' % i) > > + > > + if funcname_stride == 1: > > + print(BENCH_SCALAR_TEMPLATE % {'func': func, > > + 'func_args': ', '.join(_func_args)}) > > + elif directives['ret'] == '': > > + print(BENCH_SCALAR_TEMPLATE % {'func': func, > > + 'func_args': ', '.join(func_args)}) > > + else: > > + print(BENCH_VEC_TEMPLATE % {'func': func, 'func_args': ', > '.join(func_args), > > + 'defs': funcname_argtype}) > > + print(STRUCT_TEMPLATE % {'args': '\n'.join(arg_struct)}) > > + > > + # Now print the values. > > + variants = [] > > + for (k, _vals), i in zip(all_vals.items(), itertools.count()): > > + vals = [] > > + temp_vals = [] > > + j = 0 > > + temp_j = 0 > > + result_v = ['', '', ''] > > + for _v in _vals: > > + nums = _v.split(',') > > + for l in range(0, len(nums)): > > + result_v[l] = result_v[l] + nums[l].strip() + ',' > > + j += 1 > > + temp_j += 1 > > + > > + if temp_j == funcname_stride: > > + final_result = '' > > + for l in range(0, len(nums)): > > + final_result = final_result + '{' + result_v[l][:-1] + '},' > > + temp_vals.append(final_result[:-1]) > > + temp_j = 0 > > + result_v = ['', '', ''] > > + > > + # Make sure amount of test data is multiple of max_vlen > > + # to keep data size same for all vector length. 
> > + if j == max_vlen: > > + vals.extend(temp_vals) > > + temp_vals = [] > > + j = 0 > > + > > + out = [' {%s, 0},' % v for v in vals] > > + > > + # Members for the variants structure list that we will > > + # print later. > > + variants.append(' {"%s", %d, in%d},' % (k, len(vals), i)) > > + print(ARGS_TEMPLATE % {'argnum': i, 'num_args': len(vals), > > + 'args': '\n'.join(out)}) > > + > > + # Print the variants and the last set of macros. > > + print(VARIANTS_TEMPLATE % {'num_variants': len(all_vals), > > + 'variants': '\n'.join(variants)}) > > + return outargs > > + > > + > > +def _process_directive(d_name, d_val, func_args): > > + """Process a directive. > > + > > + Evaluate the directive name and value passed and return the > > + processed value. This is a helper function for parse_file. > > + > > + Args: > > + d_name: Name of the directive > > + d_val: The string value to process > > + > > + Returns: > > + The processed value, which may be the string as it is or an object > > + that describes the directive. > > + """ > > + # Process the directive values if necessary. name and ret don't > > + # need any processing. > > + if d_name.startswith('include'): > > + d_val = d_val.split(',') > > + elif d_name == 'args': > > + d_val = d_val.split(':') > > + # Check if args type match > > + if not d_val[0] == func_args: > > + die("Args mismatch, should be %s, but get %s" % (d_val[0], > func_args)) > > + > > + # Return the values. > > + return d_val > > + > > + > > +def parse_file(func_types): > > + """Parse an input file > > + > > + Given a function name, open and parse an input file for the function > > + and get the necessary parameters for the generated code and the list > > + of inputs. > > + > > + Args: > > + func: The function name > > + > > + Returns: > > + A tuple of two elements, one a dictionary of directives and the > > + other a dictionary of all input values. > > + """ > > + all_vals = {} > > + # Valid directives. 
> > + directives = { > > + 'name': '', > > + 'args': [], > > + 'includes': [], > > + 'include-sources': [], > > + 'ret': '', > > + 'init': '' > > + } > > + > > + func = func_types[-1] > > + try: > > + with open('../sysdeps/x86_64/fpu/libmvec-%s-inputs' % func) as f: > > + for line in f: > > + # Look for directives and parse it if found. > > + if line.startswith('##'): > > + try: > > + d_name, d_val = line[2:].split(':', 1) > > + d_name = d_name.strip() > > + d_val = d_val.strip() > > + directives[d_name] = _process_directive(d_name, d_val, > func_types[1]) > > + except (IndexError, KeyError): > > + die('Invalid directive: %s' % line[2:]) > > + > > + # Skip blank lines and comments. > > + line = line.split('#', 1)[0].rstrip() > > + if not line: > > + continue > > + > > + # Otherwise, we're an input. Add to the appropriate > > + # input set. > > + cur_name = directives['name'] > > + all_vals.setdefault(cur_name, []) > > + all_vals[cur_name].append(line) > > + except IOError as ex: > > + die("Failed to open input file (%s): %s" % (ex.filename, > ex.strerror)) > > + > > + return directives, all_vals > > + > > + > > +def die(msg): > > + """Exit with an error > > + > > + Prints an error message to the standard error stream and exits with > > + a non-zero status. > > + > > + Args: > > + msg: The error message to print to standard error > > + """ > > + print('%s\n' % msg, file=sys.stderr) > > + sys.exit(os.EX_DATAERR) > > + > > + > > +def main(args): > > + """Main function > > + > > + Use the first command line argument as function name and parse its > > + input file to generate C source that calls the function repeatedly > > + for the input. > > + > > + Args: > > + args: The command line arguments with the program name dropped > > + > > + Returns: > > + os.EX_USAGE on error and os.EX_OK on success. 
> > + """ > > + if len(args) != 1: > > + print('Usage: %s ' % sys.argv[0]) > > + return os.EX_USAGE > > + > > + func_types = args[0].split('-') > > + directives, all_vals = parse_file(func_types) > > + gen_source(func_types, directives, all_vals) > > + return os.EX_OK > > + > > + > > +if __name__ == '__main__': > > + sys.exit(main(sys.argv[1:])) > > -- > > 2.31.1 > > >