From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail-pj1-x102a.google.com (mail-pj1-x102a.google.com [IPv6:2607:f8b0:4864:20::102a]) by sourceware.org (Postfix) with ESMTPS id E76073858413 for ; Fri, 12 Nov 2021 21:02:40 +0000 (GMT) DMARC-Filter: OpenDMARC Filter v1.4.1 sourceware.org E76073858413 Received: by mail-pj1-x102a.google.com with SMTP id n15-20020a17090a160f00b001a75089daa3so8175169pja.1 for ; Fri, 12 Nov 2021 13:02:40 -0800 (PST) X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=x-gm-message-state:mime-version:references:in-reply-to:from:date :message-id:subject:to:cc; bh=nDU8Axn9E4TrDDCpXk3W8oO3OFT0FtflfOKx7YbJMFQ=; b=sUOCnTYuej/vG9RYMx+1DsA98zNfXpshDC5AeLS80un5vxOA7S2CjuLxXX+xKznKkp cK29xDg7v9UP7pSgF4EBwYHrVQvF6jS6Mw93RRwIX6CwWC1NTWVXV9cLGLIlWVHu1/V6 MQmd4l8C0tawkHcxTo6bFf3M7+mi+xnPDugHEmFjh63wjoXv0301/O8AXhn+113nyJNN VN4P5Sa0WCqjBciKLuG/7x4CVcWJWdlfEMiQS5U5h2qM6C/wq8o6Ql/r9Nv9RKdMX5CK 4YuEZGFbiJZUQ5lQ18+GsDEDngud6NOTmGNJf729Fk8OeeLGz5Z/Lh+8arHlfV2qXV3m fJzw== X-Gm-Message-State: AOAM532r/D53sjpc8iVvtQvDABkomJtPEt0Tu1BHFeON6bVLU6tTpXeT +RGszvJtDHtCZoYXwAwvTziDw/8ak69FUnICQ4S+aXQlykg= X-Google-Smtp-Source: ABdhPJyjnC/lEnjFHs1oCAZ3jFHwIyEK9vHRwpxMRjMt0USxtMp0ysCQtiAgtMsZKUj6NtXCrHjTOeC7qAzaPUI4qLA= X-Received: by 2002:a17:90a:bf0b:: with SMTP id c11mr21414852pjs.208.1636750959876; Fri, 12 Nov 2021 13:02:39 -0800 (PST) MIME-Version: 1.0 References: <20211112191800.790574-1-skpgkp2@gmail.com> <20211112191800.790574-2-skpgkp2@gmail.com> In-Reply-To: <20211112191800.790574-2-skpgkp2@gmail.com> From: Noah Goldstein Date: Fri, 12 Nov 2021 15:02:28 -0600 Message-ID: Subject: Re: [PATCH v2 1/6] x86-64: Create microbenchmark infrastructure for libmvec To: Sunil K Pandey Cc: GNU C Library Content-Type: text/plain; charset="UTF-8" X-Spam-Status: No, score=-9.8 required=5.0 tests=BAYES_00, DKIM_SIGNED, DKIM_VALID, DKIM_VALID_AU, DKIM_VALID_EF, FREEMAIL_FROM, GIT_PATCH_0, KAM_SHORT, RCVD_IN_DNSWL_NONE, 
SPF_HELO_NONE, SPF_PASS, TXREP autolearn=ham autolearn_force=no version=3.4.4 X-Spam-Checker-Version: SpamAssassin 3.4.4 (2020-01-24) on server2.sourceware.org X-BeenThere: libc-alpha@sourceware.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Libc-alpha mailing list List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Fri, 12 Nov 2021 21:02:44 -0000 On Fri, Nov 12, 2021 at 1:19 PM Sunil K Pandey via Libc-alpha wrote: > > Add python script to generate libmvec microbenchmark from the input > values for each libmvec function using skeleton benchmark template. > > Creates double and float benchmarks with vector length 1, 2, 4, 8, > and 16 for each libmvec function. Vector length 1 corresponds to > scalar version of function and is included for vector function perf > comparison. > --- > sysdeps/x86_64/fpu/Makeconfig | 35 ++ > sysdeps/x86_64/fpu/Makefile | 40 ++ > sysdeps/x86_64/fpu/bench-libmvec-skeleton.c | 104 +++++ > sysdeps/x86_64/fpu/scripts/bench_libmvec.py | 464 ++++++++++++++++++++ > 4 files changed, 643 insertions(+) > create mode 100644 sysdeps/x86_64/fpu/bench-libmvec-skeleton.c > create mode 100755 sysdeps/x86_64/fpu/scripts/bench_libmvec.py > > diff --git a/sysdeps/x86_64/fpu/Makeconfig b/sysdeps/x86_64/fpu/Makeconfig > index 24aaee1a43..503e9b5ffa 100644 > --- a/sysdeps/x86_64/fpu/Makeconfig > +++ b/sysdeps/x86_64/fpu/Makeconfig > @@ -29,6 +29,23 @@ libmvec-funcs = \ > sin \ > sincos \ > > +# Define libmvec function for benchtests directory. 
> +libmvec-bench-funcs = \ > + > +bench-libmvec-double = \ > + $(addprefix double-vlen1-, $(libmvec-bench-funcs)) \ > + $(addprefix double-vlen2-, $(libmvec-bench-funcs)) \ > + $(addprefix double-vlen4-, $(libmvec-bench-funcs)) \ > + $(addprefix double-vlen4-avx2-, $(libmvec-bench-funcs)) \ > + $(addprefix double-vlen8-, $(libmvec-bench-funcs)) \ > + > +bench-libmvec-float = \ > + $(addsuffix f, $(addprefix float-vlen1-, $(libmvec-bench-funcs))) \ > + $(addsuffix f, $(addprefix float-vlen4-, $(libmvec-bench-funcs))) \ > + $(addsuffix f, $(addprefix float-vlen8-, $(libmvec-bench-funcs))) \ > + $(addsuffix f, $(addprefix float-vlen8-avx2-, $(libmvec-bench-funcs))) \ > + $(addsuffix f, $(addprefix float-vlen16-, $(libmvec-bench-funcs))) \ > + > # The base libmvec ABI tests. > libmvec-abi-func-tests = \ > $(addprefix test-double-libmvec-,$(libmvec-funcs)) \ > @@ -83,5 +100,23 @@ $(common-objpfx)libmvec.mk: $(common-objpfx)config.make > echo " \$$(float-vlen16-arch-ext-cflags)"; \ > echo; \ > done; \ > + echo "endif"; \ > + echo "ifeq (\$$(subdir),benchtests)"; \ > + for t in $(libmvec-bench-funcs); do \ > + echo "CFLAGS-bench-double-vlen4-$$t.c = \\"; \ > + echo " \$$(double-vlen4-arch-ext-cflags)"; \ > + echo "CFLAGS-bench-double-vlen4-avx2-$$t.c = \\"; \ > + echo " \$$(double-vlen4-arch-ext2-cflags)"; \ > + echo "CFLAGS-bench-double-vlen8-$$t.c = \\"; \ > + echo " \$$(double-vlen8-arch-ext-cflags)"; \ > + echo; \ > + echo "CFLAGS-bench-float-vlen8-$${t}f.c = \\"; \ > + echo " \$$(float-vlen8-arch-ext-cflags)"; \ > + echo "CFLAGS-bench-float-vlen8-avx2-$${t}f.c = \\"; \ > + echo " \$$(float-vlen8-arch-ext2-cflags)"; \ > + echo "CFLAGS-bench-float-vlen16-$${t}f.c = \\"; \ > + echo " \$$(float-vlen16-arch-ext-cflags)"; \ > + echo; \ > + done; \ > echo "endif") > $@T > mv -f $@T $@ > diff --git a/sysdeps/x86_64/fpu/Makefile b/sysdeps/x86_64/fpu/Makefile > index d172ae815d..9fb587cf8f 100644 > --- a/sysdeps/x86_64/fpu/Makefile > +++ b/sysdeps/x86_64/fpu/Makefile > @@ 
-72,3 +72,43 @@ ifeq ($(subdir)$(config-cflags-mprefer-vector-width),mathyes) > # performance of sin and cos by more than 40% on Skylake. > CFLAGS-branred.c = -mprefer-vector-width=128 > endif > + > +ifeq ($(subdir),benchtests) > +double-vlen4-arch-ext-cflags = -mavx > +double-vlen4-arch-ext2-cflags = -mavx2 > +double-vlen8-arch-ext-cflags = -mavx512f > + > +float-vlen8-arch-ext-cflags = -mavx > +float-vlen8-arch-ext2-cflags = -mavx2 > +float-vlen16-arch-ext-cflags = -mavx512f > + > +bench-libmvec := $(bench-libmvec-double) $(bench-libmvec-float) > + > +ifeq (${BENCHSET},) > +bench += $(bench-libmvec) > +endif > + > +ifeq (${STATIC-BENCHTESTS},yes) > +libmvec-benchtests = $(common-objpfx)mathvec/libmvec.a $(common-objpfx)math/libm.a > +else > +libmvec-benchtests = $(libmvec) $(libm) > +endif > + > +$(addprefix $(objpfx)bench-,$(bench-libmvec-double)): $(libmvec-benchtests) > +$(addprefix $(objpfx)bench-,$(bench-libmvec-float)): $(libmvec-benchtests) > +bench-libmvec-deps = $(..)sysdeps/x86_64/fpu/bench-libmvec-skeleton.c bench-timing.h Makefile > + > +$(objpfx)bench-float-%.c: $(bench-libmvec-deps) > + { if [ -n "$($*-INCLUDE)" ]; then \ > + cat $($*-INCLUDE); \ > + fi; \ > + $(PYTHON) $(..)sysdeps/x86_64/fpu/scripts/bench_libmvec.py $(basename $(@F)); } > $@-tmp > + mv -f $@-tmp $@ > + > +$(objpfx)bench-double-%.c: $(bench-libmvec-deps) > + { if [ -n "$($*-INCLUDE)" ]; then \ > + cat $($*-INCLUDE); \ > + fi; \ > + $(PYTHON) $(..)sysdeps/x86_64/fpu/scripts/bench_libmvec.py $(basename $(@F)); } > $@-tmp > + mv -f $@-tmp $@ > +endif > diff --git a/sysdeps/x86_64/fpu/bench-libmvec-skeleton.c b/sysdeps/x86_64/fpu/bench-libmvec-skeleton.c > new file mode 100644 > index 0000000000..d56a0c4462 > --- /dev/null > +++ b/sysdeps/x86_64/fpu/bench-libmvec-skeleton.c > @@ -0,0 +1,104 @@ > +/* Skeleton for libmvec benchmark programs. > + Copyright (C) 2021 Free Software Foundation, Inc. > + This file is part of the GNU C Library. 
> + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. > + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. > + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + <https://www.gnu.org/licenses/>. */ > + > +#include > +#include > +#include > +#include > +#include > +#include > +#include > +#include > +#include > + > +#include > +#include > +#define D_ITERS 10000 > + > +int > +main (int argc, char **argv) > +{ > + unsigned long i, k; > + timing_t start, end; > + json_ctx_t json_ctx; > + > +#if defined REQUIRE_AVX > + if (!CPU_FEATURE_ACTIVE (AVX)) > + { > + printf ("AVX not supported.\n"); > + return 0; > + } > +#elif defined REQUIRE_AVX2 > + if (!CPU_FEATURE_ACTIVE (AVX2)) > + { > + printf ("AVX2 not supported.\n"); > + return 0; > + } > +#elif defined REQUIRE_AVX512F > + if (!CPU_FEATURE_ACTIVE (AVX512F)) > + { > + printf ("AVX512F not supported.\n"); > + return 0; > + } > +#endif > + > + bench_start (); > + > +#ifdef BENCH_INIT > + BENCH_INIT (); > +#endif > + > + json_init (&json_ctx, 2, stdout); > + > + /* Begin function. */ > + json_attr_object_begin (&json_ctx, FUNCNAME); > + > + for (int v = 0; v < NUM_VARIANTS; v++) > + { > + double d_total_time = 0; > + uint64_t cur; I think these (`d_total_time` and `cur`) should also be of type `timing_t`. > + for (k = 0; k < D_ITERS; k++) > + { > + TIMING_NOW (start); > + for (i = 0; i < NUM_SAMPLES (v); i++) What is the rationale for having both `D_ITERS` and `NUM_SAMPLES (v)`? Why not use a single loop that iterates `D_ITERS * NUM_SAMPLES (v)` times? 
> + BENCH_FUNC (v, i); > + TIMING_NOW (end); > + > + TIMING_DIFF (cur, start, end); > + > + d_total_time += cur; I think this should be `TIMING_ACCUM(d_total_time, cur)`. > + > + } > + double d_total_data_set = D_ITERS * NUM_SAMPLES (v) * STRIDE; > + > + /* Begin variant. */ > + json_attr_object_begin (&json_ctx, VARIANT (v)); > + > + json_attr_double (&json_ctx, "duration", d_total_time); > + json_attr_double (&json_ctx, "iterations", d_total_data_set); > + json_attr_double (&json_ctx, "mean", d_total_time / d_total_data_set); > + > + /* End variant. */ > + json_attr_object_end (&json_ctx); > + } > + > + /* End function. */ > + json_attr_object_end (&json_ctx); > + > + return 0; > +} > diff --git a/sysdeps/x86_64/fpu/scripts/bench_libmvec.py b/sysdeps/x86_64/fpu/scripts/bench_libmvec.py > new file mode 100755 > index 0000000000..762865de8f > --- /dev/null > +++ b/sysdeps/x86_64/fpu/scripts/bench_libmvec.py > @@ -0,0 +1,464 @@ > +#!/usr/bin/python3 > +# Copyright (C) 2021 Free Software Foundation, Inc. > +# This file is part of the GNU C Library. > +# > +# The GNU C Library is free software; you can redistribute it and/or > +# modify it under the terms of the GNU Lesser General Public > +# License as published by the Free Software Foundation; either > +# version 2.1 of the License, or (at your option) any later version. > +# > +# The GNU C Library is distributed in the hope that it will be useful, > +# but WITHOUT ANY WARRANTY; without even the implied warranty of > +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > +# Lesser General Public License for more details. > +# > +# You should have received a copy of the GNU Lesser General Public > +# License along with the GNU C Library; if not, see > +# <https://www.gnu.org/licenses/>. > + > +"""Benchmark program generator script > + > +This script takes a function name as input and generates a program using > +a libmvec input file located in the sysdeps/x86_64/fpu directory. 
The > +name of the input file should be of the form libmvec-foo-inputs where > +'foo' is the name of the function. > +""" > + > +from __future__ import print_function > +import sys > +import os > +import itertools > +import re > + > +# Macro definitions for functions that take no arguments. For functions > +# that take arguments, the STRUCT_TEMPLATE, ARGS_TEMPLATE and > +# VARIANTS_TEMPLATE are used instead. > +DEFINES_TEMPLATE = ''' > +#define CALL_BENCH_FUNC(v, i) %(func)s () > +#define NUM_VARIANTS (1) > +#define NUM_SAMPLES(v) (1) > +#define VARIANT(v) FUNCNAME "()" > +''' > + > +# Structures to store arguments for the function call. A function may > +# have its inputs partitioned to represent distinct performance > +# characteristics or distinct flavors of the function. Each such > +# variant is represented by the _VARIANT structure. The ARGS structure > +# represents a single set of arguments. > +BENCH_VEC_TEMPLATE = ''' > +#define CALL_BENCH_FUNC(v, i) (__extension__ ({ \\ > + %(defs)s mx0 = %(func)s (%(func_args)s); \\ > + mx0; })) > +''' > + > +BENCH_SCALAR_TEMPLATE = ''' > +#define CALL_BENCH_FUNC(v, i) %(func)s (%(func_args)s) > +''' > + > +STRUCT_TEMPLATE = '''struct args > +{ > +%(args)s > + double timing; > +}; > + > +struct _variants > +{ > + const char *name; > + int count; > + struct args *in; > +}; > +''' > + > +# The actual input arguments. > +ARGS_TEMPLATE = '''struct args in%(argnum)d[%(num_args)d] = { > +%(args)s > +}; > +''' > + > +# The actual variants, along with macros defined to access the variants. > +VARIANTS_TEMPLATE = '''struct _variants variants[%(num_variants)d] = { > +%(variants)s > +}; > + > +#define NUM_VARIANTS %(num_variants)d > +#define NUM_SAMPLES(i) (variants[i].count) > +#define VARIANT(i) (variants[i].name) > +''' > + > +# Epilogue for the generated source file. 
> +EPILOGUE = ''' > +#define BENCH_FUNC(i, j) ({%(getret)s CALL_BENCH_FUNC (i, j);}) > +#define FUNCNAME "%(func)s" > +#include <bench-libmvec-skeleton.c>''' > + > + > +def gen_source(func_types, directives, all_vals): > + """Generate source for the function > + > + Generate the C source for the function from the values and > + directives. > + > + Args: > + func: The function name > + directives: A dictionary of directives applicable to this function > + all_vals: A dictionary input values > + """ > + # The includes go in first. > + for header in directives['includes']: > + print('#include <%s>' % header) > + > + for header in directives['include-sources']: > + print('#include "%s"' % header) > + > + argtype_vtable = { > + 2: '128', > + 4: '256', > + 8: '512' > + } > + prefix_vtable = { > + 2: 'b', > + 4: 'c', > + 8: 'e' > + } > + > + # Get all the function properties > + funcname_argtype = '' > + float_flag = False > + if func_types[1] == 'float': > + float_flag = True > + avx_flag = False > + if func_types[3] == 'avx2': > + avx_flag = True > + funcname_stride = int(func_types[2][4:]) > + funcname_origin = func_types[-1] > + if float_flag: > + funcname_origin = funcname_origin[:-1] > + > + if funcname_stride == 1: > + # Prepare for scalar functions file generation > + funcname_prefix = '' > + funcname_prefix_1 = '' > + funcname_argtype = 'double' > + if float_flag: > + funcname_argtype = 'float' > + else: > + # Prepare for libmvec functions file generation > + funcname_prefix_1 = len(directives['args']) * 'v' + '_' > + aligned_stride = funcname_stride > + if float_flag: > + aligned_stride /= 2 > + funcname_prefix = '_ZGV' > + if (avx_flag and (aligned_stride == 4)): > + funcname_prefix += 'd' > + else: > + funcname_prefix += prefix_vtable[aligned_stride] > + funcname_prefix = funcname_prefix + 'N' + func_types[2][4:] > + funcname_argtype = '__m' + argtype_vtable[aligned_stride] > + if not float_flag: > + funcname_argtype += 'd' > + > + # Include x86intrin.h for vector functions > + if not 
funcname_stride == 1: > + print('#include <x86intrin.h>') > + if (avx_flag and (aligned_stride == 4)): > + # For bench-float-vlen8-avx2* and bench-double-vlen4-avx2* > + print('#define REQUIRE_AVX2') > + elif aligned_stride == 8: > + # For bench-float-vlen16* and bench-double-vlen8* > + print('#define REQUIRE_AVX512F') > + elif aligned_stride == 4: > + # For bench-float-vlen8* and bench-double-vlen4* without avx2 > + print('#define REQUIRE_AVX') > + else: > + print('#define FUNCTYPE %s' % funcname_argtype) > + > + print('#define STRIDE %d ' % funcname_stride) > + > + funcname = funcname_prefix + funcname_prefix_1 + funcname_origin > + if float_flag: > + funcname += 'f' > + > + funcname_rettype = funcname_argtype > + if directives['ret'] == '': > + funcname_rettype = 'void' > + > + funcname_inputtype = [] > + for arg, i in zip(directives['args'], itertools.count()): > + funcname_inputtype.append(funcname_argtype) > + if arg[0] == '<' and arg[-1] == '>': > + pos = arg.rfind('*') > + if pos == -1: > + die('Output argument must be a pointer type') > + funcname_inputtype[i] += ' *' > + > + if not funcname_stride == 1: > + if len(directives['args']) == 2: > + print('extern %s %s (%s, %s);' % (funcname_rettype, funcname, funcname_inputtype[0], funcname_inputtype[1])) > + elif len(directives['args']) == 3: > + print('extern %s %s (%s, %s, %s);' % (funcname_rettype, funcname, funcname_inputtype[0], funcname_inputtype[1], funcname_inputtype[2])) > + else: > + print('extern %s %s (%s);' % (funcname_rettype, funcname, funcname_inputtype[0])) > + > + # Print macros. This branches out to a separate routine if > + # the function takes arguments. > + if not directives['args']: > + print(DEFINES_TEMPLATE % {'funcname': funcname}) > + outargs = [] > + else: > + outargs = _print_arg_data(funcname, float_flag, funcname_argtype, funcname_stride, directives, all_vals) > + > + # Print the output variable definitions if necessary. 
> + for out in outargs: > + print(out) > + > + # If we have a return value from the function, make sure it is > + # assigned to prevent the compiler from optimizing out the > + # call. > + getret = '' > + > + if directives['ret']: > + if funcname_argtype != '': > + print('static %s volatile ret;' % funcname_argtype) > + getret = 'ret =' > + else: > + print('static %s volatile ret;' % directives['ret']) > + getret = 'ret =' > + > + # Test initialization. > + if directives['init']: > + print('#define BENCH_INIT %s' % directives['init']) > + > + print(EPILOGUE % {'getret': getret, 'func': funcname}) > + > + > +def _print_arg_data(func, float_flag, funcname_argtype, funcname_stride, directives, all_vals): > + """Print argument data > + > + This is a helper function for gen_source that prints structure and > + values for arguments and their variants and returns output arguments > + if any are found. > + > + Args: > + func: Function name > + float_flag: True if function is float type > + funcname_argtype: Type for vector variants > + funcname_stride: Vector Length > + directives: A dictionary of directives applicable to this function > + all_vals: A dictionary input values > + > + Returns: > + Returns a list of definitions for function arguments that act as > + output parameters. > + """ > + # First, all of the definitions. We process writing of > + # CALL_BENCH_FUNC, struct args and also the output arguments > + # together in a single traversal of the arguments list. > + func_args = [] > + _func_args = [] > + arg_struct = [] > + outargs = [] > + # Conversion function for each type > + vtable = { > + '__m128d': '_mm_loadu_pd', > + '__m256d': '_mm256_loadu_pd', > + '__m512d': '_mm512_loadu_pd', > + '__m128': '_mm_loadu_ps', > + '__m256': '_mm256_loadu_ps', > + '__m512': '_mm512_loadu_ps', > + 'double': '', > + 'float': '' > + } > + > + # For double max_vlen=8, for float max_vlen=16. 
> + if float_flag == True: > + max_vlen = 16 > + else: > + max_vlen = 8 > + > + for arg, i in zip(directives['args'], itertools.count()): > + if arg[0] == '<' and arg[-1] == '>': > + outargs.append('static %s out%d __attribute__((used));' % (funcname_argtype, i)) > + func_args.append('&out%d' % i) > + _func_args.append('&out%d' % i) > + else: > + arg_struct.append(' %s arg%d[STRIDE];' % (arg, i)) > + func_args.append('%s (variants[v].in[i].arg%d)' % > + (vtable[funcname_argtype], i)) > + _func_args.append('variants[v].in[i].arg%d[0]' % i) > + > + if funcname_stride == 1: > + print(BENCH_SCALAR_TEMPLATE % {'func': func, > + 'func_args': ', '.join(_func_args)}) > + elif directives['ret'] == '': > + print(BENCH_SCALAR_TEMPLATE % {'func': func, > + 'func_args': ', '.join(func_args)}) > + else: > + print(BENCH_VEC_TEMPLATE % {'func': func, 'func_args': ', '.join(func_args), > + 'defs': funcname_argtype}) > + print(STRUCT_TEMPLATE % {'args': '\n'.join(arg_struct)}) > + > + # Now print the values. > + variants = [] > + for (k, _vals), i in zip(all_vals.items(), itertools.count()): > + vals = [] > + temp_vals = [] > + j = 0 > + temp_j = 0 > + result_v = ['', '', ''] > + for _v in _vals: > + nums = _v.split(',') > + for l in range(0, len(nums)): > + result_v[l] = result_v[l] + nums[l].strip() + ',' > + j += 1 > + temp_j += 1 > + > + if temp_j == funcname_stride: > + final_result = '' > + for l in range(0, len(nums)): > + final_result = final_result + '{' + result_v[l][:-1] + '},' > + temp_vals.append(final_result[:-1]) > + temp_j = 0 > + result_v = ['', '', ''] > + > + # Make sure amount of test data is multiple of max_vlen > + # to keep data size same for all vector length. > + if j == max_vlen: > + vals.extend(temp_vals) > + temp_vals = [] > + j = 0 > + > + out = [' {%s, 0},' % v for v in vals] > + > + # Members for the variants structure list that we will > + # print later. 
> + variants.append(' {"%s", %d, in%d},' % (k, len(vals), i)) > + print(ARGS_TEMPLATE % {'argnum': i, 'num_args': len(vals), > + 'args': '\n'.join(out)}) > + > + # Print the variants and the last set of macros. > + print(VARIANTS_TEMPLATE % {'num_variants': len(all_vals), > + 'variants': '\n'.join(variants)}) > + return outargs > + > + > +def _process_directive(d_name, d_val, func_args): > + """Process a directive. > + > + Evaluate the directive name and value passed and return the > + processed value. This is a helper function for parse_file. > + > + Args: > + d_name: Name of the directive > + d_val: The string value to process > + > + Returns: > + The processed value, which may be the string as it is or an object > + that describes the directive. > + """ > + # Process the directive values if necessary. name and ret don't > + # need any processing. > + if d_name.startswith('include'): > + d_val = d_val.split(',') > + elif d_name == 'args': > + d_val = d_val.split(':') > + # Check if args type match > + if not d_val[0] == func_args: > + die("Args mismatch, should be %s, but get %s" % (d_val[0], func_args)) > + > + # Return the values. > + return d_val > + > + > +def parse_file(func_types): > + """Parse an input file > + > + Given a function name, open and parse an input file for the function > + and get the necessary parameters for the generated code and the list > + of inputs. > + > + Args: > + func: The function name > + > + Returns: > + A tuple of two elements, one a dictionary of directives and the > + other a dictionary of all input values. > + """ > + all_vals = {} > + # Valid directives. > + directives = { > + 'name': '', > + 'args': [], > + 'includes': [], > + 'include-sources': [], > + 'ret': '', > + 'init': '' > + } > + > + func = func_types[-1] > + try: > + with open('../sysdeps/x86_64/fpu/libmvec-%s-inputs' % func) as f: > + for line in f: > + # Look for directives and parse it if found. 
> + if line.startswith('##'): > + try: > + d_name, d_val = line[2:].split(':', 1) > + d_name = d_name.strip() > + d_val = d_val.strip() > + directives[d_name] = _process_directive(d_name, d_val, func_types[1]) > + except (IndexError, KeyError): > + die('Invalid directive: %s' % line[2:]) > + > + # Skip blank lines and comments. > + line = line.split('#', 1)[0].rstrip() > + if not line: > + continue > + > + # Otherwise, we're an input. Add to the appropriate > + # input set. > + cur_name = directives['name'] > + all_vals.setdefault(cur_name, []) > + all_vals[cur_name].append(line) > + except IOError as ex: > + die("Failed to open input file (%s): %s" % (ex.filename, ex.strerror)) > + > + return directives, all_vals > + > + > +def die(msg): > + """Exit with an error > + > + Prints an error message to the standard error stream and exits with > + a non-zero status. > + > + Args: > + msg: The error message to print to standard error > + """ > + print('%s\n' % msg, file=sys.stderr) > + sys.exit(os.EX_DATAERR) > + > + > +def main(args): > + """Main function > + > + Use the first command line argument as function name and parse its > + input file to generate C source that calls the function repeatedly > + for the input. > + > + Args: > + args: The command line arguments with the program name dropped > + > + Returns: > + os.EX_USAGE on error and os.EX_OK on success. > + """ > + if len(args) != 1: > + print('Usage: %s <function>' % sys.argv[0]) > + return os.EX_USAGE > + > + func_types = args[0].split('-') > + directives, all_vals = parse_file(func_types) > + gen_source(func_types, directives, all_vals) > + return os.EX_OK > + > + > +if __name__ == '__main__': > + sys.exit(main(sys.argv[1:])) > -- > 2.31.1 >