* [PATCH 4/4] [ARM] Add attribute/pragma target fpu= @ 2015-09-14 14:38 Christian Bruel 2015-09-14 19:50 ` Bernhard Reutner-Fischer 2015-09-15 10:48 ` Christian Bruel 0 siblings, 2 replies; 9+ messages in thread From: Christian Bruel @ 2015-09-14 14:38 UTC (permalink / raw) To: kyrylo.tkachov, Ramana.Radhakrishnan; +Cc: gcc-patches [-- Attachment #1: Type: text/plain, Size: 218 bytes --] Finally, the final part of the patch set does the attribute target parsing and checking, redefines the preprocessor macros and implements the inlining rules. testcases and documentation included. thanks Christian [-- Attachment #2: p4.patch --] [-- Type: text/x-patch, Size: 12045 bytes --] 2015-05-26 Christian Bruel <christian.bruel@st.com> PR target/65837 * config/arm/arm-c.c (arm_cpu_builtins): Set or reset __ARM_FEATURE_CRYPTO, __VFP_FP__, __ARM_NEON__ (arm_pragma_target_parse): Change check for arm_cpu_builtins. undefine __ARM_FP. * doc/invoke.texi (-mfpu=): Mention attribute and pragma. * doc/extend.texi (-mfpu=): Describe attribute. 2015-09-14 Christian Bruel <christian.bruel@st.com> PR target/65837 gcc.target/arm/lto/pr65837_0.c gcc.target/arm/attr-neon2.c gcc.target/arm/attr-neon.c gcc.target/arm/attr-neon-builtin-fail.c gcc.target/arm/attr-crypto.c diff -ruN gnu_trunk.p3/gcc/gcc/config/arm/arm.c gnu_trunk.p4/gcc/gcc/config/arm/arm.c --- gnu_trunk.p3/gcc/gcc/config/arm/arm.c 2015-09-11 16:26:33.869000746 +0200 +++ gnu_trunk.p4/gcc/gcc/config/arm/arm.c 2015-09-11 17:24:23.636876647 +0200 @@ -29486,11 +29486,42 @@ /* Hook to determine if one function can safely inline another. */ static bool -arm_can_inline_p (tree caller ATTRIBUTE_UNUSED, tree callee ATTRIBUTE_UNUSED) +arm_can_inline_p (tree caller, tree callee) { - /* Overidde default hook: Always OK to inline between different modes. - Function with mode specific instructions, e.g using asm, must be explicitely - protected with noinline. 
*/ + tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller); + tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee); + + struct cl_target_option *caller_opts + = TREE_TARGET_OPTION (caller_tree ? caller_tree + : target_option_default_node); + + struct cl_target_option *callee_opts + = TREE_TARGET_OPTION (callee_tree ? callee_tree + : target_option_default_node); + + const struct arm_fpu_desc *fpu_desc1 + = &all_fpus[caller_opts->x_arm_fpu_index]; + const struct arm_fpu_desc *fpu_desc2 + = &all_fpus[callee_opts->x_arm_fpu_index]; + + /* Can't inline NEON extension if the caller doesn't support it. */ + if (ARM_FPU_FSET_HAS (fpu_desc2->features, FPU_FL_NEON) + && ! ARM_FPU_FSET_HAS (fpu_desc1->features, FPU_FL_NEON)) + return false; + + /* Can't inline CRYPTO extension if the caller doesn't support it. */ + if (ARM_FPU_FSET_HAS (fpu_desc2->features, FPU_FL_CRYPTO) + && ! ARM_FPU_FSET_HAS (fpu_desc1->features, FPU_FL_CRYPTO)) + return false; + + /* Need same model and regs. */ + if (fpu_desc2->model != fpu_desc1->model + || fpu_desc2->regs != fpu_desc1->regs) + return false; + + /* OK to inline between different modes. + Function with mode specific instructions, e.g using asm, + must be explicitely protected with noinline. 
*/ return true; } @@ -29501,6 +29532,8 @@ static bool arm_valid_target_attribute_rec (tree args, struct gcc_options *opts) { + int ret=true; + if (TREE_CODE (args) == TREE_LIST) { bool ret = true; @@ -29518,30 +29551,35 @@ } char *argstr = ASTRDUP (TREE_STRING_POINTER (args)); - while (argstr && *argstr != '\0') + char *q; + + while ((q = strtok (argstr, ",")) != NULL) { - while (ISSPACE (*argstr)) - argstr++; + while (ISSPACE (*q)) ++q; - if (!strcmp (argstr, "thumb")) - { + argstr = NULL; + if (!strncmp (q, "thumb", 5)) opts->x_target_flags |= MASK_THUMB; - arm_option_check_internal (opts); - return true; - } - if (!strcmp (argstr, "arm")) - { + else if (!strncmp (q, "arm", 3)) opts->x_target_flags &= ~MASK_THUMB; - arm_option_check_internal (opts); - return true; + + else if (!strncmp (q, "fpu=", 4)) + { + if (! opt_enum_arg_to_value (OPT_mfpu_, q+4, + &opts->x_arm_fpu_index, CL_TARGET)) + { + error ("invalid fpu for attribute(target(\"%s\"))", q); + return false; + } } + else + warning (0, "attribute(target(\"%s\")) is unknown", argstr); - warning (0, "attribute(target(\"%s\")) is unknown", argstr); - return false; + arm_option_check_internal (opts); } - return false; + return ret; } /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. 
*/ diff -ruN gnu_trunk.p3/gcc/gcc/config/arm/arm-c.c gnu_trunk.p4/gcc/gcc/config/arm/arm-c.c --- gnu_trunk.p3/gcc/gcc/config/arm/arm-c.c 2015-09-11 16:25:32.180858606 +0200 +++ gnu_trunk.p4/gcc/gcc/config/arm/arm-c.c 2015-09-11 17:00:26.085645968 +0200 @@ -68,8 +68,8 @@ def_or_undef_macro (pfile, "__ARM_FEATURE_DSP", TARGET_DSP_MULTIPLY); def_or_undef_macro (pfile, "__ARM_FEATURE_QBIT", TARGET_ARM_QBIT); def_or_undef_macro (pfile, "__ARM_FEATURE_SAT", TARGET_ARM_SAT); - if (TARGET_CRYPTO) - builtin_define ("__ARM_FEATURE_CRYPTO"); + def_or_undef_macro (pfile, "__ARM_FEATURE_CRYPTO", TARGET_CRYPTO); + if (unaligned_access) builtin_define ("__ARM_FEATURE_UNALIGNED"); if (TARGET_CRC32) @@ -129,8 +129,7 @@ if (TARGET_SOFT_FLOAT) builtin_define ("__SOFTFP__"); - if (TARGET_VFP) - builtin_define ("__VFP_FP__"); + def_or_undef_macro (pfile, "__VFP_FP__", TARGET_VFP); if (TARGET_ARM_FP) builtin_define_with_int_value ("__ARM_FP", TARGET_ARM_FP); @@ -141,11 +140,9 @@ if (TARGET_FMA) builtin_define ("__ARM_FEATURE_FMA"); - if (TARGET_NEON) - { - builtin_define ("__ARM_NEON__"); - builtin_define ("__ARM_NEON"); - } + def_or_undef_macro (pfile, "__ARM_NEON__", TARGET_NEON); + def_or_undef_macro (pfile, "__ARM_NEON", TARGET_NEON); + if (TARGET_NEON_FP) builtin_define_with_int_value ("__ARM_NEON_FP", TARGET_NEON_FP); @@ -231,7 +228,7 @@ gcc_assert (prev_opt); gcc_assert (cur_opt); - if (cur_opt->x_target_flags != prev_opt->x_target_flags) + if (cur_opt != prev_opt) { /* For the definitions, ensure all newly defined macros are considered as used for -Wunused-macros. There is no point warning about the @@ -242,6 +239,8 @@ /* Update macros. */ gcc_assert (cur_opt->x_target_flags == target_flags); + /* This one can be redefined by the pragma without warning. 
*/ + cpp_undef (parse_in, "__ARM_FP"); arm_cpu_builtins (parse_in); cpp_opts->warn_unused_macros = saved_warn_unused_macros; diff -ruN gnu_trunk.p3/gcc/gcc/doc/extend.texi gnu_trunk.p4/gcc/gcc/doc/extend.texi --- gnu_trunk.p3/gcc/gcc/doc/extend.texi 2015-09-07 13:35:20.777683005 +0200 +++ gnu_trunk.p4/gcc/gcc/doc/extend.texi 2015-09-14 13:58:49.271385001 +0200 @@ -3606,10 +3606,17 @@ @item arm @cindex @code{target("arm")} function attribute, ARM Force code generation in the ARM (A32) ISA. -@end table Functions from different modes can be inlined in the caller's mode. +@item fpu= +@cindex @code{target("fpu=")} function attribute, ARM +Specifies the fpu for which to tune the performance of this function. +The behavior and permissible arguments are the same as for the @option{-mfpu=} +command-line option. + +@end table + @end table @node AVR Function Attributes diff -ruN gnu_trunk.p3/gcc/gcc/doc/invoke.texi gnu_trunk.p4/gcc/gcc/doc/invoke.texi --- gnu_trunk.p3/gcc/gcc/doc/invoke.texi 2015-09-10 12:21:00.698911244 +0200 +++ gnu_trunk.p4/gcc/gcc/doc/invoke.texi 2015-09-14 10:27:20.281932581 +0200 @@ -13360,6 +13363,8 @@ floating-point arithmetic (in particular denormal values are treated as zero), so the use of NEON instructions may lead to a loss of precision. +You can also set the fpu name at function level by using the @code{target("mfpu=")} function attributes (@pxref{ARM Function Attributes}) or pragmas (@pxref{Function Specific Option Pragmas}). + @item -mfp16-format=@var{name} @opindex mfp16-format Specify the format of the @code{__fp16} half-precision floating-point type. 
diff -ruN gnu_trunk.p3/gcc/gcc/testsuite/gcc.target/arm/attr-crypto.c gnu_trunk.p4/gcc/gcc/testsuite/gcc.target/arm/attr-crypto.c --- gnu_trunk.p3/gcc/gcc/testsuite/gcc.target/arm/attr-crypto.c 1970-01-01 01:00:00.000000000 +0100 +++ gnu_trunk.p4/gcc/gcc/testsuite/gcc.target/arm/attr-crypto.c 2015-09-14 15:58:24.967898634 +0200 @@ -0,0 +1,38 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_crypto_ok } */ + +#pragma GCC target ("fpu=crypto-neon-fp-armv8") + +#ifndef __ARM_FEATURE_CRYPTO +#error __ARM_FEATURE_CRYPTO not defined. +#endif + +#ifndef __ARM_NEON +#error __ARM_NEON not defined. +#endif + +#if !defined(__ARM_FP) || (__ARM_FP != 14) +#error __ARM_FP +#endif + +#include "arm_neon.h" + +int +foo (void) +{ + uint32x4_t a = {0xd, 0xe, 0xa, 0xd}; + uint32x4_t b = {0, 1, 2, 3}; + + uint32x4_t res = vsha256su0q_u32 (a, b); + return res[0]; +} + +#pragma GCC reset_options + +/* Check that the FP version is correctly reset. */ + +#if !defined(__ARM_FP) || (__ARM_FP != 12) +#error __ARM_FP +#endif + +/* { dg-final { scan-assembler "sha256su0.32\tq\[0-9\]+, q\[0-9\]+" } } */ diff -ruN gnu_trunk.p3/gcc/gcc/testsuite/gcc.target/arm/attr-neon2.c gnu_trunk.p4/gcc/gcc/testsuite/gcc.target/arm/attr-neon2.c --- gnu_trunk.p3/gcc/gcc/testsuite/gcc.target/arm/attr-neon2.c 1970-01-01 01:00:00.000000000 +0100 +++ gnu_trunk.p4/gcc/gcc/testsuite/gcc.target/arm/attr-neon2.c 2015-09-14 15:58:24.967898634 +0200 @@ -0,0 +1,28 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-O2 -mfloat-abi=softfp -mfpu=vfp" } */ + +#pragma GCC target ("fpu=neon") +#include <arm_neon.h> + +/* Check that pragma target is used. */ +int8x8_t +my (int8x8_t __a, int8x8_t __b) +{ + return __a + __b; +} + +#pragma GCC reset_options + +/* Check that command line option is restored. 
*/ +int8x8_t +my1 (int8x8_t __a, int8x8_t __b) +{ + return __a + __b; +} + +/* { dg-final { scan-assembler-times "\.fpu vfp" 1 } } */ +/* { dg-final { scan-assembler-times "\.fpu neon" 1 } } */ +/* { dg-final { scan-assembler "vadd" } } */ + + diff -ruN gnu_trunk.p3/gcc/gcc/testsuite/gcc.target/arm/attr-neon-builtin-fail.c gnu_trunk.p4/gcc/gcc/testsuite/gcc.target/arm/attr-neon-builtin-fail.c --- gnu_trunk.p3/gcc/gcc/testsuite/gcc.target/arm/attr-neon-builtin-fail.c 1970-01-01 01:00:00.000000000 +0100 +++ gnu_trunk.p4/gcc/gcc/testsuite/gcc.target/arm/attr-neon-builtin-fail.c 2015-09-14 15:58:24.967898634 +0200 @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-O2 -mfloat-abi=softfp -mfpu=neon" } */ + +#include <arm_neon.h> + +void __attribute__ ((target ("fpu=vfp"))) +foo (uint8x16_t *p) +{ + *p = vmovq_n_u8 (3); /* { dg-error "called from here" } */ + +} + + +/* { dg-error "inlining failed in call to always_inline" "" { target *-*-* } 0 } */ + + + diff -ruN gnu_trunk.p3/gcc/gcc/testsuite/gcc.target/arm/attr-neon.c gnu_trunk.p4/gcc/gcc/testsuite/gcc.target/arm/attr-neon.c --- gnu_trunk.p3/gcc/gcc/testsuite/gcc.target/arm/attr-neon.c 1970-01-01 01:00:00.000000000 +0100 +++ gnu_trunk.p4/gcc/gcc/testsuite/gcc.target/arm/attr-neon.c 2015-09-14 16:12:08.449698268 +0200 @@ -0,0 +1,26 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-O3 -mfloat-abi=softfp -ftree-vectorize" } */ + +void +f3(int n, int x[], int y[]) { + int i; + for (i = 0; i < n; ++i) + y[i] = x[i] << 3; +} + +/* Verify that neon instructions are emitted once. 
*/ +void __attribute__ ((target("fpu=neon"))) + f1(int n, int x[], int y[]) { + int i; + for (i = 0; i < n; ++i) + y[i] = x[i] << 3; +} + +/* { dg-final { scan-assembler-times "\.fpu vfp" 1 } } */ +/* { dg-final { scan-assembler-times "\.fpu neon" 1 } } */ +/* { dg-final { scan-assembler-times "vshl" 1 } } */ + + + + diff -ruN gnu_trunk.p3/gcc/gcc/testsuite/gcc.target/arm/lto/pr65837_0.c gnu_trunk.p4/gcc/gcc/testsuite/gcc.target/arm/lto/pr65837_0.c --- gnu_trunk.p3/gcc/gcc/testsuite/gcc.target/arm/lto/pr65837_0.c 1970-01-01 01:00:00.000000000 +0100 +++ gnu_trunk.p4/gcc/gcc/testsuite/gcc.target/arm/lto/pr65837_0.c 2015-09-14 15:58:13.899874587 +0200 @@ -0,0 +1,14 @@ +/* { dg-lto-do run } */ +/* { dg-lto-options {{-flto -mfpu=neon}} } */ +/* { dg-suppress-ld-options {-mfpu=neon} } */ + +#include "arm_neon.h" + +float32x2_t a, b, c, e; + +int main() +{ + e = __builtin_neon_vmls_lanev2sf (a, b, c, 0); + return 0; +} + ^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 4/4] [ARM] Add attribute/pragma target fpu= 2015-09-14 14:38 [PATCH 4/4] [ARM] Add attribute/pragma target fpu= Christian Bruel @ 2015-09-14 19:50 ` Bernhard Reutner-Fischer 2015-09-15 10:07 ` Christian Bruel 2015-09-15 10:48 ` Christian Bruel 1 sibling, 1 reply; 9+ messages in thread From: Bernhard Reutner-Fischer @ 2015-09-14 19:50 UTC (permalink / raw) To: Christian Bruel, kyrylo.tkachov, Ramana.Radhakrishnan; +Cc: gcc-patches On September 14, 2015 4:30:23 PM GMT+02:00, Christian Bruel <christian.bruel@st.com> wrote: >Finally, the final part of the patch set does the attribute target >parsing and checking, redefines the preprocessor macros and implements >the inlining rules. > >testcases and documentation included. @@ -29501,6 +29532,8 @@ static bool arm_valid_target_attribute_rec (tree args, struct gcc_options *opts) { + int ret=true; + if (TREE_CODE (args) == TREE_LIST) { bool ret = true; Doesn't the hunk above trigger a shadow warning? Furthermore there are missing spaces before and after the '='. And finally (no diff -p so I can only guess) why the int if the function returns a bool? Thanks, @@ -29518,30 +29551,35 @@ } char *argstr = ASTRDUP (TREE_STRING_POINTER (args)); - while (argstr && *argstr != '\0') + char *q; + + while ((q = strtok (argstr, ",")) != NULL) { - while (ISSPACE (*argstr)) - argstr++; + while (ISSPACE (*q)) ++q; - if (!strcmp (argstr, "thumb")) - { + argstr = NULL; + if (!strncmp (q, "thumb", 5)) opts->x_target_flags |= MASK_THUMB; - arm_option_check_internal (opts); - return true; - } - if (!strcmp (argstr, "arm")) - { + else if (!strncmp (q, "arm", 3)) opts->x_target_flags &= ~MASK_THUMB; - arm_option_check_internal (opts); - return true; + + else if (!strncmp (q, "fpu=", 4)) + { + if (! 
opt_enum_arg_to_value (OPT_mfpu_, q+4, + &opts->x_arm_fpu_index, CL_TARGET)) + { + error ("invalid fpu for attribute(target(\"%s\"))", q); + return false; + } } + else + warning (0, "attribute(target(\"%s\")) is unknown", argstr); - warning (0, "attribute(target(\"%s\")) is unknown", argstr); - return false; + arm_option_check_internal (opts); } - return false; + return ret; } > >thanks > >Christian ^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 4/4] [ARM] Add attribute/pragma target fpu= 2015-09-14 19:50 ` Bernhard Reutner-Fischer @ 2015-09-15 10:07 ` Christian Bruel 0 siblings, 0 replies; 9+ messages in thread From: Christian Bruel @ 2015-09-15 10:07 UTC (permalink / raw) To: Bernhard Reutner-Fischer, kyrylo.tkachov, Ramana.Radhakrishnan Cc: gcc-patches On 09/14/2015 09:44 PM, Bernhard Reutner-Fischer wrote: > On September 14, 2015 4:30:23 PM GMT+02:00, Christian Bruel <christian.bruel@st.com> wrote: >> Finally, the final part of the patch set does the attribute target >> parsing and checking, redefines the preprocessor macros and implements >> the inlining rules. >> >> testcases and documentation included. > > @@ -29501,6 +29532,8 @@ > static bool > arm_valid_target_attribute_rec (tree args, struct gcc_options *opts) > { > + int ret=true; > + > if (TREE_CODE (args) == TREE_LIST) > { > bool ret = true; > > > Doesn't the hunk above trigger a shadow warning? Furthermore there are missing spaces before and after the '='. And finally (no diff -p so I can only guess) why the int if the function returns a bool? > no warning with -Wall, but nevertheless I agree this is a piece of remnant code that had to be removed. thanks ^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 4/4] [ARM] Add attribute/pragma target fpu= 2015-09-14 14:38 [PATCH 4/4] [ARM] Add attribute/pragma target fpu= Christian Bruel 2015-09-14 19:50 ` Bernhard Reutner-Fischer @ 2015-09-15 10:48 ` Christian Bruel 2015-09-18 9:13 ` Kyrill Tkachov 1 sibling, 1 reply; 9+ messages in thread From: Christian Bruel @ 2015-09-15 10:48 UTC (permalink / raw) To: gcc-patches, kyrylo.tkachov, Ramana.Radhakrishnan; +Cc: gcc-patches [-- Attachment #1: Type: text/plain, Size: 345 bytes --] On 09/14/2015 04:30 PM, Christian Bruel wrote: > Finally, the final part of the patch set does the attribute target > parsing and checking, redefines the preprocessor macros and implements > the inlining rules. > > testcases and documentation included. > new version to remove a shadowed remnant piece of code. > thanks > > Christian > [-- Attachment #2: p41.patch --] [-- Type: text/x-patch, Size: 12098 bytes --] 2015-09-14 Christian Bruel <christian.bruel@st.com> PR target/65837 * config/arm/arm-c.c (arm_cpu_builtins): Set or reset __ARM_FEATURE_CRYPTO, __VFP_FP__, __ARM_NEON__ (arm_pragma_target_parse): Change check for arm_cpu_builtins. undefine __ARM_FP. * doc/invoke.texi (-mfpu=): Mention attribute and pragma. * doc/extend.texi (-mfpu=): Describe attribute. 2015-09-14 Christian Bruel <christian.bruel@st.com> PR target/65837 gcc.target/arm/lto/pr65837_0.c gcc.target/arm/attr-neon2.c gcc.target/arm/attr-neon.c gcc.target/arm/attr-neon-builtin-fail.c gcc.target/arm/attr-crypto.c diff -ruN gnu_trunk.p3/gcc/gcc/config/arm/arm.c gnu_trunk.p4/gcc/gcc/config/arm/arm.c --- gnu_trunk.p3/gcc/gcc/config/arm/arm.c 2015-09-11 16:26:33.869000746 +0200 +++ gnu_trunk.p4/gcc/gcc/config/arm/arm.c 2015-09-15 12:26:12.756161709 +0200 @@ -29486,11 +29486,42 @@ /* Hook to determine if one function can safely inline another. 
*/ static bool -arm_can_inline_p (tree caller ATTRIBUTE_UNUSED, tree callee ATTRIBUTE_UNUSED) +arm_can_inline_p (tree caller, tree callee) { - /* Overidde default hook: Always OK to inline between different modes. - Function with mode specific instructions, e.g using asm, must be explicitely - protected with noinline. */ + tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller); + tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee); + + struct cl_target_option *caller_opts + = TREE_TARGET_OPTION (caller_tree ? caller_tree + : target_option_default_node); + + struct cl_target_option *callee_opts + = TREE_TARGET_OPTION (callee_tree ? callee_tree + : target_option_default_node); + + const struct arm_fpu_desc *fpu_desc1 + = &all_fpus[caller_opts->x_arm_fpu_index]; + const struct arm_fpu_desc *fpu_desc2 + = &all_fpus[callee_opts->x_arm_fpu_index]; + + /* Can't inline NEON extension if the caller doesn't support it. */ + if (ARM_FPU_FSET_HAS (fpu_desc2->features, FPU_FL_NEON) + && ! ARM_FPU_FSET_HAS (fpu_desc1->features, FPU_FL_NEON)) + return false; + + /* Can't inline CRYPTO extension if the caller doesn't support it. */ + if (ARM_FPU_FSET_HAS (fpu_desc2->features, FPU_FL_CRYPTO) + && ! ARM_FPU_FSET_HAS (fpu_desc1->features, FPU_FL_CRYPTO)) + return false; + + /* Need same model and regs. */ + if (fpu_desc2->model != fpu_desc1->model + || fpu_desc2->regs != fpu_desc1->regs) + return false; + + /* OK to inline between different modes. + Function with mode specific instructions, e.g using asm, + must be explicitely protected with noinline. 
*/ return true; } @@ -29504,6 +29535,7 @@ if (TREE_CODE (args) == TREE_LIST) { bool ret = true; + for (; args; args = TREE_CHAIN (args)) if (TREE_VALUE (args) && !arm_valid_target_attribute_rec (TREE_VALUE (args), opts)) @@ -29518,30 +29550,38 @@ } char *argstr = ASTRDUP (TREE_STRING_POINTER (args)); - while (argstr && *argstr != '\0') + char *q; + + while ((q = strtok (argstr, ",")) != NULL) { - while (ISSPACE (*argstr)) - argstr++; + while (ISSPACE (*q)) ++q; - if (!strcmp (argstr, "thumb")) - { + argstr = NULL; + if (!strncmp (q, "thumb", 5)) opts->x_target_flags |= MASK_THUMB; - arm_option_check_internal (opts); - return true; - } - if (!strcmp (argstr, "arm")) - { + else if (!strncmp (q, "arm", 3)) opts->x_target_flags &= ~MASK_THUMB; - arm_option_check_internal (opts); - return true; + + else if (!strncmp (q, "fpu=", 4)) + { + if (! opt_enum_arg_to_value (OPT_mfpu_, q+4, + &opts->x_arm_fpu_index, CL_TARGET)) + { + error ("invalid fpu for attribute(target(\"%s\"))", q); + return false; + } + } + else + { + error ("attribute(target(\"%s\")) is unknown", q); + return false; } - warning (0, "attribute(target(\"%s\")) is unknown", argstr); - return false; + arm_option_check_internal (opts); } - return false; + return true; } /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. 
*/ diff -ruN gnu_trunk.p3/gcc/gcc/config/arm/arm-c.c gnu_trunk.p4/gcc/gcc/config/arm/arm-c.c --- gnu_trunk.p3/gcc/gcc/config/arm/arm-c.c 2015-09-11 16:25:32.180858606 +0200 +++ gnu_trunk.p4/gcc/gcc/config/arm/arm-c.c 2015-09-11 17:00:26.085645968 +0200 @@ -68,8 +68,8 @@ def_or_undef_macro (pfile, "__ARM_FEATURE_DSP", TARGET_DSP_MULTIPLY); def_or_undef_macro (pfile, "__ARM_FEATURE_QBIT", TARGET_ARM_QBIT); def_or_undef_macro (pfile, "__ARM_FEATURE_SAT", TARGET_ARM_SAT); - if (TARGET_CRYPTO) - builtin_define ("__ARM_FEATURE_CRYPTO"); + def_or_undef_macro (pfile, "__ARM_FEATURE_CRYPTO", TARGET_CRYPTO); + if (unaligned_access) builtin_define ("__ARM_FEATURE_UNALIGNED"); if (TARGET_CRC32) @@ -129,8 +129,7 @@ if (TARGET_SOFT_FLOAT) builtin_define ("__SOFTFP__"); - if (TARGET_VFP) - builtin_define ("__VFP_FP__"); + def_or_undef_macro (pfile, "__VFP_FP__", TARGET_VFP); if (TARGET_ARM_FP) builtin_define_with_int_value ("__ARM_FP", TARGET_ARM_FP); @@ -141,11 +140,9 @@ if (TARGET_FMA) builtin_define ("__ARM_FEATURE_FMA"); - if (TARGET_NEON) - { - builtin_define ("__ARM_NEON__"); - builtin_define ("__ARM_NEON"); - } + def_or_undef_macro (pfile, "__ARM_NEON__", TARGET_NEON); + def_or_undef_macro (pfile, "__ARM_NEON", TARGET_NEON); + if (TARGET_NEON_FP) builtin_define_with_int_value ("__ARM_NEON_FP", TARGET_NEON_FP); @@ -231,7 +228,7 @@ gcc_assert (prev_opt); gcc_assert (cur_opt); - if (cur_opt->x_target_flags != prev_opt->x_target_flags) + if (cur_opt != prev_opt) { /* For the definitions, ensure all newly defined macros are considered as used for -Wunused-macros. There is no point warning about the @@ -242,6 +239,8 @@ /* Update macros. */ gcc_assert (cur_opt->x_target_flags == target_flags); + /* This one can be redefined by the pragma without warning. 
*/ + cpp_undef (parse_in, "__ARM_FP"); arm_cpu_builtins (parse_in); cpp_opts->warn_unused_macros = saved_warn_unused_macros; diff -ruN gnu_trunk.p3/gcc/gcc/doc/extend.texi gnu_trunk.p4/gcc/gcc/doc/extend.texi --- gnu_trunk.p3/gcc/gcc/doc/extend.texi 2015-09-07 13:35:20.777683005 +0200 +++ gnu_trunk.p4/gcc/gcc/doc/extend.texi 2015-09-14 13:58:49.271385001 +0200 @@ -3606,10 +3606,17 @@ @item arm @cindex @code{target("arm")} function attribute, ARM Force code generation in the ARM (A32) ISA. -@end table Functions from different modes can be inlined in the caller's mode. +@item fpu= +@cindex @code{target("fpu=")} function attribute, ARM +Specifies the fpu for which to tune the performance of this function. +The behavior and permissible arguments are the same as for the @option{-mfpu=} +command-line option. + +@end table + @end table @node AVR Function Attributes diff -ruN gnu_trunk.p3/gcc/gcc/doc/invoke.texi gnu_trunk.p4/gcc/gcc/doc/invoke.texi --- gnu_trunk.p3/gcc/gcc/doc/invoke.texi 2015-09-10 12:21:00.698911244 +0200 +++ gnu_trunk.p4/gcc/gcc/doc/invoke.texi 2015-09-14 10:27:20.281932581 +0200 @@ -13360,6 +13363,8 @@ floating-point arithmetic (in particular denormal values are treated as zero), so the use of NEON instructions may lead to a loss of precision. +You can also set the fpu name at function level by using the @code{target("mfpu=")} function attributes (@pxref{ARM Function Attributes}) or pragmas (@pxref{Function Specific Option Pragmas}). + @item -mfp16-format=@var{name} @opindex mfp16-format Specify the format of the @code{__fp16} half-precision floating-point type. 
diff -ruN gnu_trunk.p3/gcc/gcc/testsuite/gcc.target/arm/attr-crypto.c gnu_trunk.p4/gcc/gcc/testsuite/gcc.target/arm/attr-crypto.c --- gnu_trunk.p3/gcc/gcc/testsuite/gcc.target/arm/attr-crypto.c 1970-01-01 01:00:00.000000000 +0100 +++ gnu_trunk.p4/gcc/gcc/testsuite/gcc.target/arm/attr-crypto.c 2015-09-14 15:58:24.967898634 +0200 @@ -0,0 +1,38 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_crypto_ok } */ + +#pragma GCC target ("fpu=crypto-neon-fp-armv8") + +#ifndef __ARM_FEATURE_CRYPTO +#error __ARM_FEATURE_CRYPTO not defined. +#endif + +#ifndef __ARM_NEON +#error __ARM_NEON not defined. +#endif + +#if !defined(__ARM_FP) || (__ARM_FP != 14) +#error __ARM_FP +#endif + +#include "arm_neon.h" + +int +foo (void) +{ + uint32x4_t a = {0xd, 0xe, 0xa, 0xd}; + uint32x4_t b = {0, 1, 2, 3}; + + uint32x4_t res = vsha256su0q_u32 (a, b); + return res[0]; +} + +#pragma GCC reset_options + +/* Check that the FP version is correctly reset. */ + +#if !defined(__ARM_FP) || (__ARM_FP != 12) +#error __ARM_FP +#endif + +/* { dg-final { scan-assembler "sha256su0.32\tq\[0-9\]+, q\[0-9\]+" } } */ diff -ruN gnu_trunk.p3/gcc/gcc/testsuite/gcc.target/arm/attr-neon2.c gnu_trunk.p4/gcc/gcc/testsuite/gcc.target/arm/attr-neon2.c --- gnu_trunk.p3/gcc/gcc/testsuite/gcc.target/arm/attr-neon2.c 1970-01-01 01:00:00.000000000 +0100 +++ gnu_trunk.p4/gcc/gcc/testsuite/gcc.target/arm/attr-neon2.c 2015-09-14 15:58:24.967898634 +0200 @@ -0,0 +1,28 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-O2 -mfloat-abi=softfp -mfpu=vfp" } */ + +#pragma GCC target ("fpu=neon") +#include <arm_neon.h> + +/* Check that pragma target is used. */ +int8x8_t +my (int8x8_t __a, int8x8_t __b) +{ + return __a + __b; +} + +#pragma GCC reset_options + +/* Check that command line option is restored. 
*/ +int8x8_t +my1 (int8x8_t __a, int8x8_t __b) +{ + return __a + __b; +} + +/* { dg-final { scan-assembler-times "\.fpu vfp" 1 } } */ +/* { dg-final { scan-assembler-times "\.fpu neon" 1 } } */ +/* { dg-final { scan-assembler "vadd" } } */ + + diff -ruN gnu_trunk.p3/gcc/gcc/testsuite/gcc.target/arm/attr-neon-builtin-fail.c gnu_trunk.p4/gcc/gcc/testsuite/gcc.target/arm/attr-neon-builtin-fail.c --- gnu_trunk.p3/gcc/gcc/testsuite/gcc.target/arm/attr-neon-builtin-fail.c 1970-01-01 01:00:00.000000000 +0100 +++ gnu_trunk.p4/gcc/gcc/testsuite/gcc.target/arm/attr-neon-builtin-fail.c 2015-09-14 15:58:24.967898634 +0200 @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-O2 -mfloat-abi=softfp -mfpu=neon" } */ + +#include <arm_neon.h> + +void __attribute__ ((target ("fpu=vfp"))) +foo (uint8x16_t *p) +{ + *p = vmovq_n_u8 (3); /* { dg-error "called from here" } */ + +} + + +/* { dg-error "inlining failed in call to always_inline" "" { target *-*-* } 0 } */ + + + diff -ruN gnu_trunk.p3/gcc/gcc/testsuite/gcc.target/arm/attr-neon.c gnu_trunk.p4/gcc/gcc/testsuite/gcc.target/arm/attr-neon.c --- gnu_trunk.p3/gcc/gcc/testsuite/gcc.target/arm/attr-neon.c 1970-01-01 01:00:00.000000000 +0100 +++ gnu_trunk.p4/gcc/gcc/testsuite/gcc.target/arm/attr-neon.c 2015-09-14 16:12:08.449698268 +0200 @@ -0,0 +1,26 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-O3 -mfloat-abi=softfp -ftree-vectorize" } */ + +void +f3(int n, int x[], int y[]) { + int i; + for (i = 0; i < n; ++i) + y[i] = x[i] << 3; +} + +/* Verify that neon instructions are emitted once. 
*/ +void __attribute__ ((target("fpu=neon"))) + f1(int n, int x[], int y[]) { + int i; + for (i = 0; i < n; ++i) + y[i] = x[i] << 3; +} + +/* { dg-final { scan-assembler-times "\.fpu vfp" 1 } } */ +/* { dg-final { scan-assembler-times "\.fpu neon" 1 } } */ +/* { dg-final { scan-assembler-times "vshl" 1 } } */ + + + + diff -ruN gnu_trunk.p3/gcc/gcc/testsuite/gcc.target/arm/lto/pr65837_0.c gnu_trunk.p4/gcc/gcc/testsuite/gcc.target/arm/lto/pr65837_0.c --- gnu_trunk.p3/gcc/gcc/testsuite/gcc.target/arm/lto/pr65837_0.c 1970-01-01 01:00:00.000000000 +0100 +++ gnu_trunk.p4/gcc/gcc/testsuite/gcc.target/arm/lto/pr65837_0.c 2015-09-14 15:58:13.899874587 +0200 @@ -0,0 +1,14 @@ +/* { dg-lto-do run } */ +/* { dg-lto-options {{-flto -mfpu=neon}} } */ +/* { dg-suppress-ld-options {-mfpu=neon} } */ + +#include "arm_neon.h" + +float32x2_t a, b, c, e; + +int main() +{ + e = __builtin_neon_vmls_lanev2sf (a, b, c, 0); + return 0; +} + ^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 4/4] [ARM] Add attribute/pragma target fpu= 2015-09-15 10:48 ` Christian Bruel @ 2015-09-18 9:13 ` Kyrill Tkachov 2015-09-21 13:46 ` Christian Bruel 0 siblings, 1 reply; 9+ messages in thread From: Kyrill Tkachov @ 2015-09-18 9:13 UTC (permalink / raw) To: Christian Bruel, gcc-patches, Ramana Radhakrishnan On 15/09/15 11:47, Christian Bruel wrote: > > On 09/14/2015 04:30 PM, Christian Bruel wrote: >> Finally, the final part of the patch set does the attribute target >> parsing and checking, redefines the preprocessor macros and implements >> the inlining rules. >> >> testcases and documentation included. >> > new version to remove a shadowed remnant piece of code. > > > > thanks > > > > Christian > > + /* OK to inline between different modes. + Function with mode specific instructions, e.g using asm, + must be explicitely protected with noinline. */ s/explicitely/explicitly/ + const struct arm_fpu_desc *fpu_desc1 + = &all_fpus[caller_opts->x_arm_fpu_index]; + const struct arm_fpu_desc *fpu_desc2 + = &all_fpus[callee_opts->x_arm_fpu_index]; Please call these caller_fpu and callee_fpu, it's much easier to reason about the inlining rules that way + + /* Can't inline NEON extension if the caller doesn't support it. */ + if (ARM_FPU_FSET_HAS (fpu_desc2->features, FPU_FL_NEON) + && ! ARM_FPU_FSET_HAS (fpu_desc1->features, FPU_FL_NEON)) + return false; + + /* Can't inline CRYPTO extension if the caller doesn't support it. */ + if (ARM_FPU_FSET_HAS (fpu_desc2->features, FPU_FL_CRYPTO) + && ! ARM_FPU_FSET_HAS (fpu_desc1->features, FPU_FL_CRYPTO)) + return false; + We also need to take into account FPU_FL_FP16... In general what we want is for the callee FPU features to be a subset of the callers features, similar to the way we handle the x_aarch64_isa_flags handling in aarch64_can_inline_p from the aarch64 port. I think that's the way to go here rather than explicitly writing down a check for each feature. @@ -242,6 +239,8 @@ /* Update macros. 
*/ gcc_assert (cur_opt->x_target_flags == target_flags); + /* This one can be redefined by the pragma without warning. */ + cpp_undef (parse_in, "__ARM_FP"); arm_cpu_builtins (parse_in); Could you elaborate why the cpp_undef here? If you want to undefine __ARM_FP so you can redefine it to a new value in arm_cpu_builtins then I think you should just undefine it in that function. diff -ruN gnu_trunk.p3/gcc/gcc/doc/invoke.texi gnu_trunk.p4/gcc/gcc/doc/invoke.texi --- gnu_trunk.p3/gcc/gcc/doc/invoke.texi 2015-09-10 12:21:00.698911244 +0200 +++ gnu_trunk.p4/gcc/gcc/doc/invoke.texi 2015-09-14 10:27:20.281932581 +0200 @@ -13360,6 +13363,8 @@ floating-point arithmetic (in particular denormal values are treated as zero), so the use of NEON instructions may lead to a loss of precision. +You can also set the fpu name at function level by using the @code{target("mfpu=")} function attributes (@pxref{ARM Function Attributes}) or pragmas (@pxref{Function Specific Option Pragmas}). + s/"mfpu="/"fpu=" --- gnu_trunk.p3/gcc/gcc/testsuite/gcc.target/arm/attr-neon.c 1970-01-01 01:00:00.000000000 +0100 +++ gnu_trunk.p4/gcc/gcc/testsuite/gcc.target/arm/attr-neon.c 2015-09-14 16:12:08.449698268 +0200 @@ -0,0 +1,26 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-O3 -mfloat-abi=softfp -ftree-vectorize" } */ + +void +f3(int n, int x[], int y[]) { + int i; + for (i = 0; i < n; ++i) + y[i] = x[i] << 3; +} + What if GCC has been configured with --with-fpu=neon? Then f3 will be compiled assuming NEON. You should add a -mfpu=vfp to the dg-options. ^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 4/4] [ARM] Add attribute/pragma target fpu= 2015-09-18 9:13 ` Kyrill Tkachov @ 2015-09-21 13:46 ` Christian Bruel 2015-10-08 8:53 ` Kyrill Tkachov 0 siblings, 1 reply; 9+ messages in thread From: Christian Bruel @ 2015-09-21 13:46 UTC (permalink / raw) To: kyrylo.tkachov; +Cc: Ramana.Radhakrishnan, gcc-patches [-- Attachment #1: Type: text/plain, Size: 4473 bytes --] Hi Kyrill, Thanks for your comments. Answers interleaved and the new patch attached. On 09/18/2015 11:04 AM, Kyrill Tkachov wrote: > > On 15/09/15 11:47, Christian Bruel wrote: >> >> On 09/14/2015 04:30 PM, Christian Bruel wrote: >>> Finally, the final part of the patch set does the attribute target >>> parsing and checking, redefines the preprocessor macros and implements >>> the inlining rules. >>> >>> testcases and documentation included. >>> >> new version to remove a shadowed remnant piece of code. >> >> >> > thanks >> > >> > Christian >> > > > + /* OK to inline between different modes. > + Function with mode specific instructions, e.g using asm, > + must be explicitely protected with noinline. */ > > s/explicitely/explicitly/ > thanks > > + const struct arm_fpu_desc *fpu_desc1 > + = &all_fpus[caller_opts->x_arm_fpu_index]; > + const struct arm_fpu_desc *fpu_desc2 > + = &all_fpus[callee_opts->x_arm_fpu_index]; > > Please call these caller_fpu and callee_fpu, it's much easier to reason about the inlining rules that way ok > > + > + /* Can't inline NEON extension if the caller doesn't support it. */ > + if (ARM_FPU_FSET_HAS (fpu_desc2->features, FPU_FL_NEON) > + && ! ARM_FPU_FSET_HAS (fpu_desc1->features, FPU_FL_NEON)) > + return false; > + > + /* Can't inline CRYPTO extension if the caller doesn't support it. */ > + if (ARM_FPU_FSET_HAS (fpu_desc2->features, FPU_FL_CRYPTO) > + && ! ARM_FPU_FSET_HAS (fpu_desc1->features, FPU_FL_CRYPTO)) > + return false; > + > > We also need to take into account FPU_FL_FP16... 
> In general what we want is for the callee FPU features to be > a subset of the callers features, similar to the way we handle > the x_aarch64_isa_flags handling in aarch64_can_inline_p from the > aarch64 port. I think that's the way to go here rather than explicitly > writing down a check for each feature. ok, with FL_FP16 now, > > @@ -242,6 +239,8 @@ > > /* Update macros. */ > gcc_assert (cur_opt->x_target_flags == target_flags); > + /* This one can be redefined by the pragma without warning. */ > + cpp_undef (parse_in, "__ARM_FP"); > arm_cpu_builtins (parse_in); > > Could you elaborate why the cpp_undef here? > If you want to undefine __ARM_FP so you can redefine it to a new value > in arm_cpu_builtins then I think you should just undefine it in that function. This is to avoid a warning: "__ARM_FP" redefined when creating a new pragma scope. (See the test attr-crypto.c). We cannot call the cpp_undef inside arm_cpu_builtins, because it is also used for the TARGET_CPU_CPP_BUILTINS hook and then would prevent real illegitimate redefinitions. Alternatively, I thought to reset the warn_builtin_macro_redefined flag, but that doesn't work as the macro is not NODE_BUILTIN (see the definition of warn_of_redefinition in libcpp). We might need to change this later : should target macros be marked as NODE_BUILTIN ? We can discuss this separately (I can open a defect) as we have the cpp_undef solution for now, if you agree. > > > diff -ruN gnu_trunk.p3/gcc/gcc/doc/invoke.texi gnu_trunk.p4/gcc/gcc/doc/invoke.texi > --- gnu_trunk.p3/gcc/gcc/doc/invoke.texi 2015-09-10 12:21:00.698911244 +0200 > +++ gnu_trunk.p4/gcc/gcc/doc/invoke.texi 2015-09-14 10:27:20.281932581 +0200 > @@ -13360,6 +13363,8 @@ > floating-point arithmetic (in particular denormal values are treated as > zero), so the use of NEON instructions may lead to a loss of precision. 
> > +You can also set the fpu name at function level by using the @code{target("mfpu=")} function attributes (@pxref{ARM Function Attributes}) or pragmas (@pxref{Function Specific Option Pragmas}). > + > > s/"mfpu="/"fpu=" > thanks > > --- gnu_trunk.p3/gcc/gcc/testsuite/gcc.target/arm/attr-neon.c 1970-01-01 01:00:00.000000000 +0100 > +++ gnu_trunk.p4/gcc/gcc/testsuite/gcc.target/arm/attr-neon.c 2015-09-14 16:12:08.449698268 +0200 > @@ -0,0 +1,26 @@ > +/* { dg-do compile } */ > +/* { dg-require-effective-target arm_neon_ok } */ > +/* { dg-options "-O3 -mfloat-abi=softfp -ftree-vectorize" } */ > + > +void > +f3(int n, int x[], int y[]) { > + int i; > + for (i = 0; i < n; ++i) > + y[i] = x[i] << 3; > +} > + > > What if GCC has been configured with --with-fpu=neon? > Then f3 will be compiled assuming NEON. You should add a -mfpu=vfp to the dg-options. Ah yes. I've added ((target("fpu=vfp")) instead, since we are testing the attribute. [-- Attachment #2: p42.patch --] [-- Type: text/x-patch, Size: 11741 bytes --] 2015-05-26 Christian Bruel <christian.bruel@st.com> PR target/65837 * config/arm/arm-c.c (arm_cpu_builtins): Set or reset __ARM_FEATURE_CRYPTO, __VFP_FP__, __ARM_NEON__ (arm_pragma_target_parse): Change check for arm_cpu_builtins. undefine __ARM_FP. * config/arm/arm.c (arm_can_inline_p): Check FPUs. (arm_valid_target_attribute_rec): Handle -mfpu attribute target. * doc/invoke.texi (-mfpu=): Mention attribute and pragma. * doc/extend.texi (-mfpu=): Describe attribute. 
2015-09-14 Christian Bruel <christian.bruel@st.com> PR target/65837 gcc.target/arm/lto/pr65837_0.c gcc.target/arm/attr-neon2.c gcc.target/arm/attr-neon.c gcc.target/arm/attr-neon-builtin-fail.c gcc.target/arm/attr-crypto.c diff -ruN gnu_trunk.p3/gcc/gcc/config/arm/arm.c gnu_trunk.p4/gcc/gcc/config/arm/arm.c --- gnu_trunk.p3/gcc/gcc/config/arm/arm.c 2015-09-21 14:07:39.218566954 +0200 +++ gnu_trunk.p4/gcc/gcc/config/arm/arm.c 2015-09-21 13:36:36.242397513 +0200 @@ -29789,11 +29788,36 @@ /* Hook to determine if one function can safely inline another. */ static bool -arm_can_inline_p (tree caller ATTRIBUTE_UNUSED, tree callee ATTRIBUTE_UNUSED) +arm_can_inline_p (tree caller, tree callee) { - /* Overidde default hook: Always OK to inline between different modes. - Function with mode specific instructions, e.g using asm, must be explicitely - protected with noinline. */ + tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller); + tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee); + + struct cl_target_option *caller_opts + = TREE_TARGET_OPTION (caller_tree ? caller_tree + : target_option_default_node); + + struct cl_target_option *callee_opts + = TREE_TARGET_OPTION (callee_tree ? callee_tree + : target_option_default_node); + + const struct arm_fpu_desc *caller_fpu + = &all_fpus[caller_opts->x_arm_fpu_index]; + const struct arm_fpu_desc *callee_fpu + = &all_fpus[callee_opts->x_arm_fpu_index]; + + /* Callee's fpu features should be a subset of the caller's. */ + if ((caller_fpu->features & callee_fpu->features) != callee_fpu->features) + return false; + + /* Need same model and regs. */ + if (callee_fpu->model != caller_fpu->model + || callee_fpu->regs != caller_fpu->regs) + return false; + + /* OK to inline between different modes. + Function with mode specific instructions, e.g using asm, + must be explicitly protected with noinline. 
*/ return true; } @@ -29821,30 +29846,38 @@ } char *argstr = ASTRDUP (TREE_STRING_POINTER (args)); - while (argstr && *argstr != '\0') + char *q; + + while ((q = strtok (argstr, ",")) != NULL) { - while (ISSPACE (*argstr)) - argstr++; + while (ISSPACE (*q)) ++q; - if (!strcmp (argstr, "thumb")) - { + argstr = NULL; + if (!strncmp (q, "thumb", 5)) opts->x_target_flags |= MASK_THUMB; - arm_option_check_internal (opts); - return true; - } - if (!strcmp (argstr, "arm")) - { + else if (!strncmp (q, "arm", 3)) opts->x_target_flags &= ~MASK_THUMB; - arm_option_check_internal (opts); - return true; + + else if (!strncmp (q, "fpu=", 4)) + { + if (! opt_enum_arg_to_value (OPT_mfpu_, q+4, + &opts->x_arm_fpu_index, CL_TARGET)) + { + error ("invalid fpu for attribute(target(\"%s\"))", q); + return false; + } + } + else + { + error ("attribute(target(\"%s\")) is unknown", q); + return false; } - warning (0, "attribute(target(\"%s\")) is unknown", argstr); - return false; + arm_option_check_internal (opts); } - return false; + return true; } /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. 
*/ diff -ruN gnu_trunk.p3/gcc/gcc/config/arm/arm-c.c gnu_trunk.p4/gcc/gcc/config/arm/arm-c.c --- gnu_trunk.p3/gcc/gcc/config/arm/arm-c.c 2015-09-21 14:07:12.186506227 +0200 +++ gnu_trunk.p4/gcc/gcc/config/arm/arm-c.c 2015-09-21 13:46:02.655664904 +0200 @@ -68,8 +68,8 @@ def_or_undef_macro (pfile, "__ARM_FEATURE_DSP", TARGET_DSP_MULTIPLY); def_or_undef_macro (pfile, "__ARM_FEATURE_QBIT", TARGET_ARM_QBIT); def_or_undef_macro (pfile, "__ARM_FEATURE_SAT", TARGET_ARM_SAT); - if (TARGET_CRYPTO) - builtin_define ("__ARM_FEATURE_CRYPTO"); + def_or_undef_macro (pfile, "__ARM_FEATURE_CRYPTO", TARGET_CRYPTO); + if (unaligned_access) builtin_define ("__ARM_FEATURE_UNALIGNED"); if (TARGET_CRC32) @@ -129,8 +129,7 @@ if (TARGET_SOFT_FLOAT) builtin_define ("__SOFTFP__"); - if (TARGET_VFP) - builtin_define ("__VFP_FP__"); + def_or_undef_macro (pfile, "__VFP_FP__", TARGET_VFP); if (TARGET_ARM_FP) builtin_define_with_int_value ("__ARM_FP", TARGET_ARM_FP); @@ -141,11 +140,9 @@ if (TARGET_FMA) builtin_define ("__ARM_FEATURE_FMA"); - if (TARGET_NEON) - { - builtin_define ("__ARM_NEON__"); - builtin_define ("__ARM_NEON"); - } + def_or_undef_macro (pfile, "__ARM_NEON__", TARGET_NEON); + def_or_undef_macro (pfile, "__ARM_NEON", TARGET_NEON); + if (TARGET_NEON_FP) builtin_define_with_int_value ("__ARM_NEON_FP", TARGET_NEON_FP); @@ -232,7 +228,7 @@ gcc_assert (prev_opt); gcc_assert (cur_opt); - if (cur_opt->x_target_flags != prev_opt->x_target_flags) + if (cur_opt != prev_opt) { /* For the definitions, ensure all newly defined macros are considered as used for -Wunused-macros. There is no point warning about the @@ -243,6 +239,8 @@ /* Update macros. */ gcc_assert (cur_opt->x_target_flags == target_flags); + /* This one can be redefined by the pragma without warning. 
*/ + cpp_undef (parse_in, "__ARM_FP"); arm_cpu_builtins (parse_in); cpp_opts->warn_unused_macros = saved_warn_unused_macros; diff -ruN gnu_trunk.p3/gcc/gcc/doc/extend.texi gnu_trunk.p4/gcc/gcc/doc/extend.texi --- gnu_trunk.p3/gcc/gcc/doc/extend.texi 2015-09-07 13:35:20.777683005 +0200 +++ gnu_trunk.p4/gcc/gcc/doc/extend.texi 2015-09-14 13:58:49.271385001 +0200 @@ -3606,10 +3606,17 @@ @item arm @cindex @code{target("arm")} function attribute, ARM Force code generation in the ARM (A32) ISA. -@end table Functions from different modes can be inlined in the caller's mode. +@item fpu= +@cindex @code{target("fpu=")} function attribute, ARM +Specifies the fpu for which to tune the performance of this function. +The behavior and permissible arguments are the same as for the @option{-mfpu=} +command-line option. + +@end table + @end table @node AVR Function Attributes diff -ruN gnu_trunk.p3/gcc/gcc/doc/invoke.texi gnu_trunk.p4/gcc/gcc/doc/invoke.texi --- gnu_trunk.p3/gcc/gcc/doc/invoke.texi 2015-09-21 13:35:49.274292268 +0200 +++ gnu_trunk.p4/gcc/gcc/doc/invoke.texi 2015-09-21 13:36:18.798358427 +0200 @@ -13386,6 +13386,8 @@ floating-point arithmetic (in particular denormal values are treated as zero), so the use of NEON instructions may lead to a loss of precision. +You can also set the fpu name at function level by using the @code{target("fpu=")} function attributes (@pxref{ARM Function Attributes}) or pragmas (@pxref{Function Specific Option Pragmas}). + @item -mfp16-format=@var{name} @opindex mfp16-format Specify the format of the @code{__fp16} half-precision floating-point type. 
diff -ruN gnu_trunk.p3/gcc/gcc/testsuite/gcc.target/arm/attr-crypto.c gnu_trunk.p4/gcc/gcc/testsuite/gcc.target/arm/attr-crypto.c --- gnu_trunk.p3/gcc/gcc/testsuite/gcc.target/arm/attr-crypto.c 1970-01-01 01:00:00.000000000 +0100 +++ gnu_trunk.p4/gcc/gcc/testsuite/gcc.target/arm/attr-crypto.c 2015-09-14 15:58:24.967898634 +0200 @@ -0,0 +1,38 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_crypto_ok } */ + +#pragma GCC target ("fpu=crypto-neon-fp-armv8") + +#ifndef __ARM_FEATURE_CRYPTO +#error __ARM_FEATURE_CRYPTO not defined. +#endif + +#ifndef __ARM_NEON +#error __ARM_NEON not defined. +#endif + +#if !defined(__ARM_FP) || (__ARM_FP != 14) +#error __ARM_FP +#endif + +#include "arm_neon.h" + +int +foo (void) +{ + uint32x4_t a = {0xd, 0xe, 0xa, 0xd}; + uint32x4_t b = {0, 1, 2, 3}; + + uint32x4_t res = vsha256su0q_u32 (a, b); + return res[0]; +} + +#pragma GCC reset_options + +/* Check that the FP version is correctly reset. */ + +#if !defined(__ARM_FP) || (__ARM_FP != 12) +#error __ARM_FP +#endif + +/* { dg-final { scan-assembler "sha256su0.32\tq\[0-9\]+, q\[0-9\]+" } } */ diff -ruN gnu_trunk.p3/gcc/gcc/testsuite/gcc.target/arm/attr-neon2.c gnu_trunk.p4/gcc/gcc/testsuite/gcc.target/arm/attr-neon2.c --- gnu_trunk.p3/gcc/gcc/testsuite/gcc.target/arm/attr-neon2.c 1970-01-01 01:00:00.000000000 +0100 +++ gnu_trunk.p4/gcc/gcc/testsuite/gcc.target/arm/attr-neon2.c 2015-09-14 15:58:24.967898634 +0200 @@ -0,0 +1,28 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-O2 -mfloat-abi=softfp -mfpu=vfp" } */ + +#pragma GCC target ("fpu=neon") +#include <arm_neon.h> + +/* Check that pragma target is used. */ +int8x8_t +my (int8x8_t __a, int8x8_t __b) +{ + return __a + __b; +} + +#pragma GCC reset_options + +/* Check that command line option is restored. 
*/ +int8x8_t +my1 (int8x8_t __a, int8x8_t __b) +{ + return __a + __b; +} + +/* { dg-final { scan-assembler-times "\.fpu vfp" 1 } } */ +/* { dg-final { scan-assembler-times "\.fpu neon" 1 } } */ +/* { dg-final { scan-assembler "vadd" } } */ + + diff -ruN gnu_trunk.p3/gcc/gcc/testsuite/gcc.target/arm/attr-neon-builtin-fail.c gnu_trunk.p4/gcc/gcc/testsuite/gcc.target/arm/attr-neon-builtin-fail.c --- gnu_trunk.p3/gcc/gcc/testsuite/gcc.target/arm/attr-neon-builtin-fail.c 1970-01-01 01:00:00.000000000 +0100 +++ gnu_trunk.p4/gcc/gcc/testsuite/gcc.target/arm/attr-neon-builtin-fail.c 2015-09-14 15:58:24.967898634 +0200 @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-O2 -mfloat-abi=softfp -mfpu=neon" } */ + +#include <arm_neon.h> + +void __attribute__ ((target ("fpu=vfp"))) +foo (uint8x16_t *p) +{ + *p = vmovq_n_u8 (3); /* { dg-error "called from here" } */ + +} + + +/* { dg-error "inlining failed in call to always_inline" "" { target *-*-* } 0 } */ + + + diff -ruN gnu_trunk.p3/gcc/gcc/testsuite/gcc.target/arm/attr-neon.c gnu_trunk.p4/gcc/gcc/testsuite/gcc.target/arm/attr-neon.c --- gnu_trunk.p3/gcc/gcc/testsuite/gcc.target/arm/attr-neon.c 1970-01-01 01:00:00.000000000 +0100 +++ gnu_trunk.p4/gcc/gcc/testsuite/gcc.target/arm/attr-neon.c 2015-09-21 13:43:45.983359388 +0200 @@ -0,0 +1,26 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-O2 -mfloat-abi=softfp -ftree-vectorize" } */ + +/* Verify that neon instructions are emitted once. 
*/ +void __attribute__ ((target("fpu=neon"))) + f1(int n, int x[], int y[]) { + int i; + for (i = 0; i < n; ++i) + y[i] = x[i] << 3; +} + +void __attribute__ ((target("fpu=vfp"))) +f3(int n, int x[], int y[]) { + int i; + for (i = 0; i < n; ++i) + y[i] = x[i] << 3; +} + +/* { dg-final { scan-assembler-times "\.fpu vfp" 1 } } */ +/* { dg-final { scan-assembler-times "\.fpu neon" 1 } } */ +/* { dg-final { scan-assembler-times "vshl" 1 } } */ + + + + diff -ruN gnu_trunk.p3/gcc/gcc/testsuite/gcc.target/arm/lto/pr65837_0.c gnu_trunk.p4/gcc/gcc/testsuite/gcc.target/arm/lto/pr65837_0.c --- gnu_trunk.p3/gcc/gcc/testsuite/gcc.target/arm/lto/pr65837_0.c 1970-01-01 01:00:00.000000000 +0100 +++ gnu_trunk.p4/gcc/gcc/testsuite/gcc.target/arm/lto/pr65837_0.c 2015-09-14 15:58:13.899874587 +0200 @@ -0,0 +1,14 @@ +/* { dg-lto-do run } */ +/* { dg-lto-options {{-flto -mfpu=neon}} } */ +/* { dg-suppress-ld-options {-mfpu=neon} } */ + +#include "arm_neon.h" + +float32x2_t a, b, c, e; + +int main() +{ + e = __builtin_neon_vmls_lanev2sf (a, b, c, 0); + return 0; +} + ^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 4/4] [ARM] Add attribute/pragma target fpu= 2015-09-21 13:46 ` Christian Bruel @ 2015-10-08 8:53 ` Kyrill Tkachov 2015-11-12 14:54 ` Christian Bruel 0 siblings, 1 reply; 9+ messages in thread From: Kyrill Tkachov @ 2015-10-08 8:53 UTC (permalink / raw) To: Christian Bruel; +Cc: Ramana Radhakrishnan, gcc-patches Hi Christian, On 21/09/15 14:43, Christian Bruel wrote: > Hi Kyrill, > > Thanks for your comments. Answers interleaved and the new patch attached. > > On 09/18/2015 11:04 AM, Kyrill Tkachov wrote: >> On 15/09/15 11:47, Christian Bruel wrote: >>> On 09/14/2015 04:30 PM, Christian Bruel wrote: >>>> Finally, the final part of the patch set does the attribute target >>>> parsing and checking, redefines the preprocessor macros and implements >>>> the inlining rules. >>>> >>>> testcases and documentation included. >>>> >>> new version to remove a shadowed remnant piece of code. >>> >>> >>> > thanks >>> > >>> > Christian >>> > >> + /* OK to inline between different modes. >> + Function with mode specific instructions, e.g using asm, >> + must be explicitely protected with noinline. */ >> >> s/explicitely/explicitly/ >> > thanks > >> + const struct arm_fpu_desc *fpu_desc1 >> + = &all_fpus[caller_opts->x_arm_fpu_index]; >> + const struct arm_fpu_desc *fpu_desc2 >> + = &all_fpus[callee_opts->x_arm_fpu_index]; >> >> Please call these caller_fpu and callee_fpu, it's much easier to reason about the inlining rules that way > ok > >> + >> + /* Can't inline NEON extension if the caller doesn't support it. */ >> + if (ARM_FPU_FSET_HAS (fpu_desc2->features, FPU_FL_NEON) >> + && ! ARM_FPU_FSET_HAS (fpu_desc1->features, FPU_FL_NEON)) >> + return false; >> + >> + /* Can't inline CRYPTO extension if the caller doesn't support it. */ >> + if (ARM_FPU_FSET_HAS (fpu_desc2->features, FPU_FL_CRYPTO) >> + && ! ARM_FPU_FSET_HAS (fpu_desc1->features, FPU_FL_CRYPTO)) >> + return false; >> + >> >> We also need to take into account FPU_FL_FP16... 
>> In general what we want is for the callee FPU features to be >> a subset of the callers features, similar to the way we handle >> the x_aarch64_isa_flags handling in aarch64_can_inline_p from the >> aarch64 port. I think that's the way to go here rather than explicitly >> writing down a check for each feature. > ok, with FL_FP16 now, > >> @@ -242,6 +239,8 @@ >> >> /* Update macros. */ >> gcc_assert (cur_opt->x_target_flags == target_flags); >> + /* This one can be redefined by the pragma without warning. */ >> + cpp_undef (parse_in, "__ARM_FP"); >> arm_cpu_builtins (parse_in); >> >> Could you elaborate why the cpp_undef here? >> If you want to undefine __ARM_FP so you can redefine it to a new value >> in arm_cpu_builtins then I think you should just undefine it in that function. > This is to avoid a warning: "__ARM_FP" redefined when creating a new > pragma scope. (See the test attr-crypto.c). > > We cannot call the cpp_undef inside arm_cpu_builtins, because it is also > used for the TARGET_CPU_CPP_BUILTINS hook and then would prevent real > illegitimate redefinitions. > > Alternatively, I thought to reset the warn_builtin_macro_redefined flag, > but that doesn't work as the macro is not NODE_BUILTIN (see the > definition of warn_of_redefinition in libcpp). > We might need to change this later : should target macros be marked as > NOTE_BUILTIN ? We can discuss this separately (I can open a defect) as > we have the cpp_undep solution for now, if you agree. > >> >> diff -ruN gnu_trunk.p3/gcc/gcc/doc/invoke.texi gnu_trunk.p4/gcc/gcc/doc/invoke.texi >> --- gnu_trunk.p3/gcc/gcc/doc/invoke.texi 2015-09-10 12:21:00.698911244 +0200 >> +++ gnu_trunk.p4/gcc/gcc/doc/invoke.texi 2015-09-14 10:27:20.281932581 +0200 >> @@ -13360,6 +13363,8 @@ >> floating-point arithmetic (in particular denormal values are treated as >> zero), so the use of NEON instructions may lead to a loss of precision. 
>> >> +You can also set the fpu name at function level by using the @code{target("mfpu=")} function attributes (@pxref{ARM Function Attributes}) or pragmas (@pxref{Function Specific Option Pragmas}). >> + >> >> s/"mfpu="/"fpu=" >> > thanks > >> --- gnu_trunk.p3/gcc/gcc/testsuite/gcc.target/arm/attr-neon.c 1970-01-01 01:00:00.000000000 +0100 >> +++ gnu_trunk.p4/gcc/gcc/testsuite/gcc.target/arm/attr-neon.c 2015-09-14 16:12:08.449698268 +0200 >> @@ -0,0 +1,26 @@ >> +/* { dg-do compile } */ >> +/* { dg-require-effective-target arm_neon_ok } */ >> +/* { dg-options "-O3 -mfloat-abi=softfp -ftree-vectorize" } */ >> + >> +void >> +f3(int n, int x[], int y[]) { >> + int i; >> + for (i = 0; i < n; ++i) >> + y[i] = x[i] << 3; >> +} >> + >> >> What if GCC has been configured with --with-fpu=neon? >> Then f3 will be compiled assuming NEON. You should add a -mfpu=vfp to the dg-options. > Ah yes. I've added ((target("fpu=vfp")) instead, since we are testing > the attribute. > 2015-05-26 Christian Bruel<christian.bruel@st.com> PR target/65837 * config/arm/arm-c.c (arm_cpu_builtins): Set or reset __ARM_FEATURE_CRYPTO, __VFP_FP__, __ARM_NEON__ (arm_pragma_target_parse): Change check for arm_cpu_builtins. undefine __ARM_FP. * config/arm/arm.c (arm_can_inline_p): Check FPUs. (arm_valid_target_attribute_rec): Handle -mfpu attribute target. * doc/invoke.texi (-mfpu=): Mention attribute and pragma. * doc/extend.texi (-mfpu=): Describe attribute. 2015-09-14 Christian Bruel<christian.bruel@st.com> PR target/65837 gcc.target/arm/lto/pr65837_0.c gcc.target/arm/attr-neon2.c gcc.target/arm/attr-neon.c gcc.target/arm/attr-neon-builtin-fail.c gcc.target/arm/attr-crypto.c The parts in this patch look ok to me. However, I think we need some more functionality In aarch64 we support compiling a file with no simd, including arm_neon.h and using arm_neon.h intrinsics within functions tagged with simd support. We want to support such functionality on arm i.e. 
compile a file with -mfpu=vfp and use arm_neon.h intrinsics in a function tagged with an fpu=neon attribute. For that we'd need to wrap the intrinsics in arm_neon.h in appropriate pragmas, like in the aarch64 version of arm_neon.h. Thanks, Kyrill ^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 4/4] [ARM] Add attribute/pragma target fpu= 2015-10-08 8:53 ` Kyrill Tkachov @ 2015-11-12 14:54 ` Christian Bruel 2015-11-13 11:49 ` Kyrill Tkachov 0 siblings, 1 reply; 9+ messages in thread From: Christian Bruel @ 2015-11-12 14:54 UTC (permalink / raw) To: Kyrill Tkachov; +Cc: Ramana Radhakrishnan, gcc-patches [-- Attachment #1: Type: text/plain, Size: 2023 bytes --] Hi Kyrill, > ... > The parts in this patch look ok to me. > However, I think we need some more functionality > In aarch64 we support compiling a file with no simd, including arm_neon.h and using arm_neon.h intrinsics > within functions tagged with simd support. > We want to support such functionality on arm i.e. compile a file with -mfpu=vfp and use arm_neon.h intrinsics > in a function tagged with an fpu=neon attribute. > For that we'd need to wrap the intrinsics in arm_neon.h in appropriate pragmas, like in the aarch64 version of arm_neon.h As discussed, here is arm_neon.h for aarch32/neon with the same programming model as aarch64/simd. As you said, let's use one of the fpu=neon attributes even if the file is compiled with -mfpu=vfp. The drawback for this is that now we unconditionally make every neon intrinsic available, introducing a small legacy change with regards to error checking (that you didn't have with aarch64). It is therefore worth stressing that: - One cannot rely on #include "arm_neon.h" to check if the compiler can use neon instructions. Instead use #ifndef __ARM_NEON__. (Found in target-supports.exp) - Types cannot be checked. For instance: #include <arm_neon.h> poly128_t foo (poly128_t* ptr) { return vldrq_p128 (ptr); } compiled with -mfpu=neon used to be rejected with error: unknown type name 'poly128_t' ... Now the error, as a side effect from the inlining rules between incompatible modes, becomes error: inlining failed in call to always_inline 'vldrq_p128': target specific option mismatch ... 
I found this more confusing, so I was a little bit reluctant to implement this, but the code is correctly rejected and the message makes sense, after all. Just a different check. This patch applies on top of the preceding attribute/pragma target fpu= series. Tested with arm-none-eabi configured with default and --with-cpu=cortex-a9 --with-fp --with-float=hard. Also fixes a few macros that depend on fpu=, that I forgot to redefine. Christian [-- Attachment #2: arm_neon.patch --] [-- Type: text/x-patch, Size: 55802 bytes --] 2015-11-12 Christian Bruel <christian.bruel@st.com> * config/arm/arm_neon.h: Remove #ifndef check on __ARM_NEON. Replace #ifdef __ARM_FEATURE_CRYPTO, __ARM_FEATURE_FMA, __ARM_FP with appropriate pragma GCC target. * config/arm/arm-c.c (arm_cpu_builtins): Conditionally set and reset __ARM_FEATURE_FMA and __ARM_NEON_FP, __ARM_FP. 2015-11-12 Christian Bruel <christian.bruel@st.com> * lib/target-supports.exp (check_effective_target_arm_neon_ok_nocache): Check __ARM_NEON__ instead of "arm_neon.h". * gcc.target/arm/attr-neon3.c: New test. 
* gcc.target/arm/attr-neon-fp16.c: Likewise diff -ruN '--exclude=#*#' '--exclude=.svn' '--exclude=*~' -ruN gnu_trunk.p4/gcc/gcc/config/arm/arm-c.c gnu_trunk.p5/gcc/gcc/config/arm/arm-c.c --- gnu_trunk.p4/gcc/gcc/config/arm/arm-c.c 2015-11-10 14:52:22.968943246 +0100 +++ gnu_trunk.p5/gcc/gcc/config/arm/arm-c.c 2015-11-09 14:33:08.395442761 +0100 @@ -129,18 +129,22 @@ if (TARGET_ARM_FP) builtin_define_with_int_value ("__ARM_FP", TARGET_ARM_FP); + else + cpp_undef (pfile, "__ARM_FP"); + if (arm_fp16_format == ARM_FP16_FORMAT_IEEE) builtin_define ("__ARM_FP16_FORMAT_IEEE"); if (arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE) builtin_define ("__ARM_FP16_FORMAT_ALTERNATIVE"); - if (TARGET_FMA) - builtin_define ("__ARM_FEATURE_FMA"); + def_or_undef_macro (pfile, "__ARM_FEATURE_FMA", TARGET_FMA); def_or_undef_macro (pfile, "__ARM_NEON__", TARGET_NEON); def_or_undef_macro (pfile, "__ARM_NEON", TARGET_NEON); if (TARGET_NEON_FP) builtin_define_with_int_value ("__ARM_NEON_FP", TARGET_NEON_FP); + else + cpp_undef (pfile, "__ARM_NEON_FP"); /* Add a define for interworking. Needed when building libgcc.a. */ if (arm_cpp_interwork) diff -ruN '--exclude=#*#' '--exclude=.svn' '--exclude=*~' -ruN gnu_trunk.p4/gcc/gcc/config/arm/arm_neon.h gnu_trunk.p5/gcc/gcc/config/arm/arm_neon.h --- gnu_trunk.p4/gcc/gcc/config/arm/arm_neon.h 2015-09-10 14:57:15.363897373 +0200 +++ gnu_trunk.p5/gcc/gcc/config/arm/arm_neon.h 2015-11-12 14:22:23.071626491 +0100 @@ -27,9 +27,8 @@ #ifndef _GCC_ARM_NEON_H #define _GCC_ARM_NEON_H 1 -#ifndef __ARM_NEON__ -#error You must enable NEON instructions (e.g. 
-mfloat-abi=softfp -mfpu=neon) to use arm_neon.h -#else +#pragma GCC push_options +#pragma GCC target ("fpu=neon") #ifdef __cplusplus extern "C" { @@ -48,9 +47,10 @@ typedef __simd64_float32_t float32x2_t; typedef __simd64_poly8_t poly8x8_t; typedef __simd64_poly16_t poly16x4_t; -#ifdef __ARM_FEATURE_CRYPTO +#pragma GCC push_options +#pragma GCC target ("fpu=crypto-neon-fp-armv8") typedef __builtin_neon_poly64 poly64x1_t; -#endif +#pragma GCC pop_options typedef __simd64_uint8_t uint8x8_t; typedef __simd64_uint16_t uint16x4_t; typedef __simd64_uint32_t uint32x2_t; @@ -66,9 +66,10 @@ typedef __simd128_float32_t float32x4_t; typedef __simd128_poly8_t poly8x16_t; typedef __simd128_poly16_t poly16x8_t; -#ifdef __ARM_FEATURE_CRYPTO +#pragma GCC push_options +#pragma GCC target ("fpu=crypto-neon-fp-armv8") typedef __builtin_neon_poly64 poly64x2_t __attribute__ ((__vector_size__ (16))); -#endif +#pragma GCC pop_options typedef __simd128_uint8_t uint8x16_t; typedef __simd128_uint16_t uint16x8_t; @@ -81,10 +82,11 @@ keep them that way. 
*/ typedef __builtin_neon_poly8 poly8_t; typedef __builtin_neon_poly16 poly16_t; -#ifdef __ARM_FEATURE_CRYPTO +#pragma GCC push_options +#pragma GCC target ("fpu=crypto-neon-fp-armv8") typedef __builtin_neon_poly64 poly64_t; typedef __builtin_neon_poly128 poly128_t; -#endif +#pragma GCC pop_options typedef struct int8x8x2_t { @@ -210,20 +212,19 @@ poly16x8_t val[2]; } poly16x8x2_t; -#ifdef __ARM_FEATURE_CRYPTO +#pragma GCC push_options +#pragma GCC target ("fpu=crypto-neon-fp-armv8") typedef struct poly64x1x2_t { poly64x1_t val[2]; } poly64x1x2_t; -#endif -#ifdef __ARM_FEATURE_CRYPTO typedef struct poly64x2x2_t { poly64x2_t val[2]; } poly64x2x2_t; -#endif +#pragma GCC pop_options typedef struct int8x8x3_t @@ -350,20 +351,19 @@ poly16x8_t val[3]; } poly16x8x3_t; -#ifdef __ARM_FEATURE_CRYPTO +#pragma GCC push_options +#pragma GCC target ("fpu=crypto-neon-fp-armv8") typedef struct poly64x1x3_t { poly64x1_t val[3]; } poly64x1x3_t; -#endif -#ifdef __ARM_FEATURE_CRYPTO typedef struct poly64x2x3_t { poly64x2_t val[3]; } poly64x2x3_t; -#endif +#pragma GCC pop_options typedef struct int8x8x4_t @@ -490,20 +490,19 @@ poly16x8_t val[4]; } poly16x8x4_t; -#ifdef __ARM_FEATURE_CRYPTO +#pragma GCC push_options +#pragma GCC target ("fpu=crypto-neon-fp-armv8") typedef struct poly64x1x4_t { poly64x1_t val[4]; } poly64x1x4_t; -#endif -#ifdef __ARM_FEATURE_CRYPTO typedef struct poly64x2x4_t { poly64x2_t val[4]; } poly64x2x4_t; -#endif +#pragma GCC pop_options /* vadd */ __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) @@ -1477,38 +1476,33 @@ return (int64x2_t)__builtin_neon_vqdmlslv2si (__a, __b, __c); } -#ifdef __ARM_FEATURE_FMA +#pragma GCC push_options +#pragma GCC target ("fpu=neon-vfpv4") __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) vfma_f32 (float32x2_t __a, float32x2_t __b, float32x2_t __c) { return (float32x2_t)__builtin_neon_vfmav2sf (__a, __b, __c); } -#endif -#ifdef __ARM_FEATURE_FMA __extension__ static __inline 
float32x4_t __attribute__ ((__always_inline__)) vfmaq_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c) { return (float32x4_t)__builtin_neon_vfmav4sf (__a, __b, __c); } -#endif -#ifdef __ARM_FEATURE_FMA __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) vfms_f32 (float32x2_t __a, float32x2_t __b, float32x2_t __c) { return (float32x2_t)__builtin_neon_vfmsv2sf (__a, __b, __c); } -#endif -#ifdef __ARM_FEATURE_FMA __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) vfmsq_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c) { return (float32x4_t)__builtin_neon_vfmsv4sf (__a, __b, __c); } +#pragma GCC pop_options -#endif #if __ARM_ARCH >= 8 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) vrndn_f32 (float32x2_t __a) @@ -4515,14 +4509,15 @@ return (uint64x2_t)__builtin_neon_vrsrau_nv2di ((int64x2_t) __a, (int64x2_t) __b, __c); } -#ifdef __ARM_FEATURE_CRYPTO +#pragma GCC push_options +#pragma GCC target ("fpu=crypto-neon-fp-armv8") __extension__ static __inline poly64x1_t __attribute__ ((__always_inline__)) vsri_n_p64 (poly64x1_t __a, poly64x1_t __b, const int __c) { return (poly64x1_t)__builtin_neon_vsri_ndi (__a, __b, __c); } -#endif +#pragma GCC pop_options __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) vsri_n_s8 (int8x8_t __a, int8x8_t __b, const int __c) { @@ -4583,14 +4578,15 @@ return (poly16x4_t)__builtin_neon_vsri_nv4hi ((int16x4_t) __a, (int16x4_t) __b, __c); } -#ifdef __ARM_FEATURE_CRYPTO +#pragma GCC push_options +#pragma GCC target ("fpu=crypto-neon-fp-armv8") __extension__ static __inline poly64x2_t __attribute__ ((__always_inline__)) vsriq_n_p64 (poly64x2_t __a, poly64x2_t __b, const int __c) { return (poly64x2_t)__builtin_neon_vsri_nv2di ((int64x2_t) __a, (int64x2_t) __b, __c); } -#endif +#pragma GCC pop_options __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) vsriq_n_s8 (int8x16_t __a, int8x16_t __b, const int 
__c) { @@ -4651,14 +4647,15 @@ return (poly16x8_t)__builtin_neon_vsri_nv8hi ((int16x8_t) __a, (int16x8_t) __b, __c); } -#ifdef __ARM_FEATURE_CRYPTO +#pragma GCC push_options +#pragma GCC target ("fpu=crypto-neon-fp-armv8") __extension__ static __inline poly64x1_t __attribute__ ((__always_inline__)) vsli_n_p64 (poly64x1_t __a, poly64x1_t __b, const int __c) { return (poly64x1_t)__builtin_neon_vsli_ndi (__a, __b, __c); } -#endif +#pragma GCC pop_options __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) vsli_n_s8 (int8x8_t __a, int8x8_t __b, const int __c) { @@ -4719,14 +4716,15 @@ return (poly16x4_t)__builtin_neon_vsli_nv4hi ((int16x4_t) __a, (int16x4_t) __b, __c); } -#ifdef __ARM_FEATURE_CRYPTO +#pragma GCC push_options +#pragma GCC target ("fpu=crypto-neon-fp-armv8") __extension__ static __inline poly64x2_t __attribute__ ((__always_inline__)) vsliq_n_p64 (poly64x2_t __a, poly64x2_t __b, const int __c) { return (poly64x2_t)__builtin_neon_vsli_nv2di ((int64x2_t) __a, (int64x2_t) __b, __c); } -#endif +#pragma GCC pop_options __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) vsliq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c) { @@ -5545,14 +5543,15 @@ return (uint64x2_t)__builtin_neon_vset_lanev2di ((__builtin_neon_di) __a, (int64x2_t) __b, __c); } -#ifdef __ARM_FEATURE_CRYPTO +#pragma GCC push_options +#pragma GCC target ("fpu=crypto-neon-fp-armv8") __extension__ static __inline poly64x1_t __attribute__ ((__always_inline__)) vcreate_p64 (uint64_t __a) { return (poly64x1_t)__builtin_neon_vcreatedi ((__builtin_neon_di) __a); } -#endif +#pragma GCC pop_options __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) vcreate_s8 (uint64_t __a) { @@ -5681,14 +5680,15 @@ return (poly16x4_t)__builtin_neon_vdup_nv4hi ((__builtin_neon_hi) __a); } -#ifdef __ARM_FEATURE_CRYPTO +#pragma GCC push_options +#pragma GCC target ("fpu=crypto-neon-fp-armv8") __extension__ static __inline poly64x1_t __attribute__ 
((__always_inline__)) vdup_n_p64 (poly64_t __a) { return (poly64x1_t)__builtin_neon_vdup_ndi ((__builtin_neon_di) __a); } -#endif +#pragma GCC pop_options __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) vdup_n_s64 (int64_t __a) { @@ -5701,14 +5701,15 @@ return (uint64x1_t)__builtin_neon_vdup_ndi ((__builtin_neon_di) __a); } -#ifdef __ARM_FEATURE_CRYPTO +#pragma GCC push_options +#pragma GCC target ("fpu=crypto-neon-fp-armv8") __extension__ static __inline poly64x2_t __attribute__ ((__always_inline__)) vdupq_n_p64 (poly64_t __a) { return (poly64x2_t)__builtin_neon_vdup_nv2di ((__builtin_neon_di) __a); } -#endif +#pragma GCC pop_options __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) vdupq_n_s8 (int8_t __a) { @@ -5961,14 +5962,15 @@ return (poly16x4_t)__builtin_neon_vdup_lanev4hi ((int16x4_t) __a, __b); } -#ifdef __ARM_FEATURE_CRYPTO +#pragma GCC push_options +#pragma GCC target ("fpu=crypto-neon-fp-armv8") __extension__ static __inline poly64x1_t __attribute__ ((__always_inline__)) vdup_lane_p64 (poly64x1_t __a, const int __b) { return (poly64x1_t)__builtin_neon_vdup_lanedi (__a, __b); } -#endif +#pragma GCC pop_options __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) vdup_lane_s64 (int64x1_t __a, const int __b) { @@ -6035,14 +6037,15 @@ return (poly16x8_t)__builtin_neon_vdup_lanev8hi ((int16x4_t) __a, __b); } -#ifdef __ARM_FEATURE_CRYPTO +#pragma GCC push_options +#pragma GCC target ("fpu=crypto-neon-fp-armv8") __extension__ static __inline poly64x2_t __attribute__ ((__always_inline__)) vdupq_lane_p64 (poly64x1_t __a, const int __b) { return (poly64x2_t)__builtin_neon_vdup_lanev2di (__a, __b); } -#endif +#pragma GCC pop_options __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) vdupq_lane_s64 (int64x1_t __a, const int __b) { @@ -6055,14 +6058,15 @@ return (uint64x2_t)__builtin_neon_vdup_lanev2di ((int64x1_t) __a, __b); } -#ifdef __ARM_FEATURE_CRYPTO +#pragma 
GCC push_options +#pragma GCC target ("fpu=crypto-neon-fp-armv8") __extension__ static __inline poly64x2_t __attribute__ ((__always_inline__)) vcombine_p64 (poly64x1_t __a, poly64x1_t __b) { return (poly64x2_t)__builtin_neon_vcombinedi (__a, __b); } -#endif +#pragma GCC pop_options __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) vcombine_s8 (int8x8_t __a, int8x8_t __b) { @@ -6137,14 +6141,15 @@ return (poly16x8_t)__builtin_neon_vcombinev4hi ((int16x4_t) __a, (int16x4_t) __b); } -#ifdef __ARM_FEATURE_CRYPTO +#pragma GCC push_options +#pragma GCC target ("fpu=crypto-neon-fp-armv8") __extension__ static __inline poly64x1_t __attribute__ ((__always_inline__)) vget_high_p64 (poly64x2_t __a) { return (poly64x1_t)__builtin_neon_vget_highv2di ((int64x2_t) __a); } -#endif +#pragma GCC pop_options __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) vget_high_s8 (int8x16_t __a) { @@ -6281,14 +6286,15 @@ return (poly16x4_t)__builtin_neon_vget_lowv8hi ((int16x8_t) __a); } -#ifdef __ARM_FEATURE_CRYPTO +#pragma GCC push_options +#pragma GCC target ("fpu=crypto-neon-fp-armv8") __extension__ static __inline poly64x1_t __attribute__ ((__always_inline__)) vget_low_p64 (poly64x2_t __a) { return (poly64x1_t)__builtin_neon_vget_lowv2di ((int64x2_t) __a); } -#endif +#pragma GCC pop_options __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) vget_low_s64 (int64x2_t __a) { @@ -6349,7 +6355,8 @@ return (uint32x4_t)__builtin_neon_vcvtuv4sf (__a); } -#if ((__ARM_FP & 0x2) != 0) +#pragma GCC push_options +#pragma GCC target ("fpu=neon-fp16") #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) __extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) vcvt_f16_f32 (float32x4_t __a) @@ -6357,9 +6364,7 @@ return (float16x4_t)__builtin_neon_vcvtv4hfv4sf (__a); } #endif -#endif -#if ((__ARM_FP & 0x2) != 0) #if defined (__ARM_FP16_FORMAT_IEEE) || defined 
(__ARM_FP16_FORMAT_ALTERNATIVE) __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) vcvt_f32_f16 (float16x4_t __a) @@ -6367,7 +6372,7 @@ return (float32x4_t)__builtin_neon_vcvtv4sfv4hf (__a); } #endif -#endif +#pragma GCC pop_options __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) vcvt_n_s32_f32 (float32x2_t __a, const int __b) @@ -7377,14 +7382,15 @@ return (int64x2_t)__builtin_neon_vqdmlsl_nv2si (__a, __b, (__builtin_neon_si) __c); } -#ifdef __ARM_FEATURE_CRYPTO +#pragma GCC push_options +#pragma GCC target ("fpu=crypto-neon-fp-armv8") __extension__ static __inline poly64x1_t __attribute__ ((__always_inline__)) vext_p64 (poly64x1_t __a, poly64x1_t __b, const int __c) { return (poly64x1_t)__builtin_neon_vextdi (__a, __b, __c); } -#endif +#pragma GCC pop_options __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) vext_s8 (int8x8_t __a, int8x8_t __b, const int __c) { @@ -7451,14 +7457,15 @@ return (poly16x4_t)__builtin_neon_vextv4hi ((int16x4_t) __a, (int16x4_t) __b, __c); } -#ifdef __ARM_FEATURE_CRYPTO +#pragma GCC push_options +#pragma GCC target ("fpu=crypto-neon-fp-armv8") __extension__ static __inline poly64x2_t __attribute__ ((__always_inline__)) vextq_p64 (poly64x2_t __a, poly64x2_t __b, const int __c) { return (poly64x2_t)__builtin_neon_vextv2di ((int64x2_t) __a, (int64x2_t) __b, __c); } -#endif +#pragma GCC pop_options __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) vextq_s8 (int8x16_t __a, int8x16_t __b, const int __c) { @@ -7741,14 +7748,15 @@ return (poly8x16_t) __builtin_shuffle (__a, (uint8x16_t) { 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14 }); } -#ifdef __ARM_FEATURE_CRYPTO +#pragma GCC push_options +#pragma GCC target ("fpu=crypto-neon-fp-armv8") __extension__ static __inline poly64x1_t __attribute__ ((__always_inline__)) vbsl_p64 (uint64x1_t __a, poly64x1_t __b, poly64x1_t __c) { return (poly64x1_t)__builtin_neon_vbsldi ((int64x1_t) 
__a, __b, __c); } -#endif +#pragma GCC pop_options __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) vbsl_s8 (uint8x8_t __a, int8x8_t __b, int8x8_t __c) { @@ -7815,14 +7823,15 @@ return (poly16x4_t)__builtin_neon_vbslv4hi ((int16x4_t) __a, (int16x4_t) __b, (int16x4_t) __c); } -#ifdef __ARM_FEATURE_CRYPTO +#pragma GCC push_options +#pragma GCC target ("fpu=crypto-neon-fp-armv8") __extension__ static __inline poly64x2_t __attribute__ ((__always_inline__)) vbslq_p64 (uint64x2_t __a, poly64x2_t __b, poly64x2_t __c) { return (poly64x2_t)__builtin_neon_vbslv2di ((int64x2_t) __a, (int64x2_t) __b, (int64x2_t) __c); } -#endif +#pragma GCC pop_options __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) vbslq_s8 (uint8x16_t __a, int8x16_t __b, int8x16_t __c) { @@ -8764,14 +8773,15 @@ return __rv; } -#ifdef __ARM_FEATURE_CRYPTO +#pragma GCC push_options +#pragma GCC target ("fpu=crypto-neon-fp-armv8") __extension__ static __inline poly64x1_t __attribute__ ((__always_inline__)) vld1_p64 (const poly64_t * __a) { return (poly64x1_t)__builtin_neon_vld1di ((const __builtin_neon_di *) __a); } -#endif +#pragma GCC pop_options __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) vld1_s8 (const int8_t * __a) { @@ -8846,14 +8856,15 @@ return (poly16x4_t)__builtin_neon_vld1v4hi ((const __builtin_neon_hi *) __a); } -#ifdef __ARM_FEATURE_CRYPTO +#pragma GCC push_options +#pragma GCC target ("fpu=crypto-neon-fp-armv8") __extension__ static __inline poly64x2_t __attribute__ ((__always_inline__)) vld1q_p64 (const poly64_t * __a) { return (poly64x2_t)__builtin_neon_vld1v2di ((const __builtin_neon_di *) __a); } -#endif +#pragma GCC pop_options __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) vld1q_s8 (const int8_t * __a) { @@ -8990,14 +9001,15 @@ return (poly16x4_t)__builtin_neon_vld1_lanev4hi ((const __builtin_neon_hi *) __a, (int16x4_t) __b, __c); } -#ifdef __ARM_FEATURE_CRYPTO +#pragma GCC 
push_options +#pragma GCC target ("fpu=crypto-neon-fp-armv8") __extension__ static __inline poly64x1_t __attribute__ ((__always_inline__)) vld1_lane_p64 (const poly64_t * __a, poly64x1_t __b, const int __c) { return (poly64x1_t)__builtin_neon_vld1_lanedi ((const __builtin_neon_di *) __a, __b, __c); } -#endif +#pragma GCC pop_options __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) vld1_lane_s64 (const int64_t * __a, int64x1_t __b, const int __c) { @@ -9072,14 +9084,15 @@ return (poly16x8_t)__builtin_neon_vld1_lanev8hi ((const __builtin_neon_hi *) __a, (int16x8_t) __b, __c); } -#ifdef __ARM_FEATURE_CRYPTO +#pragma GCC push_options +#pragma GCC target ("fpu=crypto-neon-fp-armv8") __extension__ static __inline poly64x2_t __attribute__ ((__always_inline__)) vld1q_lane_p64 (const poly64_t * __a, poly64x2_t __b, const int __c) { return (poly64x2_t)__builtin_neon_vld1_lanev2di ((const __builtin_neon_di *) __a, (int64x2_t) __b, __c); } -#endif +#pragma GCC pop_options __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) vld1q_lane_s64 (const int64_t * __a, int64x2_t __b, const int __c) { @@ -9155,14 +9168,15 @@ return (poly16x4_t)__builtin_neon_vld1_dupv4hi ((const __builtin_neon_hi *) __a); } -#ifdef __ARM_FEATURE_CRYPTO +#pragma GCC push_options +#pragma GCC target ("fpu=crypto-neon-fp-armv8") __extension__ static __inline poly64x1_t __attribute__ ((__always_inline__)) vld1_dup_p64 (const poly64_t * __a) { return (poly64x1_t)__builtin_neon_vld1_dupdi ((const __builtin_neon_di *) __a); } -#endif +#pragma GCC pop_options __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) vld1_dup_s64 (const int64_t * __a) { @@ -9238,14 +9252,15 @@ return (poly16x8_t)__builtin_neon_vld1_dupv8hi ((const __builtin_neon_hi *) __a); } -#ifdef __ARM_FEATURE_CRYPTO +#pragma GCC push_options +#pragma GCC target ("fpu=crypto-neon-fp-armv8") __extension__ static __inline poly64x2_t __attribute__ ((__always_inline__)) 
vld1q_dup_p64 (const poly64_t * __a) { return (poly64x2_t)__builtin_neon_vld1_dupv2di ((const __builtin_neon_di *) __a); } -#endif +#pragma GCC pop_options __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) vld1q_dup_s64 (const int64_t * __a) { @@ -9258,14 +9273,15 @@ return (uint64x2_t)__builtin_neon_vld1_dupv2di ((const __builtin_neon_di *) __a); } -#ifdef __ARM_FEATURE_CRYPTO +#pragma GCC push_options +#pragma GCC target ("fpu=crypto-neon-fp-armv8") __extension__ static __inline void __attribute__ ((__always_inline__)) vst1_p64 (poly64_t * __a, poly64x1_t __b) { __builtin_neon_vst1di ((__builtin_neon_di *) __a, __b); } -#endif +#pragma GCC pop_options __extension__ static __inline void __attribute__ ((__always_inline__)) vst1_s8 (int8_t * __a, int8x8_t __b) { @@ -9340,14 +9356,15 @@ __builtin_neon_vst1v4hi ((__builtin_neon_hi *) __a, (int16x4_t) __b); } -#ifdef __ARM_FEATURE_CRYPTO +#pragma GCC push_options +#pragma GCC target ("fpu=crypto-neon-fp-armv8") __extension__ static __inline void __attribute__ ((__always_inline__)) vst1q_p64 (poly64_t * __a, poly64x2_t __b) { __builtin_neon_vst1v2di ((__builtin_neon_di *) __a, (int64x2_t) __b); } -#endif +#pragma GCC pop_options __extension__ static __inline void __attribute__ ((__always_inline__)) vst1q_s8 (int8_t * __a, int8x16_t __b) { @@ -9484,14 +9501,15 @@ __builtin_neon_vst1_lanev4hi ((__builtin_neon_hi *) __a, (int16x4_t) __b, __c); } -#ifdef __ARM_FEATURE_CRYPTO +#pragma GCC push_options +#pragma GCC target ("fpu=crypto-neon-fp-armv8") __extension__ static __inline void __attribute__ ((__always_inline__)) vst1_lane_p64 (poly64_t * __a, poly64x1_t __b, const int __c) { __builtin_neon_vst1_lanedi ((__builtin_neon_di *) __a, __b, __c); } -#endif +#pragma GCC pop_options __extension__ static __inline void __attribute__ ((__always_inline__)) vst1_lane_s64 (int64_t * __a, int64x1_t __b, const int __c) { @@ -9566,14 +9584,15 @@ __builtin_neon_vst1_lanev8hi ((__builtin_neon_hi *) __a, 
(int16x8_t) __b, __c); } -#ifdef __ARM_FEATURE_CRYPTO +#pragma GCC push_options +#pragma GCC target ("fpu=crypto-neon-fp-armv8") __extension__ static __inline void __attribute__ ((__always_inline__)) vst1q_lane_p64 (poly64_t * __a, poly64x2_t __b, const int __c) { __builtin_neon_vst1_lanev2di ((__builtin_neon_di *) __a, (int64x2_t) __b, __c); } -#endif +#pragma GCC pop_options __extension__ static __inline void __attribute__ ((__always_inline__)) vst1q_lane_s64 (int64_t * __a, int64x2_t __b, const int __c) { @@ -9668,7 +9687,8 @@ return __rv.__i; } -#ifdef __ARM_FEATURE_CRYPTO +#pragma GCC push_options +#pragma GCC target ("fpu=crypto-neon-fp-armv8") __extension__ static __inline poly64x1x2_t __attribute__ ((__always_inline__)) vld2_p64 (const poly64_t * __a) { @@ -9677,7 +9697,7 @@ return __rv.__i; } -#endif +#pragma GCC pop_options __extension__ static __inline int64x1x2_t __attribute__ ((__always_inline__)) vld2_s64 (const int64_t * __a) { @@ -10015,7 +10035,8 @@ return __rv.__i; } -#ifdef __ARM_FEATURE_CRYPTO +#pragma GCC push_options +#pragma GCC target ("fpu=crypto-neon-fp-armv8") __extension__ static __inline poly64x1x2_t __attribute__ ((__always_inline__)) vld2_dup_p64 (const poly64_t * __a) { @@ -10024,7 +10045,7 @@ return __rv.__i; } -#endif +#pragma GCC pop_options __extension__ static __inline int64x1x2_t __attribute__ ((__always_inline__)) vld2_dup_s64 (const int64_t * __a) { @@ -10113,7 +10134,8 @@ __builtin_neon_vst2v4hi ((__builtin_neon_hi *) __a, __bu.__o); } -#ifdef __ARM_FEATURE_CRYPTO +#pragma GCC push_options +#pragma GCC target ("fpu=crypto-neon-fp-armv8") __extension__ static __inline void __attribute__ ((__always_inline__)) vst2_p64 (poly64_t * __a, poly64x1x2_t __b) { @@ -10121,7 +10143,7 @@ __builtin_neon_vst2di ((__builtin_neon_di *) __a, __bu.__o); } -#endif +#pragma GCC pop_options __extension__ static __inline void __attribute__ ((__always_inline__)) vst2_s64 (int64_t * __a, int64x1x2_t __b) { @@ -10413,7 +10435,8 @@ return __rv.__i; } 
-#ifdef __ARM_FEATURE_CRYPTO +#pragma GCC push_options +#pragma GCC target ("fpu=crypto-neon-fp-armv8") __extension__ static __inline poly64x1x3_t __attribute__ ((__always_inline__)) vld3_p64 (const poly64_t * __a) { @@ -10422,7 +10445,7 @@ return __rv.__i; } -#endif +#pragma GCC pop_options __extension__ static __inline int64x1x3_t __attribute__ ((__always_inline__)) vld3_s64 (const int64_t * __a) { @@ -10760,7 +10783,8 @@ return __rv.__i; } -#ifdef __ARM_FEATURE_CRYPTO +#pragma GCC push_options +#pragma GCC target ("fpu=crypto-neon-fp-armv8") __extension__ static __inline poly64x1x3_t __attribute__ ((__always_inline__)) vld3_dup_p64 (const poly64_t * __a) { @@ -10769,7 +10793,7 @@ return __rv.__i; } -#endif +#pragma GCC pop_options __extension__ static __inline int64x1x3_t __attribute__ ((__always_inline__)) vld3_dup_s64 (const int64_t * __a) { @@ -10858,7 +10882,8 @@ __builtin_neon_vst3v4hi ((__builtin_neon_hi *) __a, __bu.__o); } -#ifdef __ARM_FEATURE_CRYPTO +#pragma GCC push_options +#pragma GCC target ("fpu=crypto-neon-fp-armv8") __extension__ static __inline void __attribute__ ((__always_inline__)) vst3_p64 (poly64_t * __a, poly64x1x3_t __b) { @@ -10866,7 +10891,7 @@ __builtin_neon_vst3di ((__builtin_neon_di *) __a, __bu.__o); } -#endif +#pragma GCC pop_options __extension__ static __inline void __attribute__ ((__always_inline__)) vst3_s64 (int64_t * __a, int64x1x3_t __b) { @@ -11158,7 +11183,8 @@ return __rv.__i; } -#ifdef __ARM_FEATURE_CRYPTO +#pragma GCC push_options +#pragma GCC target ("fpu=crypto-neon-fp-armv8") __extension__ static __inline poly64x1x4_t __attribute__ ((__always_inline__)) vld4_p64 (const poly64_t * __a) { @@ -11167,7 +11193,7 @@ return __rv.__i; } -#endif +#pragma GCC pop_options __extension__ static __inline int64x1x4_t __attribute__ ((__always_inline__)) vld4_s64 (const int64_t * __a) { @@ -11507,7 +11533,8 @@ return __rv.__i; } -#ifdef __ARM_FEATURE_CRYPTO +#pragma GCC push_options +#pragma GCC target ("fpu=crypto-neon-fp-armv8") 
__extension__ static __inline poly64x1x4_t __attribute__ ((__always_inline__)) vld4_dup_p64 (const poly64_t * __a) { @@ -11516,7 +11543,7 @@ return __rv.__i; } -#endif +#pragma GCC pop_options __extension__ static __inline int64x1x4_t __attribute__ ((__always_inline__)) vld4_dup_s64 (const int64_t * __a) { @@ -11605,7 +11632,8 @@ __builtin_neon_vst4v4hi ((__builtin_neon_hi *) __a, __bu.__o); } -#ifdef __ARM_FEATURE_CRYPTO +#pragma GCC push_options +#pragma GCC target ("fpu=crypto-neon-fp-armv8") __extension__ static __inline void __attribute__ ((__always_inline__)) vst4_p64 (poly64_t * __a, poly64x1x4_t __b) { @@ -11613,7 +11641,7 @@ __builtin_neon_vst4di ((__builtin_neon_di *) __a, __bu.__o); } -#endif +#pragma GCC pop_options __extension__ static __inline void __attribute__ ((__always_inline__)) vst4_s64 (int64_t * __a, int64x1x4_t __b) { @@ -12323,14 +12351,15 @@ return (poly8x8_t)__builtin_neon_vreinterpretv8qiv2sf (__a); } -#ifdef __ARM_FEATURE_CRYPTO +#pragma GCC push_options +#pragma GCC target ("fpu=crypto-neon-fp-armv8") __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) vreinterpret_p8_p64 (poly64x1_t __a) { return (poly8x8_t)__builtin_neon_vreinterpretv8qidi (__a); } -#endif +#pragma GCC pop_options __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) vreinterpret_p8_s64 (int64x1_t __a) { @@ -12399,14 +12428,15 @@ return (poly16x4_t)__builtin_neon_vreinterpretv4hiv2sf (__a); } -#ifdef __ARM_FEATURE_CRYPTO +#pragma GCC push_options +#pragma GCC target ("fpu=crypto-neon-fp-armv8") __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) vreinterpret_p16_p64 (poly64x1_t __a) { return (poly16x4_t)__builtin_neon_vreinterpretv4hidi (__a); } -#endif +#pragma GCC pop_options __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) vreinterpret_p16_s64 (int64x1_t __a) { @@ -12479,14 +12509,15 @@ } #endif -#ifdef __ARM_FEATURE_CRYPTO #if defined (__ARM_FP16_FORMAT_IEEE) || 
defined (__ARM_FP16_FORMAT_ALTERNATIVE) +#pragma GCC push_options +#pragma GCC target ("fpu=crypto-neon-fp-armv8") __extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) vreinterpret_f16_p64 (poly64x1_t __a) { return (float16x4_t) __a; } -#endif +#pragma GCC pop_options #endif #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) @@ -12573,14 +12604,15 @@ } #endif -#ifdef __ARM_FEATURE_CRYPTO +#pragma GCC push_options +#pragma GCC target ("fpu=crypto-neon-fp-armv8") __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) vreinterpret_f32_p64 (poly64x1_t __a) { return (float32x2_t)__builtin_neon_vreinterpretv2sfdi (__a); } -#endif +#pragma GCC pop_options __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) vreinterpret_f32_s64 (int64x1_t __a) { @@ -12629,105 +12661,83 @@ return (float32x2_t)__builtin_neon_vreinterpretv2sfv2si ((int32x2_t) __a); } -#ifdef __ARM_FEATURE_CRYPTO +#pragma GCC push_options +#pragma GCC target ("fpu=crypto-neon-fp-armv8") __extension__ static __inline poly64x1_t __attribute__ ((__always_inline__)) vreinterpret_p64_p8 (poly8x8_t __a) { return (poly64x1_t)__builtin_neon_vreinterpretdiv8qi ((int8x8_t) __a); } -#endif -#ifdef __ARM_FEATURE_CRYPTO __extension__ static __inline poly64x1_t __attribute__ ((__always_inline__)) vreinterpret_p64_p16 (poly16x4_t __a) { return (poly64x1_t)__builtin_neon_vreinterpretdiv4hi ((int16x4_t) __a); } -#endif - #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) -#ifdef __ARM_FEATURE_CRYPTO __extension__ static __inline poly64x1_t __attribute__ ((__always_inline__)) vreinterpret_p64_f16 (float16x4_t __a) { return (poly64x1_t) __a; } #endif -#endif -#ifdef __ARM_FEATURE_CRYPTO __extension__ static __inline poly64x1_t __attribute__ ((__always_inline__)) vreinterpret_p64_f32 (float32x2_t __a) { return (poly64x1_t)__builtin_neon_vreinterpretdiv2sf (__a); } -#endif -#ifdef __ARM_FEATURE_CRYPTO 
__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__)) vreinterpret_p64_s64 (int64x1_t __a) { return (poly64x1_t)__builtin_neon_vreinterpretdidi (__a); } -#endif -#ifdef __ARM_FEATURE_CRYPTO __extension__ static __inline poly64x1_t __attribute__ ((__always_inline__)) vreinterpret_p64_u64 (uint64x1_t __a) { return (poly64x1_t)__builtin_neon_vreinterpretdidi ((int64x1_t) __a); } -#endif -#ifdef __ARM_FEATURE_CRYPTO __extension__ static __inline poly64x1_t __attribute__ ((__always_inline__)) vreinterpret_p64_s8 (int8x8_t __a) { return (poly64x1_t)__builtin_neon_vreinterpretdiv8qi (__a); } -#endif -#ifdef __ARM_FEATURE_CRYPTO __extension__ static __inline poly64x1_t __attribute__ ((__always_inline__)) vreinterpret_p64_s16 (int16x4_t __a) { return (poly64x1_t)__builtin_neon_vreinterpretdiv4hi (__a); } -#endif -#ifdef __ARM_FEATURE_CRYPTO __extension__ static __inline poly64x1_t __attribute__ ((__always_inline__)) vreinterpret_p64_s32 (int32x2_t __a) { return (poly64x1_t)__builtin_neon_vreinterpretdiv2si (__a); } -#endif -#ifdef __ARM_FEATURE_CRYPTO __extension__ static __inline poly64x1_t __attribute__ ((__always_inline__)) vreinterpret_p64_u8 (uint8x8_t __a) { return (poly64x1_t)__builtin_neon_vreinterpretdiv8qi ((int8x8_t) __a); } -#endif -#ifdef __ARM_FEATURE_CRYPTO __extension__ static __inline poly64x1_t __attribute__ ((__always_inline__)) vreinterpret_p64_u16 (uint16x4_t __a) { return (poly64x1_t)__builtin_neon_vreinterpretdiv4hi ((int16x4_t) __a); } -#endif -#ifdef __ARM_FEATURE_CRYPTO __extension__ static __inline poly64x1_t __attribute__ ((__always_inline__)) vreinterpret_p64_u32 (uint32x2_t __a) { return (poly64x1_t)__builtin_neon_vreinterpretdiv2si ((int32x2_t) __a); } -#endif +#pragma GCC pop_options __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) vreinterpret_s64_p8 (poly8x8_t __a) { @@ -12754,14 +12764,15 @@ return (int64x1_t)__builtin_neon_vreinterpretdiv2sf (__a); } -#ifdef __ARM_FEATURE_CRYPTO +#pragma 
GCC push_options +#pragma GCC target ("fpu=crypto-neon-fp-armv8") __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) vreinterpret_s64_p64 (poly64x1_t __a) { return (int64x1_t)__builtin_neon_vreinterpretdidi (__a); } -#endif +#pragma GCC pop_options __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) vreinterpret_s64_u64 (uint64x1_t __a) { @@ -12830,14 +12841,15 @@ return (uint64x1_t)__builtin_neon_vreinterpretdiv2sf (__a); } -#ifdef __ARM_FEATURE_CRYPTO +#pragma GCC push_options +#pragma GCC target ("fpu=crypto-neon-fp-armv8") __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) vreinterpret_u64_p64 (poly64x1_t __a) { return (uint64x1_t)__builtin_neon_vreinterpretdidi (__a); } -#endif +#pragma GCC pop_options __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) vreinterpret_u64_s64 (int64x1_t __a) { @@ -12906,14 +12918,15 @@ return (int8x8_t)__builtin_neon_vreinterpretv8qiv2sf (__a); } -#ifdef __ARM_FEATURE_CRYPTO +#pragma GCC push_options +#pragma GCC target ("fpu=crypto-neon-fp-armv8") __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) vreinterpret_s8_p64 (poly64x1_t __a) { return (int8x8_t)__builtin_neon_vreinterpretv8qidi (__a); } -#endif +#pragma GCC pop_options __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) vreinterpret_s8_s64 (int64x1_t __a) { @@ -12982,14 +12995,15 @@ return (int16x4_t)__builtin_neon_vreinterpretv4hiv2sf (__a); } -#ifdef __ARM_FEATURE_CRYPTO +#pragma GCC push_options +#pragma GCC target ("fpu=crypto-neon-fp-armv8") __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) vreinterpret_s16_p64 (poly64x1_t __a) { return (int16x4_t)__builtin_neon_vreinterpretv4hidi (__a); } -#endif +#pragma GCC pop_options __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) vreinterpret_s16_s64 (int64x1_t __a) { @@ -13058,14 +13072,15 @@ return 
(int32x2_t)__builtin_neon_vreinterpretv2siv2sf (__a); } -#ifdef __ARM_FEATURE_CRYPTO +#pragma GCC push_options +#pragma GCC target ("fpu=crypto-neon-fp-armv8") __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) vreinterpret_s32_p64 (poly64x1_t __a) { return (int32x2_t)__builtin_neon_vreinterpretv2sidi (__a); } -#endif +#pragma GCC pop_options __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) vreinterpret_s32_s64 (int64x1_t __a) { @@ -13134,14 +13149,15 @@ return (uint8x8_t)__builtin_neon_vreinterpretv8qiv2sf (__a); } -#ifdef __ARM_FEATURE_CRYPTO +#pragma GCC push_options +#pragma GCC target ("fpu=crypto-neon-fp-armv8") __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) vreinterpret_u8_p64 (poly64x1_t __a) { return (uint8x8_t)__builtin_neon_vreinterpretv8qidi (__a); } -#endif +#pragma GCC pop_options __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) vreinterpret_u8_s64 (int64x1_t __a) { @@ -13210,14 +13226,15 @@ return (uint16x4_t)__builtin_neon_vreinterpretv4hiv2sf (__a); } -#ifdef __ARM_FEATURE_CRYPTO +#pragma GCC push_options +#pragma GCC target ("fpu=crypto-neon-fp-armv8") __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) vreinterpret_u16_p64 (poly64x1_t __a) { return (uint16x4_t)__builtin_neon_vreinterpretv4hidi (__a); } -#endif +#pragma GCC pop_options __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) vreinterpret_u16_s64 (int64x1_t __a) { @@ -13286,14 +13303,15 @@ return (uint32x2_t)__builtin_neon_vreinterpretv2siv2sf (__a); } -#ifdef __ARM_FEATURE_CRYPTO +#pragma GCC push_options +#pragma GCC target ("fpu=crypto-neon-fp-armv8") __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) vreinterpret_u32_p64 (poly64x1_t __a) { return (uint32x2_t)__builtin_neon_vreinterpretv2sidi (__a); } -#endif +#pragma GCC pop_options __extension__ static __inline uint32x2_t __attribute__ 
((__always_inline__)) vreinterpret_u32_s64 (int64x1_t __a) { @@ -13356,22 +13374,22 @@ return (poly8x16_t)__builtin_neon_vreinterpretv16qiv4sf (__a); } -#ifdef __ARM_FEATURE_CRYPTO +#pragma GCC push_options +#pragma GCC target ("fpu=crypto-neon-fp-armv8") __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) vreinterpretq_p8_p64 (poly64x2_t __a) { return (poly8x16_t)__builtin_neon_vreinterpretv16qiv2di ((int64x2_t) __a); } -#endif -#ifdef __ARM_FEATURE_CRYPTO + __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) vreinterpretq_p8_p128 (poly128_t __a) { return (poly8x16_t)__builtin_neon_vreinterpretv16qiti ((__builtin_neon_ti) __a); } -#endif +#pragma GCC pop_options __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) vreinterpretq_p8_s64 (int64x2_t __a) { @@ -13440,22 +13458,21 @@ return (poly16x8_t)__builtin_neon_vreinterpretv8hiv4sf (__a); } -#ifdef __ARM_FEATURE_CRYPTO +#pragma GCC push_options +#pragma GCC target ("fpu=crypto-neon-fp-armv8") __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) vreinterpretq_p16_p64 (poly64x2_t __a) { return (poly16x8_t)__builtin_neon_vreinterpretv8hiv2di ((int64x2_t) __a); } -#endif -#ifdef __ARM_FEATURE_CRYPTO __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) vreinterpretq_p16_p128 (poly128_t __a) { return (poly16x8_t)__builtin_neon_vreinterpretv8hiti ((__builtin_neon_ti) __a); } -#endif +#pragma GCC pop_options __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) vreinterpretq_p16_s64 (int64x2_t __a) { @@ -13528,25 +13545,26 @@ } #endif +#pragma GCC push_options +#pragma GCC target ("fpu=crypto-neon-fp-armv8") + #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) -#ifdef __ARM_FEATURE_CRYPTO __extension__ static __inline float16x8_t __attribute__ ((__always_inline__)) vreinterpretq_f16_p64 (poly64x2_t __a) { return (float16x8_t) __a; } #endif -#endif #if 
defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) -#ifdef __ARM_FEATURE_CRYPTO __extension__ static __inline float16x8_t __attribute__ ((__always_inline__)) vreinterpretq_f16_p128 (poly128_t __a) { return (float16x8_t) __a; } #endif -#endif + +#pragma GCC pop_options #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) __extension__ static __inline float16x8_t __attribute__ ((__always_inline__)) @@ -13632,22 +13650,21 @@ } #endif -#ifdef __ARM_FEATURE_CRYPTO +#pragma GCC push_options +#pragma GCC target ("fpu=crypto-neon-fp-armv8") __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) vreinterpretq_f32_p64 (poly64x2_t __a) { return (float32x4_t)__builtin_neon_vreinterpretv4sfv2di ((int64x2_t) __a); } -#endif -#ifdef __ARM_FEATURE_CRYPTO __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) vreinterpretq_f32_p128 (poly128_t __a) { return (float32x4_t)__builtin_neon_vreinterpretv4sfti ((__builtin_neon_ti) __a); } -#endif +#pragma GCC pop_options __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) vreinterpretq_f32_s64 (int64x2_t __a) { @@ -13696,24 +13713,20 @@ return (float32x4_t)__builtin_neon_vreinterpretv4sfv4si ((int32x4_t) __a); } -#ifdef __ARM_FEATURE_CRYPTO +#pragma GCC push_options +#pragma GCC target ("fpu=crypto-neon-fp-armv8") __extension__ static __inline poly64x2_t __attribute__ ((__always_inline__)) vreinterpretq_p64_p8 (poly8x16_t __a) { return (poly64x2_t)__builtin_neon_vreinterpretv2div16qi ((int8x16_t) __a); } -#endif -#ifdef __ARM_FEATURE_CRYPTO __extension__ static __inline poly64x2_t __attribute__ ((__always_inline__)) vreinterpretq_p64_p16 (poly16x8_t __a) { return (poly64x2_t)__builtin_neon_vreinterpretv2div8hi ((int16x8_t) __a); } -#endif - -#ifdef __ARM_FEATURE_CRYPTO #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) __extension__ static __inline poly64x2_t __attribute__ 
((__always_inline__)) vreinterpretq_p64_f16 (float16x8_t __a) @@ -13721,105 +13734,79 @@ return (poly64x2_t) __a; } #endif -#endif -#ifdef __ARM_FEATURE_CRYPTO __extension__ static __inline poly64x2_t __attribute__ ((__always_inline__)) vreinterpretq_p64_f32 (float32x4_t __a) { return (poly64x2_t)__builtin_neon_vreinterpretv2div4sf (__a); } -#endif -#ifdef __ARM_FEATURE_CRYPTO __extension__ static __inline poly64x2_t __attribute__ ((__always_inline__)) vreinterpretq_p64_p128 (poly128_t __a) { return (poly64x2_t)__builtin_neon_vreinterpretv2diti ((__builtin_neon_ti) __a); } -#endif -#ifdef __ARM_FEATURE_CRYPTO __extension__ static __inline poly64x2_t __attribute__ ((__always_inline__)) vreinterpretq_p64_s64 (int64x2_t __a) { return (poly64x2_t)__builtin_neon_vreinterpretv2div2di (__a); } -#endif -#ifdef __ARM_FEATURE_CRYPTO __extension__ static __inline poly64x2_t __attribute__ ((__always_inline__)) vreinterpretq_p64_u64 (uint64x2_t __a) { return (poly64x2_t)__builtin_neon_vreinterpretv2div2di ((int64x2_t) __a); } -#endif -#ifdef __ARM_FEATURE_CRYPTO __extension__ static __inline poly64x2_t __attribute__ ((__always_inline__)) vreinterpretq_p64_s8 (int8x16_t __a) { return (poly64x2_t)__builtin_neon_vreinterpretv2div16qi (__a); } -#endif -#ifdef __ARM_FEATURE_CRYPTO __extension__ static __inline poly64x2_t __attribute__ ((__always_inline__)) vreinterpretq_p64_s16 (int16x8_t __a) { return (poly64x2_t)__builtin_neon_vreinterpretv2div8hi (__a); } -#endif -#ifdef __ARM_FEATURE_CRYPTO __extension__ static __inline poly64x2_t __attribute__ ((__always_inline__)) vreinterpretq_p64_s32 (int32x4_t __a) { return (poly64x2_t)__builtin_neon_vreinterpretv2div4si (__a); } -#endif -#ifdef __ARM_FEATURE_CRYPTO __extension__ static __inline poly64x2_t __attribute__ ((__always_inline__)) vreinterpretq_p64_u8 (uint8x16_t __a) { return (poly64x2_t)__builtin_neon_vreinterpretv2div16qi ((int8x16_t) __a); } -#endif -#ifdef __ARM_FEATURE_CRYPTO __extension__ static __inline poly64x2_t 
__attribute__ ((__always_inline__)) vreinterpretq_p64_u16 (uint16x8_t __a) { return (poly64x2_t)__builtin_neon_vreinterpretv2div8hi ((int16x8_t) __a); } -#endif -#ifdef __ARM_FEATURE_CRYPTO __extension__ static __inline poly64x2_t __attribute__ ((__always_inline__)) vreinterpretq_p64_u32 (uint32x4_t __a) { return (poly64x2_t)__builtin_neon_vreinterpretv2div4si ((int32x4_t) __a); } -#endif -#ifdef __ARM_FEATURE_CRYPTO __extension__ static __inline poly128_t __attribute__ ((__always_inline__)) vreinterpretq_p128_p8 (poly8x16_t __a) { return (poly128_t)__builtin_neon_vreinterprettiv16qi ((int8x16_t) __a); } -#endif -#ifdef __ARM_FEATURE_CRYPTO __extension__ static __inline poly128_t __attribute__ ((__always_inline__)) vreinterpretq_p128_p16 (poly16x8_t __a) { return (poly128_t)__builtin_neon_vreinterprettiv8hi ((int16x8_t) __a); } -#endif -#ifdef __ARM_FEATURE_CRYPTO #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) __extension__ static __inline poly128_t __attribute__ ((__always_inline__)) vreinterpretq_p128_f16 (float16x8_t __a) @@ -13827,88 +13814,68 @@ return (poly128_t) __a; } #endif -#endif -#ifdef __ARM_FEATURE_CRYPTO __extension__ static __inline poly128_t __attribute__ ((__always_inline__)) vreinterpretq_p128_f32 (float32x4_t __a) { return (poly128_t)__builtin_neon_vreinterprettiv4sf (__a); } -#endif -#ifdef __ARM_FEATURE_CRYPTO __extension__ static __inline poly128_t __attribute__ ((__always_inline__)) vreinterpretq_p128_p64 (poly64x2_t __a) { return (poly128_t)__builtin_neon_vreinterprettiv2di ((int64x2_t) __a); } -#endif -#ifdef __ARM_FEATURE_CRYPTO __extension__ static __inline poly128_t __attribute__ ((__always_inline__)) vreinterpretq_p128_s64 (int64x2_t __a) { return (poly128_t)__builtin_neon_vreinterprettiv2di (__a); } -#endif -#ifdef __ARM_FEATURE_CRYPTO __extension__ static __inline poly128_t __attribute__ ((__always_inline__)) vreinterpretq_p128_u64 (uint64x2_t __a) { return (poly128_t)__builtin_neon_vreinterprettiv2di 
((int64x2_t) __a); } -#endif -#ifdef __ARM_FEATURE_CRYPTO __extension__ static __inline poly128_t __attribute__ ((__always_inline__)) vreinterpretq_p128_s8 (int8x16_t __a) { return (poly128_t)__builtin_neon_vreinterprettiv16qi (__a); } -#endif -#ifdef __ARM_FEATURE_CRYPTO __extension__ static __inline poly128_t __attribute__ ((__always_inline__)) vreinterpretq_p128_s16 (int16x8_t __a) { return (poly128_t)__builtin_neon_vreinterprettiv8hi (__a); } -#endif -#ifdef __ARM_FEATURE_CRYPTO __extension__ static __inline poly128_t __attribute__ ((__always_inline__)) vreinterpretq_p128_s32 (int32x4_t __a) { return (poly128_t)__builtin_neon_vreinterprettiv4si (__a); } -#endif -#ifdef __ARM_FEATURE_CRYPTO __extension__ static __inline poly128_t __attribute__ ((__always_inline__)) vreinterpretq_p128_u8 (uint8x16_t __a) { return (poly128_t)__builtin_neon_vreinterprettiv16qi ((int8x16_t) __a); } -#endif -#ifdef __ARM_FEATURE_CRYPTO __extension__ static __inline poly128_t __attribute__ ((__always_inline__)) vreinterpretq_p128_u16 (uint16x8_t __a) { return (poly128_t)__builtin_neon_vreinterprettiv8hi ((int16x8_t) __a); } -#endif -#ifdef __ARM_FEATURE_CRYPTO __extension__ static __inline poly128_t __attribute__ ((__always_inline__)) vreinterpretq_p128_u32 (uint32x4_t __a) { return (poly128_t)__builtin_neon_vreinterprettiv4si ((int32x4_t) __a); } -#endif +#pragma GCC pop_options __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) vreinterpretq_s64_p8 (poly8x16_t __a) { @@ -13935,22 +13902,21 @@ return (int64x2_t)__builtin_neon_vreinterpretv2div4sf (__a); } -#ifdef __ARM_FEATURE_CRYPTO +#pragma GCC push_options +#pragma GCC target ("fpu=crypto-neon-fp-armv8") __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) vreinterpretq_s64_p64 (poly64x2_t __a) { return (int64x2_t)__builtin_neon_vreinterpretv2div2di ((int64x2_t) __a); } -#endif -#ifdef __ARM_FEATURE_CRYPTO __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) 
vreinterpretq_s64_p128 (poly128_t __a) { return (int64x2_t)__builtin_neon_vreinterpretv2diti ((__builtin_neon_ti) __a); } -#endif +#pragma GCC pop_options __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) vreinterpretq_s64_u64 (uint64x2_t __a) { @@ -14019,22 +13985,21 @@ return (uint64x2_t)__builtin_neon_vreinterpretv2div4sf (__a); } -#ifdef __ARM_FEATURE_CRYPTO +#pragma GCC push_options +#pragma GCC target ("fpu=crypto-neon-fp-armv8") __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) vreinterpretq_u64_p64 (poly64x2_t __a) { return (uint64x2_t)__builtin_neon_vreinterpretv2div2di ((int64x2_t) __a); } -#endif -#ifdef __ARM_FEATURE_CRYPTO __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) vreinterpretq_u64_p128 (poly128_t __a) { return (uint64x2_t)__builtin_neon_vreinterpretv2diti ((__builtin_neon_ti) __a); } -#endif +#pragma GCC pop_options __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) vreinterpretq_u64_s64 (int64x2_t __a) { @@ -14103,22 +14068,21 @@ return (int8x16_t)__builtin_neon_vreinterpretv16qiv4sf (__a); } -#ifdef __ARM_FEATURE_CRYPTO +#pragma GCC push_options +#pragma GCC target ("fpu=crypto-neon-fp-armv8") __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) vreinterpretq_s8_p64 (poly64x2_t __a) { return (int8x16_t)__builtin_neon_vreinterpretv16qiv2di ((int64x2_t) __a); } -#endif -#ifdef __ARM_FEATURE_CRYPTO __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) vreinterpretq_s8_p128 (poly128_t __a) { return (int8x16_t)__builtin_neon_vreinterpretv16qiti ((__builtin_neon_ti) __a); } -#endif +#pragma GCC pop_options __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) vreinterpretq_s8_s64 (int64x2_t __a) { @@ -14187,22 +14151,21 @@ return (int16x8_t)__builtin_neon_vreinterpretv8hiv4sf (__a); } -#ifdef __ARM_FEATURE_CRYPTO +#pragma GCC push_options +#pragma GCC target 
("fpu=crypto-neon-fp-armv8") __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) vreinterpretq_s16_p64 (poly64x2_t __a) { return (int16x8_t)__builtin_neon_vreinterpretv8hiv2di ((int64x2_t) __a); } -#endif -#ifdef __ARM_FEATURE_CRYPTO __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) vreinterpretq_s16_p128 (poly128_t __a) { return (int16x8_t)__builtin_neon_vreinterpretv8hiti ((__builtin_neon_ti) __a); } -#endif +#pragma GCC pop_options __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) vreinterpretq_s16_s64 (int64x2_t __a) { @@ -14271,22 +14234,21 @@ return (int32x4_t)__builtin_neon_vreinterpretv4siv4sf (__a); } -#ifdef __ARM_FEATURE_CRYPTO +#pragma GCC push_options +#pragma GCC target ("fpu=crypto-neon-fp-armv8") __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) vreinterpretq_s32_p64 (poly64x2_t __a) { return (int32x4_t)__builtin_neon_vreinterpretv4siv2di ((int64x2_t) __a); } -#endif -#ifdef __ARM_FEATURE_CRYPTO __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) vreinterpretq_s32_p128 (poly128_t __a) { return (int32x4_t)__builtin_neon_vreinterpretv4siti ((__builtin_neon_ti) __a); } -#endif +#pragma GCC pop_options __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) vreinterpretq_s32_s64 (int64x2_t __a) { @@ -14355,22 +14317,21 @@ return (uint8x16_t)__builtin_neon_vreinterpretv16qiv4sf (__a); } -#ifdef __ARM_FEATURE_CRYPTO +#pragma GCC push_options +#pragma GCC target ("fpu=crypto-neon-fp-armv8") __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) vreinterpretq_u8_p64 (poly64x2_t __a) { return (uint8x16_t)__builtin_neon_vreinterpretv16qiv2di ((int64x2_t) __a); } -#endif -#ifdef __ARM_FEATURE_CRYPTO __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) vreinterpretq_u8_p128 (poly128_t __a) { return (uint8x16_t)__builtin_neon_vreinterpretv16qiti ((__builtin_neon_ti) __a); } 
-#endif +#pragma GCC pop_options __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) vreinterpretq_u8_s64 (int64x2_t __a) { @@ -14439,22 +14400,21 @@ return (uint16x8_t)__builtin_neon_vreinterpretv8hiv4sf (__a); } -#ifdef __ARM_FEATURE_CRYPTO +#pragma GCC push_options +#pragma GCC target ("fpu=crypto-neon-fp-armv8") __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) vreinterpretq_u16_p64 (poly64x2_t __a) { return (uint16x8_t)__builtin_neon_vreinterpretv8hiv2di ((int64x2_t) __a); } -#endif -#ifdef __ARM_FEATURE_CRYPTO __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) vreinterpretq_u16_p128 (poly128_t __a) { return (uint16x8_t)__builtin_neon_vreinterpretv8hiti ((__builtin_neon_ti) __a); } -#endif +#pragma GCC pop_options __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) vreinterpretq_u16_s64 (int64x2_t __a) { @@ -14523,22 +14483,21 @@ return (uint32x4_t)__builtin_neon_vreinterpretv4siv4sf (__a); } -#ifdef __ARM_FEATURE_CRYPTO +#pragma GCC push_options +#pragma GCC target ("fpu=crypto-neon-fp-armv8") __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) vreinterpretq_u32_p64 (poly64x2_t __a) { return (uint32x4_t)__builtin_neon_vreinterpretv4siv2di ((int64x2_t) __a); } -#endif -#ifdef __ARM_FEATURE_CRYPTO __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) vreinterpretq_u32_p128 (poly128_t __a) { return (uint32x4_t)__builtin_neon_vreinterpretv4siti ((__builtin_neon_ti) __a); } -#endif +#pragma GCC pop_options __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) vreinterpretq_u32_s64 (int64x2_t __a) { @@ -14582,8 +14541,8 @@ } -#ifdef __ARM_FEATURE_CRYPTO - +#pragma GCC push_options +#pragma GCC target ("fpu=crypto-neon-fp-armv8") __extension__ static __inline poly128_t __attribute__ ((__always_inline__)) vldrq_p128 (poly128_t const * __ptr) { @@ -14753,9 +14712,12 @@ return (poly128_t) 
__builtin_arm_crypto_vmullp64 ((uint64_t) __t1, (uint64_t) __t2); } -#endif +#pragma GCC pop_options + #ifdef __cplusplus } #endif -#endif + +#pragma GCC pop_options + #endif diff -ruN '--exclude=#*#' '--exclude=.svn' '--exclude=*~' -ruN gnu_trunk.p4/gcc/gcc/testsuite/gcc.target/arm/attr-neon3.c gnu_trunk.p5/gcc/gcc/testsuite/gcc.target/arm/attr-neon3.c --- gnu_trunk.p4/gcc/gcc/testsuite/gcc.target/arm/attr-neon3.c 1970-01-01 01:00:00.000000000 +0100 +++ gnu_trunk.p5/gcc/gcc/testsuite/gcc.target/arm/attr-neon3.c 2015-10-26 13:44:11.790368746 +0100 @@ -0,0 +1,32 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-options "-O2 -mfloat-abi=softfp -mfpu=vfp" } */ + +#include <arm_neon.h> + +/* Check that neon is used. */ +int8x8_t __attribute__ ((target("fpu=neon"))) +my (int8x8_t __a, int8x8_t __b) +{ + return __a + __b; +} + +/* Check that crypto builtins are recognized. */ +poly128_t __attribute__ ((target("fpu=crypto-neon-fp-armv8"))) +foo (poly128_t* ptr) +{ + return vldrq_p128 (ptr); +} + +/* Check that default mode is restored. 
*/ +int8x8_t +my1 (int8x8_t __a, int8x8_t __b) +{ + return __a + __b; +} + +/* { dg-final { scan-assembler-times "\.fpu vfp" 1 } } */ +/* { dg-final { scan-assembler-times "\.fpu neon" 1 } } */ +/* { dg-final { scan-assembler-times "\.fpu crypto-neon-fp-armv8" 1 } } */ +/* { dg-final { scan-assembler-times "vld1" 1 } } */ +/* { dg-final { scan-assembler-times "vadd" 1} } */ diff -ruN '--exclude=#*#' '--exclude=.svn' '--exclude=*~' -ruN gnu_trunk.p4/gcc/gcc/testsuite/gcc.target/arm/attr-neon-fp16.c gnu_trunk.p5/gcc/gcc/testsuite/gcc.target/arm/attr-neon-fp16.c --- gnu_trunk.p4/gcc/gcc/testsuite/gcc.target/arm/attr-neon-fp16.c 1970-01-01 01:00:00.000000000 +0100 +++ gnu_trunk.p5/gcc/gcc/testsuite/gcc.target/arm/attr-neon-fp16.c 2015-11-04 13:30:23.006138103 +0100 @@ -0,0 +1,11 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-mfp16-format=ieee -mfloat-abi=softfp" } */ + +#include "arm_neon.h" + +float16x4_t __attribute__((target("fpu=neon-fp16"))) +foo (float32x4_t arg) +{ + return vcvt_f16_f32 (arg); +} diff -ruN '--exclude=#*#' '--exclude=.svn' '--exclude=*~' -ruN gnu_trunk.p4/gcc/gcc/testsuite/lib/target-supports.exp gnu_trunk.p5/gcc/gcc/testsuite/lib/target-supports.exp --- gnu_trunk.p4/gcc/gcc/testsuite/lib/target-supports.exp 2015-11-10 13:39:31.689982689 +0100 +++ gnu_trunk.p5/gcc/gcc/testsuite/lib/target-supports.exp 2015-11-12 11:06:27.062849846 +0100 @@ -2850,8 +2850,10 @@ if { [check_effective_target_arm32] } { foreach flags {"" "-mfloat-abi=softfp" "-mfpu=neon" "-mfpu=neon -mfloat-abi=softfp"} { if { [check_no_compiler_messages_nocache arm_neon_ok object { - #include "arm_neon.h" int dummy; + #ifndef __ARM_NEON__ + #error not NEON + #endif /* Avoid the case where a test adds -mfpu=neon, but the toolchain is configured for -mcpu=arm926ej-s, for example. */ #if __ARM_ARCH < 7 ^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 4/4] [ARM] Add attribute/pragma target fpu= 2015-11-12 14:54 ` Christian Bruel @ 2015-11-13 11:49 ` Kyrill Tkachov 0 siblings, 0 replies; 9+ messages in thread From: Kyrill Tkachov @ 2015-11-13 11:49 UTC (permalink / raw) To: Christian Bruel; +Cc: Ramana Radhakrishnan, gcc-patches Hi Christian, On 12/11/15 14:54, Christian Bruel wrote: > Hi Kyrill, > >> ... >> The parts in this patch look ok to me. >> However, I think we need some more functionality. >> In aarch64 we support compiling a file with no simd, including arm_neon.h and using arm_neon.h intrinsics >> within functions tagged with simd support. >> We want to support such functionality on arm i.e. compile a file with -mfpu=vfp and use arm_neon.h intrinsics >> in a function tagged with an fpu=neon attribute. >> For that we'd need to wrap the intrinsics in arm_neon.h in appropriate pragmas, like in the aarch64 version of arm_neon.h > > As discussed, here is arm_neon.h for aarch32/neon with the same programming model as aarch64/simd. As you said, let's use one of the fpu=neon attributes even if the file is compiled with -mfpu=vfp. > > The drawback of this is that we now unconditionally make every neon intrinsic available, introducing a small legacy change with regard to error checking (that you didn't have with aarch64). It is therefore worth stressing that: > > - One cannot check #include "arm_neon.h" to check if the compiler can use neon instructions. Instead use #ifndef __ARM_NEON__. (Found in target-supports.exp) Checking the macro is the 'canonical' way to check for NEON support, so I reckon we can live with that. > > > - Types cannot be checked. For instance: > > #include <arm_neon.h> > > poly128_t > foo (poly128_t* ptr) > { > return vldrq_p128 (ptr); > } > > compiled with -mfpu=neon used to be rejected with > > error: unknown type name 'poly128_t' ... 
> > Now the error, as a side effect of the inlining rules between incompatible modes, becomes > > error: inlining failed in call to always_inline 'vldrq_p128': target specific option mismatch ... Well, the previous message is misleading anyway since the user error there is not a type issue but a failure to specify the correct -mfpu option. > > I found this more confusing, so I was a little bit reluctant to implement this, but the code is correctly rejected and the message makes sense, after all. Just a different check. > > This patch applies on top of the preceding attribute/pragma target fpu= series. Tested with arm-none-eabi configured with default and --with-cpu=cortex-a9 --with-fp --with-float=hard Do you mean --with-fpu=<something>? > > Also fixes a few macros that depend on fpu=, that I forgot to redefine. Can you please split those changes into a separate patch and ChangeLog and commit them separately? That part is preapproved. This patch is ok then with the above comment about splitting the arm-c.c changes separately. Thanks for doing this! I believe all patches in this series are approved then so you can go ahead and start committing. Kyrill > > Christian > ^ permalink raw reply [flat|nested] 9+ messages in thread
end of thread, other threads:[~2015-11-13 11:49 UTC | newest] Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- 2015-09-14 14:38 [PATCH 4/4] [ARM] Add attribute/pragma target fpu= Christian Bruel 2015-09-14 19:50 ` Bernhard Reutner-Fischer 2015-09-15 10:07 ` Christian Bruel 2015-09-15 10:48 ` Christian Bruel 2015-09-18 9:13 ` Kyrill Tkachov 2015-09-21 13:46 ` Christian Bruel 2015-10-08 8:53 ` Kyrill Tkachov 2015-11-12 14:54 ` Christian Bruel 2015-11-13 11:49 ` Kyrill Tkachov
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).