* [PATCH 4/4] [ARM] Add attribute/pragma target fpu=
@ 2015-09-14 14:38 Christian Bruel
2015-09-14 19:50 ` Bernhard Reutner-Fischer
2015-09-15 10:48 ` Christian Bruel
0 siblings, 2 replies; 9+ messages in thread
From: Christian Bruel @ 2015-09-14 14:38 UTC (permalink / raw)
To: kyrylo.tkachov, Ramana.Radhakrishnan; +Cc: gcc-patches
[-- Attachment #1: Type: text/plain, Size: 218 bytes --]
Finally, the final part of the patch set does the attribute target
parsing and checking, redefines the preprocessor macros and implements
the inlining rules.
testcases and documentation included.
thanks
Christian
[-- Attachment #2: p4.patch --]
[-- Type: text/x-patch, Size: 12045 bytes --]
2015-05-26 Christian Bruel <christian.bruel@st.com>
PR target/65837
* config/arm/arm-c.c (arm_cpu_builtins): Set or reset
__ARM_FEATURE_CRYPTO, __VFP_FP__, __ARM_NEON__
(arm_pragma_target_parse): Change check for arm_cpu_builtins.
undefine __ARM_FP.
* doc/invoke.texi (-mfpu=): Mention attribute and pragma.
* doc/extend.texi (-mfpu=): Describe attribute.
2015-09-14 Christian Bruel <christian.bruel@st.com>
PR target/65837
gcc.target/arm/lto/pr65837_0.c
gcc.target/arm/attr-neon2.c
gcc.target/arm/attr-neon.c
gcc.target/arm/attr-neon-builtin-fail.c
gcc.target/arm/attr-crypto.c
diff -ruN gnu_trunk.p3/gcc/gcc/config/arm/arm.c gnu_trunk.p4/gcc/gcc/config/arm/arm.c
--- gnu_trunk.p3/gcc/gcc/config/arm/arm.c 2015-09-11 16:26:33.869000746 +0200
+++ gnu_trunk.p4/gcc/gcc/config/arm/arm.c 2015-09-11 17:24:23.636876647 +0200
@@ -29486,11 +29486,42 @@
/* Hook to determine if one function can safely inline another. */
static bool
-arm_can_inline_p (tree caller ATTRIBUTE_UNUSED, tree callee ATTRIBUTE_UNUSED)
+arm_can_inline_p (tree caller, tree callee)
{
- /* Overidde default hook: Always OK to inline between different modes.
- Function with mode specific instructions, e.g using asm, must be explicitely
- protected with noinline. */
+ tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
+ tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
+
+ struct cl_target_option *caller_opts
+ = TREE_TARGET_OPTION (caller_tree ? caller_tree
+ : target_option_default_node);
+
+ struct cl_target_option *callee_opts
+ = TREE_TARGET_OPTION (callee_tree ? callee_tree
+ : target_option_default_node);
+
+ const struct arm_fpu_desc *fpu_desc1
+ = &all_fpus[caller_opts->x_arm_fpu_index];
+ const struct arm_fpu_desc *fpu_desc2
+ = &all_fpus[callee_opts->x_arm_fpu_index];
+
+ /* Can't inline NEON extension if the caller doesn't support it. */
+ if (ARM_FPU_FSET_HAS (fpu_desc2->features, FPU_FL_NEON)
+ && ! ARM_FPU_FSET_HAS (fpu_desc1->features, FPU_FL_NEON))
+ return false;
+
+ /* Can't inline CRYPTO extension if the caller doesn't support it. */
+ if (ARM_FPU_FSET_HAS (fpu_desc2->features, FPU_FL_CRYPTO)
+ && ! ARM_FPU_FSET_HAS (fpu_desc1->features, FPU_FL_CRYPTO))
+ return false;
+
+ /* Need same model and regs. */
+ if (fpu_desc2->model != fpu_desc1->model
+ || fpu_desc2->regs != fpu_desc1->regs)
+ return false;
+
+ /* OK to inline between different modes.
+ Function with mode specific instructions, e.g using asm,
+ must be explicitely protected with noinline. */
return true;
}
@@ -29501,6 +29532,8 @@
static bool
arm_valid_target_attribute_rec (tree args, struct gcc_options *opts)
{
+ int ret=true;
+
if (TREE_CODE (args) == TREE_LIST)
{
bool ret = true;
@@ -29518,30 +29551,35 @@
}
char *argstr = ASTRDUP (TREE_STRING_POINTER (args));
- while (argstr && *argstr != '\0')
+ char *q;
+
+ while ((q = strtok (argstr, ",")) != NULL)
{
- while (ISSPACE (*argstr))
- argstr++;
+ while (ISSPACE (*q)) ++q;
- if (!strcmp (argstr, "thumb"))
- {
+ argstr = NULL;
+ if (!strncmp (q, "thumb", 5))
opts->x_target_flags |= MASK_THUMB;
- arm_option_check_internal (opts);
- return true;
- }
- if (!strcmp (argstr, "arm"))
- {
+ else if (!strncmp (q, "arm", 3))
opts->x_target_flags &= ~MASK_THUMB;
- arm_option_check_internal (opts);
- return true;
+
+ else if (!strncmp (q, "fpu=", 4))
+ {
+ if (! opt_enum_arg_to_value (OPT_mfpu_, q+4,
+ &opts->x_arm_fpu_index, CL_TARGET))
+ {
+ error ("invalid fpu for attribute(target(\"%s\"))", q);
+ return false;
+ }
}
+ else
+ warning (0, "attribute(target(\"%s\")) is unknown", argstr);
- warning (0, "attribute(target(\"%s\")) is unknown", argstr);
- return false;
+ arm_option_check_internal (opts);
}
- return false;
+ return ret;
}
/* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
diff -ruN gnu_trunk.p3/gcc/gcc/config/arm/arm-c.c gnu_trunk.p4/gcc/gcc/config/arm/arm-c.c
--- gnu_trunk.p3/gcc/gcc/config/arm/arm-c.c 2015-09-11 16:25:32.180858606 +0200
+++ gnu_trunk.p4/gcc/gcc/config/arm/arm-c.c 2015-09-11 17:00:26.085645968 +0200
@@ -68,8 +68,8 @@
def_or_undef_macro (pfile, "__ARM_FEATURE_DSP", TARGET_DSP_MULTIPLY);
def_or_undef_macro (pfile, "__ARM_FEATURE_QBIT", TARGET_ARM_QBIT);
def_or_undef_macro (pfile, "__ARM_FEATURE_SAT", TARGET_ARM_SAT);
- if (TARGET_CRYPTO)
- builtin_define ("__ARM_FEATURE_CRYPTO");
+ def_or_undef_macro (pfile, "__ARM_FEATURE_CRYPTO", TARGET_CRYPTO);
+
if (unaligned_access)
builtin_define ("__ARM_FEATURE_UNALIGNED");
if (TARGET_CRC32)
@@ -129,8 +129,7 @@
if (TARGET_SOFT_FLOAT)
builtin_define ("__SOFTFP__");
- if (TARGET_VFP)
- builtin_define ("__VFP_FP__");
+ def_or_undef_macro (pfile, "__VFP_FP__", TARGET_VFP);
if (TARGET_ARM_FP)
builtin_define_with_int_value ("__ARM_FP", TARGET_ARM_FP);
@@ -141,11 +140,9 @@
if (TARGET_FMA)
builtin_define ("__ARM_FEATURE_FMA");
- if (TARGET_NEON)
- {
- builtin_define ("__ARM_NEON__");
- builtin_define ("__ARM_NEON");
- }
+ def_or_undef_macro (pfile, "__ARM_NEON__", TARGET_NEON);
+ def_or_undef_macro (pfile, "__ARM_NEON", TARGET_NEON);
+
if (TARGET_NEON_FP)
builtin_define_with_int_value ("__ARM_NEON_FP", TARGET_NEON_FP);
@@ -231,7 +228,7 @@
gcc_assert (prev_opt);
gcc_assert (cur_opt);
- if (cur_opt->x_target_flags != prev_opt->x_target_flags)
+ if (cur_opt != prev_opt)
{
/* For the definitions, ensure all newly defined macros are considered
as used for -Wunused-macros. There is no point warning about the
@@ -242,6 +239,8 @@
/* Update macros. */
gcc_assert (cur_opt->x_target_flags == target_flags);
+ /* This one can be redefined by the pragma without warning. */
+ cpp_undef (parse_in, "__ARM_FP");
arm_cpu_builtins (parse_in);
cpp_opts->warn_unused_macros = saved_warn_unused_macros;
diff -ruN gnu_trunk.p3/gcc/gcc/doc/extend.texi gnu_trunk.p4/gcc/gcc/doc/extend.texi
--- gnu_trunk.p3/gcc/gcc/doc/extend.texi 2015-09-07 13:35:20.777683005 +0200
+++ gnu_trunk.p4/gcc/gcc/doc/extend.texi 2015-09-14 13:58:49.271385001 +0200
@@ -3606,10 +3606,17 @@
@item arm
@cindex @code{target("arm")} function attribute, ARM
Force code generation in the ARM (A32) ISA.
-@end table
Functions from different modes can be inlined in the caller's mode.
+@item fpu=
+@cindex @code{target("fpu=")} function attribute, ARM
+Specifies the fpu for which to tune the performance of this function.
+The behavior and permissible arguments are the same as for the @option{-mfpu=}
+command-line option.
+
+@end table
+
@end table
@node AVR Function Attributes
diff -ruN gnu_trunk.p3/gcc/gcc/doc/invoke.texi gnu_trunk.p4/gcc/gcc/doc/invoke.texi
--- gnu_trunk.p3/gcc/gcc/doc/invoke.texi 2015-09-10 12:21:00.698911244 +0200
+++ gnu_trunk.p4/gcc/gcc/doc/invoke.texi 2015-09-14 10:27:20.281932581 +0200
@@ -13360,6 +13363,8 @@
floating-point arithmetic (in particular denormal values are treated as
zero), so the use of NEON instructions may lead to a loss of precision.
+You can also set the fpu name at function level by using the @code{target("mfpu=")} function attributes (@pxref{ARM Function Attributes}) or pragmas (@pxref{Function Specific Option Pragmas}).
+
@item -mfp16-format=@var{name}
@opindex mfp16-format
Specify the format of the @code{__fp16} half-precision floating-point type.
diff -ruN gnu_trunk.p3/gcc/gcc/testsuite/gcc.target/arm/attr-crypto.c gnu_trunk.p4/gcc/gcc/testsuite/gcc.target/arm/attr-crypto.c
--- gnu_trunk.p3/gcc/gcc/testsuite/gcc.target/arm/attr-crypto.c 1970-01-01 01:00:00.000000000 +0100
+++ gnu_trunk.p4/gcc/gcc/testsuite/gcc.target/arm/attr-crypto.c 2015-09-14 15:58:24.967898634 +0200
@@ -0,0 +1,38 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_crypto_ok } */
+
+#pragma GCC target ("fpu=crypto-neon-fp-armv8")
+
+#ifndef __ARM_FEATURE_CRYPTO
+#error __ARM_FEATURE_CRYPTO not defined.
+#endif
+
+#ifndef __ARM_NEON
+#error __ARM_NEON not defined.
+#endif
+
+#if !defined(__ARM_FP) || (__ARM_FP != 14)
+#error __ARM_FP
+#endif
+
+#include "arm_neon.h"
+
+int
+foo (void)
+{
+ uint32x4_t a = {0xd, 0xe, 0xa, 0xd};
+ uint32x4_t b = {0, 1, 2, 3};
+
+ uint32x4_t res = vsha256su0q_u32 (a, b);
+ return res[0];
+}
+
+#pragma GCC reset_options
+
+/* Check that the FP version is correctly reset. */
+
+#if !defined(__ARM_FP) || (__ARM_FP != 12)
+#error __ARM_FP
+#endif
+
+/* { dg-final { scan-assembler "sha256su0.32\tq\[0-9\]+, q\[0-9\]+" } } */
diff -ruN gnu_trunk.p3/gcc/gcc/testsuite/gcc.target/arm/attr-neon2.c gnu_trunk.p4/gcc/gcc/testsuite/gcc.target/arm/attr-neon2.c
--- gnu_trunk.p3/gcc/gcc/testsuite/gcc.target/arm/attr-neon2.c 1970-01-01 01:00:00.000000000 +0100
+++ gnu_trunk.p4/gcc/gcc/testsuite/gcc.target/arm/attr-neon2.c 2015-09-14 15:58:24.967898634 +0200
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-O2 -mfloat-abi=softfp -mfpu=vfp" } */
+
+#pragma GCC target ("fpu=neon")
+#include <arm_neon.h>
+
+/* Check that pragma target is used. */
+int8x8_t
+my (int8x8_t __a, int8x8_t __b)
+{
+ return __a + __b;
+}
+
+#pragma GCC reset_options
+
+/* Check that command line option is restored. */
+int8x8_t
+my1 (int8x8_t __a, int8x8_t __b)
+{
+ return __a + __b;
+}
+
+/* { dg-final { scan-assembler-times "\.fpu vfp" 1 } } */
+/* { dg-final { scan-assembler-times "\.fpu neon" 1 } } */
+/* { dg-final { scan-assembler "vadd" } } */
+
+
diff -ruN gnu_trunk.p3/gcc/gcc/testsuite/gcc.target/arm/attr-neon-builtin-fail.c gnu_trunk.p4/gcc/gcc/testsuite/gcc.target/arm/attr-neon-builtin-fail.c
--- gnu_trunk.p3/gcc/gcc/testsuite/gcc.target/arm/attr-neon-builtin-fail.c 1970-01-01 01:00:00.000000000 +0100
+++ gnu_trunk.p4/gcc/gcc/testsuite/gcc.target/arm/attr-neon-builtin-fail.c 2015-09-14 15:58:24.967898634 +0200
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-O2 -mfloat-abi=softfp -mfpu=neon" } */
+
+#include <arm_neon.h>
+
+void __attribute__ ((target ("fpu=vfp")))
+foo (uint8x16_t *p)
+{
+ *p = vmovq_n_u8 (3); /* { dg-error "called from here" } */
+
+}
+
+
+/* { dg-error "inlining failed in call to always_inline" "" { target *-*-* } 0 } */
+
+
+
diff -ruN gnu_trunk.p3/gcc/gcc/testsuite/gcc.target/arm/attr-neon.c gnu_trunk.p4/gcc/gcc/testsuite/gcc.target/arm/attr-neon.c
--- gnu_trunk.p3/gcc/gcc/testsuite/gcc.target/arm/attr-neon.c 1970-01-01 01:00:00.000000000 +0100
+++ gnu_trunk.p4/gcc/gcc/testsuite/gcc.target/arm/attr-neon.c 2015-09-14 16:12:08.449698268 +0200
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-O3 -mfloat-abi=softfp -ftree-vectorize" } */
+
+void
+f3(int n, int x[], int y[]) {
+ int i;
+ for (i = 0; i < n; ++i)
+ y[i] = x[i] << 3;
+}
+
+/* Verify that neon instructions are emitted once. */
+void __attribute__ ((target("fpu=neon")))
+ f1(int n, int x[], int y[]) {
+ int i;
+ for (i = 0; i < n; ++i)
+ y[i] = x[i] << 3;
+}
+
+/* { dg-final { scan-assembler-times "\.fpu vfp" 1 } } */
+/* { dg-final { scan-assembler-times "\.fpu neon" 1 } } */
+/* { dg-final { scan-assembler-times "vshl" 1 } } */
+
+
+
+
diff -ruN gnu_trunk.p3/gcc/gcc/testsuite/gcc.target/arm/lto/pr65837_0.c gnu_trunk.p4/gcc/gcc/testsuite/gcc.target/arm/lto/pr65837_0.c
--- gnu_trunk.p3/gcc/gcc/testsuite/gcc.target/arm/lto/pr65837_0.c 1970-01-01 01:00:00.000000000 +0100
+++ gnu_trunk.p4/gcc/gcc/testsuite/gcc.target/arm/lto/pr65837_0.c 2015-09-14 15:58:13.899874587 +0200
@@ -0,0 +1,14 @@
+/* { dg-lto-do run } */
+/* { dg-lto-options {{-flto -mfpu=neon}} } */
+/* { dg-suppress-ld-options {-mfpu=neon} } */
+
+#include "arm_neon.h"
+
+float32x2_t a, b, c, e;
+
+int main()
+{
+ e = __builtin_neon_vmls_lanev2sf (a, b, c, 0);
+ return 0;
+}
+
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 4/4] [ARM] Add attribute/pragma target fpu=
2015-09-14 14:38 [PATCH 4/4] [ARM] Add attribute/pragma target fpu= Christian Bruel
@ 2015-09-14 19:50 ` Bernhard Reutner-Fischer
2015-09-15 10:07 ` Christian Bruel
2015-09-15 10:48 ` Christian Bruel
1 sibling, 1 reply; 9+ messages in thread
From: Bernhard Reutner-Fischer @ 2015-09-14 19:50 UTC (permalink / raw)
To: Christian Bruel, kyrylo.tkachov, Ramana.Radhakrishnan; +Cc: gcc-patches
On September 14, 2015 4:30:23 PM GMT+02:00, Christian Bruel <christian.bruel@st.com> wrote:
>Finally, the final part of the patch set does the attribute target
>parsing and checking, redefines the preprocessor macros and implements
>the inlining rules.
>
>testcases and documentation included.
@@ -29501,6 +29532,8 @@
static bool
arm_valid_target_attribute_rec (tree args, struct gcc_options *opts)
{
+ int ret=true;
+
if (TREE_CODE (args) == TREE_LIST)
{
bool ret = true;
Doesn't the hunk above trigger a shadow warning? Furthermore, spaces are missing before and after the '='. And finally (there is no diff -p, so I can only guess): why is it an int if the function returns a bool?
Thanks,
@@ -29518,30 +29551,35 @@
}
char *argstr = ASTRDUP (TREE_STRING_POINTER (args));
- while (argstr && *argstr != '\0')
+ char *q;
+
+ while ((q = strtok (argstr, ",")) != NULL)
{
- while (ISSPACE (*argstr))
- argstr++;
+ while (ISSPACE (*q)) ++q;
- if (!strcmp (argstr, "thumb"))
- {
+ argstr = NULL;
+ if (!strncmp (q, "thumb", 5))
opts->x_target_flags |= MASK_THUMB;
- arm_option_check_internal (opts);
- return true;
- }
- if (!strcmp (argstr, "arm"))
- {
+ else if (!strncmp (q, "arm", 3))
opts->x_target_flags &= ~MASK_THUMB;
- arm_option_check_internal (opts);
- return true;
+
+ else if (!strncmp (q, "fpu=", 4))
+ {
+ if (! opt_enum_arg_to_value (OPT_mfpu_, q+4,
+ &opts->x_arm_fpu_index, CL_TARGET))
+ {
+ error ("invalid fpu for attribute(target(\"%s\"))", q);
+ return false;
+ }
}
+ else
+ warning (0, "attribute(target(\"%s\")) is unknown", argstr);
- warning (0, "attribute(target(\"%s\")) is unknown", argstr);
- return false;
+ arm_option_check_internal (opts);
}
- return false;
+ return ret;
}
>
>thanks
>
>Christian
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 4/4] [ARM] Add attribute/pragma target fpu=
2015-09-14 19:50 ` Bernhard Reutner-Fischer
@ 2015-09-15 10:07 ` Christian Bruel
0 siblings, 0 replies; 9+ messages in thread
From: Christian Bruel @ 2015-09-15 10:07 UTC (permalink / raw)
To: Bernhard Reutner-Fischer, kyrylo.tkachov, Ramana.Radhakrishnan
Cc: gcc-patches
On 09/14/2015 09:44 PM, Bernhard Reutner-Fischer wrote:
> On September 14, 2015 4:30:23 PM GMT+02:00, Christian Bruel <christian.bruel@st.com> wrote:
>> Finally, the final part of the patch set does the attribute target
>> parsing and checking, redefines the preprocessor macros and implements
>> the inlining rules.
>>
>> testcases and documentation included.
>
> @@ -29501,6 +29532,8 @@
> static bool
> arm_valid_target_attribute_rec (tree args, struct gcc_options *opts)
> {
> + int ret=true;
> +
> if (TREE_CODE (args) == TREE_LIST)
> {
> bool ret = true;
>
>
> Doesn't the hunk above trigger a shadow warning? Furthermore there are missing spaces before and after the '='. And finally (no diff -p so I can only guess) why the int if the function returns a bool?
>
No warning with -Wall, but I agree nevertheless: this is a leftover piece
of code that should have been removed.
thanks
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 4/4] [ARM] Add attribute/pragma target fpu=
2015-09-14 14:38 [PATCH 4/4] [ARM] Add attribute/pragma target fpu= Christian Bruel
2015-09-14 19:50 ` Bernhard Reutner-Fischer
@ 2015-09-15 10:48 ` Christian Bruel
2015-09-18 9:13 ` Kyrill Tkachov
1 sibling, 1 reply; 9+ messages in thread
From: Christian Bruel @ 2015-09-15 10:48 UTC (permalink / raw)
To: gcc-patches, kyrylo.tkachov, Ramana.Radhakrishnan; +Cc: gcc-patches
[-- Attachment #1: Type: text/plain, Size: 345 bytes --]
On 09/14/2015 04:30 PM, Christian Bruel wrote:
> Finally, the final part of the patch set does the attribute target
> parsing and checking, redefines the preprocessor macros and implements
> the inlining rules.
>
> testcases and documentation included.
>
New version attached; it removes the leftover declaration that was being shadowed.
> thanks
>
> Christian
>
[-- Attachment #2: p41.patch --]
[-- Type: text/x-patch, Size: 12098 bytes --]
2015-09-14 Christian Bruel <christian.bruel@st.com>
PR target/65837
* config/arm/arm-c.c (arm_cpu_builtins): Set or reset
__ARM_FEATURE_CRYPTO, __VFP_FP__, __ARM_NEON__
(arm_pragma_target_parse): Change check for arm_cpu_builtins.
undefine __ARM_FP.
* doc/invoke.texi (-mfpu=): Mention attribute and pragma.
* doc/extend.texi (-mfpu=): Describe attribute.
2015-09-14 Christian Bruel <christian.bruel@st.com>
PR target/65837
gcc.target/arm/lto/pr65837_0.c
gcc.target/arm/attr-neon2.c
gcc.target/arm/attr-neon.c
gcc.target/arm/attr-neon-builtin-fail.c
gcc.target/arm/attr-crypto.c
diff -ruN gnu_trunk.p3/gcc/gcc/config/arm/arm.c gnu_trunk.p4/gcc/gcc/config/arm/arm.c
--- gnu_trunk.p3/gcc/gcc/config/arm/arm.c 2015-09-11 16:26:33.869000746 +0200
+++ gnu_trunk.p4/gcc/gcc/config/arm/arm.c 2015-09-15 12:26:12.756161709 +0200
@@ -29486,11 +29486,42 @@
/* Hook to determine if one function can safely inline another. */
static bool
-arm_can_inline_p (tree caller ATTRIBUTE_UNUSED, tree callee ATTRIBUTE_UNUSED)
+arm_can_inline_p (tree caller, tree callee)
{
- /* Overidde default hook: Always OK to inline between different modes.
- Function with mode specific instructions, e.g using asm, must be explicitely
- protected with noinline. */
+ tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
+ tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
+
+ struct cl_target_option *caller_opts
+ = TREE_TARGET_OPTION (caller_tree ? caller_tree
+ : target_option_default_node);
+
+ struct cl_target_option *callee_opts
+ = TREE_TARGET_OPTION (callee_tree ? callee_tree
+ : target_option_default_node);
+
+ const struct arm_fpu_desc *fpu_desc1
+ = &all_fpus[caller_opts->x_arm_fpu_index];
+ const struct arm_fpu_desc *fpu_desc2
+ = &all_fpus[callee_opts->x_arm_fpu_index];
+
+ /* Can't inline NEON extension if the caller doesn't support it. */
+ if (ARM_FPU_FSET_HAS (fpu_desc2->features, FPU_FL_NEON)
+ && ! ARM_FPU_FSET_HAS (fpu_desc1->features, FPU_FL_NEON))
+ return false;
+
+ /* Can't inline CRYPTO extension if the caller doesn't support it. */
+ if (ARM_FPU_FSET_HAS (fpu_desc2->features, FPU_FL_CRYPTO)
+ && ! ARM_FPU_FSET_HAS (fpu_desc1->features, FPU_FL_CRYPTO))
+ return false;
+
+ /* Need same model and regs. */
+ if (fpu_desc2->model != fpu_desc1->model
+ || fpu_desc2->regs != fpu_desc1->regs)
+ return false;
+
+ /* OK to inline between different modes.
+ Function with mode specific instructions, e.g using asm,
+ must be explicitely protected with noinline. */
return true;
}
@@ -29504,6 +29535,7 @@
if (TREE_CODE (args) == TREE_LIST)
{
bool ret = true;
+
for (; args; args = TREE_CHAIN (args))
if (TREE_VALUE (args)
&& !arm_valid_target_attribute_rec (TREE_VALUE (args), opts))
@@ -29518,30 +29550,38 @@
}
char *argstr = ASTRDUP (TREE_STRING_POINTER (args));
- while (argstr && *argstr != '\0')
+ char *q;
+
+ while ((q = strtok (argstr, ",")) != NULL)
{
- while (ISSPACE (*argstr))
- argstr++;
+ while (ISSPACE (*q)) ++q;
- if (!strcmp (argstr, "thumb"))
- {
+ argstr = NULL;
+ if (!strncmp (q, "thumb", 5))
opts->x_target_flags |= MASK_THUMB;
- arm_option_check_internal (opts);
- return true;
- }
- if (!strcmp (argstr, "arm"))
- {
+ else if (!strncmp (q, "arm", 3))
opts->x_target_flags &= ~MASK_THUMB;
- arm_option_check_internal (opts);
- return true;
+
+ else if (!strncmp (q, "fpu=", 4))
+ {
+ if (! opt_enum_arg_to_value (OPT_mfpu_, q+4,
+ &opts->x_arm_fpu_index, CL_TARGET))
+ {
+ error ("invalid fpu for attribute(target(\"%s\"))", q);
+ return false;
+ }
+ }
+ else
+ {
+ error ("attribute(target(\"%s\")) is unknown", q);
+ return false;
}
- warning (0, "attribute(target(\"%s\")) is unknown", argstr);
- return false;
+ arm_option_check_internal (opts);
}
- return false;
+ return true;
}
/* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
diff -ruN gnu_trunk.p3/gcc/gcc/config/arm/arm-c.c gnu_trunk.p4/gcc/gcc/config/arm/arm-c.c
--- gnu_trunk.p3/gcc/gcc/config/arm/arm-c.c 2015-09-11 16:25:32.180858606 +0200
+++ gnu_trunk.p4/gcc/gcc/config/arm/arm-c.c 2015-09-11 17:00:26.085645968 +0200
@@ -68,8 +68,8 @@
def_or_undef_macro (pfile, "__ARM_FEATURE_DSP", TARGET_DSP_MULTIPLY);
def_or_undef_macro (pfile, "__ARM_FEATURE_QBIT", TARGET_ARM_QBIT);
def_or_undef_macro (pfile, "__ARM_FEATURE_SAT", TARGET_ARM_SAT);
- if (TARGET_CRYPTO)
- builtin_define ("__ARM_FEATURE_CRYPTO");
+ def_or_undef_macro (pfile, "__ARM_FEATURE_CRYPTO", TARGET_CRYPTO);
+
if (unaligned_access)
builtin_define ("__ARM_FEATURE_UNALIGNED");
if (TARGET_CRC32)
@@ -129,8 +129,7 @@
if (TARGET_SOFT_FLOAT)
builtin_define ("__SOFTFP__");
- if (TARGET_VFP)
- builtin_define ("__VFP_FP__");
+ def_or_undef_macro (pfile, "__VFP_FP__", TARGET_VFP);
if (TARGET_ARM_FP)
builtin_define_with_int_value ("__ARM_FP", TARGET_ARM_FP);
@@ -141,11 +140,9 @@
if (TARGET_FMA)
builtin_define ("__ARM_FEATURE_FMA");
- if (TARGET_NEON)
- {
- builtin_define ("__ARM_NEON__");
- builtin_define ("__ARM_NEON");
- }
+ def_or_undef_macro (pfile, "__ARM_NEON__", TARGET_NEON);
+ def_or_undef_macro (pfile, "__ARM_NEON", TARGET_NEON);
+
if (TARGET_NEON_FP)
builtin_define_with_int_value ("__ARM_NEON_FP", TARGET_NEON_FP);
@@ -231,7 +228,7 @@
gcc_assert (prev_opt);
gcc_assert (cur_opt);
- if (cur_opt->x_target_flags != prev_opt->x_target_flags)
+ if (cur_opt != prev_opt)
{
/* For the definitions, ensure all newly defined macros are considered
as used for -Wunused-macros. There is no point warning about the
@@ -242,6 +239,8 @@
/* Update macros. */
gcc_assert (cur_opt->x_target_flags == target_flags);
+ /* This one can be redefined by the pragma without warning. */
+ cpp_undef (parse_in, "__ARM_FP");
arm_cpu_builtins (parse_in);
cpp_opts->warn_unused_macros = saved_warn_unused_macros;
diff -ruN gnu_trunk.p3/gcc/gcc/doc/extend.texi gnu_trunk.p4/gcc/gcc/doc/extend.texi
--- gnu_trunk.p3/gcc/gcc/doc/extend.texi 2015-09-07 13:35:20.777683005 +0200
+++ gnu_trunk.p4/gcc/gcc/doc/extend.texi 2015-09-14 13:58:49.271385001 +0200
@@ -3606,10 +3606,17 @@
@item arm
@cindex @code{target("arm")} function attribute, ARM
Force code generation in the ARM (A32) ISA.
-@end table
Functions from different modes can be inlined in the caller's mode.
+@item fpu=
+@cindex @code{target("fpu=")} function attribute, ARM
+Specifies the fpu for which to tune the performance of this function.
+The behavior and permissible arguments are the same as for the @option{-mfpu=}
+command-line option.
+
+@end table
+
@end table
@node AVR Function Attributes
diff -ruN gnu_trunk.p3/gcc/gcc/doc/invoke.texi gnu_trunk.p4/gcc/gcc/doc/invoke.texi
--- gnu_trunk.p3/gcc/gcc/doc/invoke.texi 2015-09-10 12:21:00.698911244 +0200
+++ gnu_trunk.p4/gcc/gcc/doc/invoke.texi 2015-09-14 10:27:20.281932581 +0200
@@ -13360,6 +13363,8 @@
floating-point arithmetic (in particular denormal values are treated as
zero), so the use of NEON instructions may lead to a loss of precision.
+You can also set the fpu name at function level by using the @code{target("mfpu=")} function attributes (@pxref{ARM Function Attributes}) or pragmas (@pxref{Function Specific Option Pragmas}).
+
@item -mfp16-format=@var{name}
@opindex mfp16-format
Specify the format of the @code{__fp16} half-precision floating-point type.
diff -ruN gnu_trunk.p3/gcc/gcc/testsuite/gcc.target/arm/attr-crypto.c gnu_trunk.p4/gcc/gcc/testsuite/gcc.target/arm/attr-crypto.c
--- gnu_trunk.p3/gcc/gcc/testsuite/gcc.target/arm/attr-crypto.c 1970-01-01 01:00:00.000000000 +0100
+++ gnu_trunk.p4/gcc/gcc/testsuite/gcc.target/arm/attr-crypto.c 2015-09-14 15:58:24.967898634 +0200
@@ -0,0 +1,38 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_crypto_ok } */
+
+#pragma GCC target ("fpu=crypto-neon-fp-armv8")
+
+#ifndef __ARM_FEATURE_CRYPTO
+#error __ARM_FEATURE_CRYPTO not defined.
+#endif
+
+#ifndef __ARM_NEON
+#error __ARM_NEON not defined.
+#endif
+
+#if !defined(__ARM_FP) || (__ARM_FP != 14)
+#error __ARM_FP
+#endif
+
+#include "arm_neon.h"
+
+int
+foo (void)
+{
+ uint32x4_t a = {0xd, 0xe, 0xa, 0xd};
+ uint32x4_t b = {0, 1, 2, 3};
+
+ uint32x4_t res = vsha256su0q_u32 (a, b);
+ return res[0];
+}
+
+#pragma GCC reset_options
+
+/* Check that the FP version is correctly reset. */
+
+#if !defined(__ARM_FP) || (__ARM_FP != 12)
+#error __ARM_FP
+#endif
+
+/* { dg-final { scan-assembler "sha256su0.32\tq\[0-9\]+, q\[0-9\]+" } } */
diff -ruN gnu_trunk.p3/gcc/gcc/testsuite/gcc.target/arm/attr-neon2.c gnu_trunk.p4/gcc/gcc/testsuite/gcc.target/arm/attr-neon2.c
--- gnu_trunk.p3/gcc/gcc/testsuite/gcc.target/arm/attr-neon2.c 1970-01-01 01:00:00.000000000 +0100
+++ gnu_trunk.p4/gcc/gcc/testsuite/gcc.target/arm/attr-neon2.c 2015-09-14 15:58:24.967898634 +0200
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-O2 -mfloat-abi=softfp -mfpu=vfp" } */
+
+#pragma GCC target ("fpu=neon")
+#include <arm_neon.h>
+
+/* Check that pragma target is used. */
+int8x8_t
+my (int8x8_t __a, int8x8_t __b)
+{
+ return __a + __b;
+}
+
+#pragma GCC reset_options
+
+/* Check that command line option is restored. */
+int8x8_t
+my1 (int8x8_t __a, int8x8_t __b)
+{
+ return __a + __b;
+}
+
+/* { dg-final { scan-assembler-times "\.fpu vfp" 1 } } */
+/* { dg-final { scan-assembler-times "\.fpu neon" 1 } } */
+/* { dg-final { scan-assembler "vadd" } } */
+
+
diff -ruN gnu_trunk.p3/gcc/gcc/testsuite/gcc.target/arm/attr-neon-builtin-fail.c gnu_trunk.p4/gcc/gcc/testsuite/gcc.target/arm/attr-neon-builtin-fail.c
--- gnu_trunk.p3/gcc/gcc/testsuite/gcc.target/arm/attr-neon-builtin-fail.c 1970-01-01 01:00:00.000000000 +0100
+++ gnu_trunk.p4/gcc/gcc/testsuite/gcc.target/arm/attr-neon-builtin-fail.c 2015-09-14 15:58:24.967898634 +0200
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-O2 -mfloat-abi=softfp -mfpu=neon" } */
+
+#include <arm_neon.h>
+
+void __attribute__ ((target ("fpu=vfp")))
+foo (uint8x16_t *p)
+{
+ *p = vmovq_n_u8 (3); /* { dg-error "called from here" } */
+
+}
+
+
+/* { dg-error "inlining failed in call to always_inline" "" { target *-*-* } 0 } */
+
+
+
diff -ruN gnu_trunk.p3/gcc/gcc/testsuite/gcc.target/arm/attr-neon.c gnu_trunk.p4/gcc/gcc/testsuite/gcc.target/arm/attr-neon.c
--- gnu_trunk.p3/gcc/gcc/testsuite/gcc.target/arm/attr-neon.c 1970-01-01 01:00:00.000000000 +0100
+++ gnu_trunk.p4/gcc/gcc/testsuite/gcc.target/arm/attr-neon.c 2015-09-14 16:12:08.449698268 +0200
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-O3 -mfloat-abi=softfp -ftree-vectorize" } */
+
+void
+f3(int n, int x[], int y[]) {
+ int i;
+ for (i = 0; i < n; ++i)
+ y[i] = x[i] << 3;
+}
+
+/* Verify that neon instructions are emitted once. */
+void __attribute__ ((target("fpu=neon")))
+ f1(int n, int x[], int y[]) {
+ int i;
+ for (i = 0; i < n; ++i)
+ y[i] = x[i] << 3;
+}
+
+/* { dg-final { scan-assembler-times "\.fpu vfp" 1 } } */
+/* { dg-final { scan-assembler-times "\.fpu neon" 1 } } */
+/* { dg-final { scan-assembler-times "vshl" 1 } } */
+
+
+
+
diff -ruN gnu_trunk.p3/gcc/gcc/testsuite/gcc.target/arm/lto/pr65837_0.c gnu_trunk.p4/gcc/gcc/testsuite/gcc.target/arm/lto/pr65837_0.c
--- gnu_trunk.p3/gcc/gcc/testsuite/gcc.target/arm/lto/pr65837_0.c 1970-01-01 01:00:00.000000000 +0100
+++ gnu_trunk.p4/gcc/gcc/testsuite/gcc.target/arm/lto/pr65837_0.c 2015-09-14 15:58:13.899874587 +0200
@@ -0,0 +1,14 @@
+/* { dg-lto-do run } */
+/* { dg-lto-options {{-flto -mfpu=neon}} } */
+/* { dg-suppress-ld-options {-mfpu=neon} } */
+
+#include "arm_neon.h"
+
+float32x2_t a, b, c, e;
+
+int main()
+{
+ e = __builtin_neon_vmls_lanev2sf (a, b, c, 0);
+ return 0;
+}
+
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 4/4] [ARM] Add attribute/pragma target fpu=
2015-09-15 10:48 ` Christian Bruel
@ 2015-09-18 9:13 ` Kyrill Tkachov
2015-09-21 13:46 ` Christian Bruel
0 siblings, 1 reply; 9+ messages in thread
From: Kyrill Tkachov @ 2015-09-18 9:13 UTC (permalink / raw)
To: Christian Bruel, gcc-patches, Ramana Radhakrishnan
On 15/09/15 11:47, Christian Bruel wrote:
>
> On 09/14/2015 04:30 PM, Christian Bruel wrote:
>> Finally, the final part of the patch set does the attribute target
>> parsing and checking, redefines the preprocessor macros and implements
>> the inlining rules.
>>
>> testcases and documentation included.
>>
> new version to remove a shadowed remnant piece of code.
>
>
> > thanks
> >
> > Christian
> >
+ /* OK to inline between different modes.
+ Function with mode specific instructions, e.g using asm,
+ must be explicitely protected with noinline. */
s/explicitely/explicitly/
+ const struct arm_fpu_desc *fpu_desc1
+ = &all_fpus[caller_opts->x_arm_fpu_index];
+ const struct arm_fpu_desc *fpu_desc2
+ = &all_fpus[callee_opts->x_arm_fpu_index];
Please call these caller_fpu and callee_fpu; it's much easier to reason about the inlining rules that way.
+
+ /* Can't inline NEON extension if the caller doesn't support it. */
+ if (ARM_FPU_FSET_HAS (fpu_desc2->features, FPU_FL_NEON)
+ && ! ARM_FPU_FSET_HAS (fpu_desc1->features, FPU_FL_NEON))
+ return false;
+
+ /* Can't inline CRYPTO extension if the caller doesn't support it. */
+ if (ARM_FPU_FSET_HAS (fpu_desc2->features, FPU_FL_CRYPTO)
+ && ! ARM_FPU_FSET_HAS (fpu_desc1->features, FPU_FL_CRYPTO))
+ return false;
+
We also need to take FPU_FL_FP16 into account...
In general, what we want is for the callee's FPU features to be
a subset of the caller's features, similar to the way
x_aarch64_isa_flags is handled in aarch64_can_inline_p in the
aarch64 port. I think that's the way to go here, rather than explicitly
writing down a check for each feature.
@@ -242,6 +239,8 @@
/* Update macros. */
gcc_assert (cur_opt->x_target_flags == target_flags);
+ /* This one can be redefined by the pragma without warning. */
+ cpp_undef (parse_in, "__ARM_FP");
arm_cpu_builtins (parse_in);
Could you elaborate on why the cpp_undef is needed here?
If you want to undefine __ARM_FP so that you can redefine it to a new value
in arm_cpu_builtins, then I think you should just undefine it in that function.
diff -ruN gnu_trunk.p3/gcc/gcc/doc/invoke.texi gnu_trunk.p4/gcc/gcc/doc/invoke.texi
--- gnu_trunk.p3/gcc/gcc/doc/invoke.texi 2015-09-10 12:21:00.698911244 +0200
+++ gnu_trunk.p4/gcc/gcc/doc/invoke.texi 2015-09-14 10:27:20.281932581 +0200
@@ -13360,6 +13363,8 @@
floating-point arithmetic (in particular denormal values are treated as
zero), so the use of NEON instructions may lead to a loss of precision.
+You can also set the fpu name at function level by using the @code{target("mfpu=")} function attributes (@pxref{ARM Function Attributes}) or pragmas (@pxref{Function Specific Option Pragmas}).
+
s/"mfpu="/"fpu="
--- gnu_trunk.p3/gcc/gcc/testsuite/gcc.target/arm/attr-neon.c 1970-01-01 01:00:00.000000000 +0100
+++ gnu_trunk.p4/gcc/gcc/testsuite/gcc.target/arm/attr-neon.c 2015-09-14 16:12:08.449698268 +0200
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-O3 -mfloat-abi=softfp -ftree-vectorize" } */
+
+void
+f3(int n, int x[], int y[]) {
+ int i;
+ for (i = 0; i < n; ++i)
+ y[i] = x[i] << 3;
+}
+
What if GCC has been configured with --with-fpu=neon?
Then f3 will be compiled assuming NEON. You should add -mfpu=vfp to the dg-options so that the test does not depend on the configured default FPU.
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 4/4] [ARM] Add attribute/pragma target fpu=
2015-09-18 9:13 ` Kyrill Tkachov
@ 2015-09-21 13:46 ` Christian Bruel
2015-10-08 8:53 ` Kyrill Tkachov
0 siblings, 1 reply; 9+ messages in thread
From: Christian Bruel @ 2015-09-21 13:46 UTC (permalink / raw)
To: kyrylo.tkachov; +Cc: Ramana.Radhakrishnan, gcc-patches
[-- Attachment #1: Type: text/plain, Size: 4473 bytes --]
Hi Kyrill,
Thanks for your comments. Answers interleaved and the new patch attached.
On 09/18/2015 11:04 AM, Kyrill Tkachov wrote:
>
> On 15/09/15 11:47, Christian Bruel wrote:
>>
>> On 09/14/2015 04:30 PM, Christian Bruel wrote:
>>> Finally, the final part of the patch set does the attribute target
>>> parsing and checking, redefines the preprocessor macros and implements
>>> the inlining rules.
>>>
>>> testcases and documentation included.
>>>
>> new version to remove a shadowed remnant piece of code.
>>
>>
>> > thanks
>> >
>> > Christian
>> >
>
> + /* OK to inline between different modes.
> + Function with mode specific instructions, e.g using asm,
> + must be explicitely protected with noinline. */
>
> s/explicitely/explicitly/
>
thanks
>
> + const struct arm_fpu_desc *fpu_desc1
> + = &all_fpus[caller_opts->x_arm_fpu_index];
> + const struct arm_fpu_desc *fpu_desc2
> + = &all_fpus[callee_opts->x_arm_fpu_index];
>
> Please call these caller_fpu and callee_fpu, it's much easier to reason about the inlining rules that way
ok
>
> +
> + /* Can't inline NEON extension if the caller doesn't support it. */
> + if (ARM_FPU_FSET_HAS (fpu_desc2->features, FPU_FL_NEON)
> + && ! ARM_FPU_FSET_HAS (fpu_desc1->features, FPU_FL_NEON))
> + return false;
> +
> + /* Can't inline CRYPTO extension if the caller doesn't support it. */
> + if (ARM_FPU_FSET_HAS (fpu_desc2->features, FPU_FL_CRYPTO)
> + && ! ARM_FPU_FSET_HAS (fpu_desc1->features, FPU_FL_CRYPTO))
> + return false;
> +
>
> We also need to take into account FPU_FL_FP16...
> In general what we want is for the callee FPU features to be
> a subset of the callers features, similar to the way we handle
> the x_aarch64_isa_flags handling in aarch64_can_inline_p from the
> aarch64 port. I think that's the way to go here rather than explicitly
> writing down a check for each feature.
ok, with FL_FP16 now,
>
> @@ -242,6 +239,8 @@
>
> /* Update macros. */
> gcc_assert (cur_opt->x_target_flags == target_flags);
> + /* This one can be redefined by the pragma without warning. */
> + cpp_undef (parse_in, "__ARM_FP");
> arm_cpu_builtins (parse_in);
>
> Could you elaborate why the cpp_undef here?
> If you want to undefine __ARM_FP so you can redefine it to a new value
> in arm_cpu_builtins then I think you should just undefine it in that function.
This is to avoid a "__ARM_FP" redefined warning when creating a new
pragma scope (see the test attr-crypto.c).
We cannot call cpp_undef inside arm_cpu_builtins, because that function
is also used for the TARGET_CPU_CPP_BUILTINS hook, where the undef would
hide warnings about genuinely illegitimate redefinitions.
Alternatively, I thought of resetting the warn_builtin_macro_redefined flag,
but that doesn't work as the macro is not NODE_BUILTIN (see the
definition of warn_of_redefinition in libcpp).
We might need to change this later: should target macros be marked as
NODE_BUILTIN? We can discuss this separately (I can open a defect) as
we have the cpp_undef solution for now, if you agree.
>
>
> diff -ruN gnu_trunk.p3/gcc/gcc/doc/invoke.texi gnu_trunk.p4/gcc/gcc/doc/invoke.texi
> --- gnu_trunk.p3/gcc/gcc/doc/invoke.texi 2015-09-10 12:21:00.698911244 +0200
> +++ gnu_trunk.p4/gcc/gcc/doc/invoke.texi 2015-09-14 10:27:20.281932581 +0200
> @@ -13360,6 +13363,8 @@
> floating-point arithmetic (in particular denormal values are treated as
> zero), so the use of NEON instructions may lead to a loss of precision.
>
> +You can also set the fpu name at function level by using the @code{target("mfpu=")} function attributes (@pxref{ARM Function Attributes}) or pragmas (@pxref{Function Specific Option Pragmas}).
> +
>
> s/"mfpu="/"fpu="
>
thanks
>
> --- gnu_trunk.p3/gcc/gcc/testsuite/gcc.target/arm/attr-neon.c 1970-01-01 01:00:00.000000000 +0100
> +++ gnu_trunk.p4/gcc/gcc/testsuite/gcc.target/arm/attr-neon.c 2015-09-14 16:12:08.449698268 +0200
> @@ -0,0 +1,26 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target arm_neon_ok } */
> +/* { dg-options "-O3 -mfloat-abi=softfp -ftree-vectorize" } */
> +
> +void
> +f3(int n, int x[], int y[]) {
> + int i;
> + for (i = 0; i < n; ++i)
> + y[i] = x[i] << 3;
> +}
> +
>
> What if GCC has been configured with --with-fpu=neon?
> Then f3 will be compiled assuming NEON. You should add a -mfpu=vfp to the dg-options.
Ah yes. I've added __attribute__ ((target("fpu=vfp"))) instead, since we are
testing the attribute.
[-- Attachment #2: p42.patch --]
[-- Type: text/x-patch, Size: 11741 bytes --]
2015-05-26 Christian Bruel <christian.bruel@st.com>
PR target/65837
* config/arm/arm-c.c (arm_cpu_builtins): Set or reset
__ARM_FEATURE_CRYPTO, __VFP_FP__, __ARM_NEON__
(arm_pragma_target_parse): Change check for arm_cpu_builtins.
undefine __ARM_FP.
* config/arm/arm.c (arm_can_inline_p): Check FPUs.
(arm_valid_target_attribute_rec): Handle -mfpu attribute target.
* doc/invoke.texi (-mfpu=): Mention attribute and pragma.
* doc/extend.texi (-mfpu=): Describe attribute.
2015-09-14 Christian Bruel <christian.bruel@st.com>
PR target/65837
gcc.target/arm/lto/pr65837_0.c
gcc.target/arm/attr-neon2.c
gcc.target/arm/attr-neon.c
gcc.target/arm/attr-neon-builtin-fail.c
gcc.target/arm/attr-crypto.c
diff -ruN gnu_trunk.p3/gcc/gcc/config/arm/arm.c gnu_trunk.p4/gcc/gcc/config/arm/arm.c
--- gnu_trunk.p3/gcc/gcc/config/arm/arm.c 2015-09-21 14:07:39.218566954 +0200
+++ gnu_trunk.p4/gcc/gcc/config/arm/arm.c 2015-09-21 13:36:36.242397513 +0200
@@ -29789,11 +29788,39 @@
/* Hook to determine if one function can safely inline another. */
static bool
-arm_can_inline_p (tree caller ATTRIBUTE_UNUSED, tree callee ATTRIBUTE_UNUSED)
+arm_can_inline_p (tree caller, tree callee)
{
- /* Overidde default hook: Always OK to inline between different modes.
- Function with mode specific instructions, e.g using asm, must be explicitely
- protected with noinline. */
+ /* Return true if CALLEE may be inlined into CALLER. A function
+ without function-specific target options is checked against
+ target_option_default_node. */
+ tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
+ tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
+
+ struct cl_target_option *caller_opts
+ = TREE_TARGET_OPTION (caller_tree ? caller_tree
+ : target_option_default_node);
+
+ struct cl_target_option *callee_opts
+ = TREE_TARGET_OPTION (callee_tree ? callee_tree
+ : target_option_default_node);
+
+ const struct arm_fpu_desc *caller_fpu
+ = &all_fpus[caller_opts->x_arm_fpu_index];
+ const struct arm_fpu_desc *callee_fpu
+ = &all_fpus[callee_opts->x_arm_fpu_index];
+
+ /* Callee's fpu features should be a subset of the caller's. */
+ if ((caller_fpu->features & callee_fpu->features) != callee_fpu->features)
+ return false;
+
+ /* Caller and callee need the same FPU model and register set. */
+ if (callee_fpu->model != caller_fpu->model
+ || callee_fpu->regs != caller_fpu->regs)
+ return false;
+
+ /* OK to inline between different modes.
+ Function with mode specific instructions, e.g using asm,
+ must be explicitly protected with noinline. */
return true;
}
@@ -29821,30 +29846,38 @@
}
char *argstr = ASTRDUP (TREE_STRING_POINTER (args));
- while (argstr && *argstr != '\0')
+ char *q;
+
+ while ((q = strtok (argstr, ",")) != NULL)
{
- while (ISSPACE (*argstr))
- argstr++;
+ while (ISSPACE (*q)) ++q;
- if (!strcmp (argstr, "thumb"))
- {
+ argstr = NULL;
+ if (!strncmp (q, "thumb", 5))
opts->x_target_flags |= MASK_THUMB;
- arm_option_check_internal (opts);
- return true;
- }
- if (!strcmp (argstr, "arm"))
- {
+ else if (!strncmp (q, "arm", 3))
opts->x_target_flags &= ~MASK_THUMB;
- arm_option_check_internal (opts);
- return true;
+
+ else if (!strncmp (q, "fpu=", 4))
+ {
+ if (! opt_enum_arg_to_value (OPT_mfpu_, q+4,
+ &opts->x_arm_fpu_index, CL_TARGET))
+ {
+ error ("invalid fpu for attribute(target(\"%s\"))", q);
+ return false;
+ }
+ }
+ else
+ {
+ error ("attribute(target(\"%s\")) is unknown", q);
+ return false;
}
- warning (0, "attribute(target(\"%s\")) is unknown", argstr);
- return false;
+ arm_option_check_internal (opts);
}
- return false;
+ return true;
}
/* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
diff -ruN gnu_trunk.p3/gcc/gcc/config/arm/arm-c.c gnu_trunk.p4/gcc/gcc/config/arm/arm-c.c
--- gnu_trunk.p3/gcc/gcc/config/arm/arm-c.c 2015-09-21 14:07:12.186506227 +0200
+++ gnu_trunk.p4/gcc/gcc/config/arm/arm-c.c 2015-09-21 13:46:02.655664904 +0200
@@ -68,8 +68,8 @@
def_or_undef_macro (pfile, "__ARM_FEATURE_DSP", TARGET_DSP_MULTIPLY);
def_or_undef_macro (pfile, "__ARM_FEATURE_QBIT", TARGET_ARM_QBIT);
def_or_undef_macro (pfile, "__ARM_FEATURE_SAT", TARGET_ARM_SAT);
- if (TARGET_CRYPTO)
- builtin_define ("__ARM_FEATURE_CRYPTO");
+ def_or_undef_macro (pfile, "__ARM_FEATURE_CRYPTO", TARGET_CRYPTO);
+
if (unaligned_access)
builtin_define ("__ARM_FEATURE_UNALIGNED");
if (TARGET_CRC32)
@@ -129,8 +129,7 @@
if (TARGET_SOFT_FLOAT)
builtin_define ("__SOFTFP__");
- if (TARGET_VFP)
- builtin_define ("__VFP_FP__");
+ def_or_undef_macro (pfile, "__VFP_FP__", TARGET_VFP);
if (TARGET_ARM_FP)
builtin_define_with_int_value ("__ARM_FP", TARGET_ARM_FP);
@@ -141,11 +140,9 @@
if (TARGET_FMA)
builtin_define ("__ARM_FEATURE_FMA");
- if (TARGET_NEON)
- {
- builtin_define ("__ARM_NEON__");
- builtin_define ("__ARM_NEON");
- }
+ def_or_undef_macro (pfile, "__ARM_NEON__", TARGET_NEON);
+ def_or_undef_macro (pfile, "__ARM_NEON", TARGET_NEON);
+
if (TARGET_NEON_FP)
builtin_define_with_int_value ("__ARM_NEON_FP", TARGET_NEON_FP);
@@ -232,7 +228,7 @@
gcc_assert (prev_opt);
gcc_assert (cur_opt);
- if (cur_opt->x_target_flags != prev_opt->x_target_flags)
+ if (cur_opt != prev_opt)
{
/* For the definitions, ensure all newly defined macros are considered
as used for -Wunused-macros. There is no point warning about the
@@ -243,6 +239,8 @@
/* Update macros. */
gcc_assert (cur_opt->x_target_flags == target_flags);
+ /* This one can be redefined by the pragma without warning. */
+ cpp_undef (parse_in, "__ARM_FP");
arm_cpu_builtins (parse_in);
cpp_opts->warn_unused_macros = saved_warn_unused_macros;
diff -ruN gnu_trunk.p3/gcc/gcc/doc/extend.texi gnu_trunk.p4/gcc/gcc/doc/extend.texi
--- gnu_trunk.p3/gcc/gcc/doc/extend.texi 2015-09-07 13:35:20.777683005 +0200
+++ gnu_trunk.p4/gcc/gcc/doc/extend.texi 2015-09-14 13:58:49.271385001 +0200
@@ -3606,10 +3606,17 @@
@item arm
@cindex @code{target("arm")} function attribute, ARM
Force code generation in the ARM (A32) ISA.
-@end table
Functions from different modes can be inlined in the caller's mode.
+@item fpu=
+@cindex @code{target("fpu=")} function attribute, ARM
+Specifies the floating-point unit (FPU) for which to generate code for this function.
+The behavior and permissible arguments are the same as for the @option{-mfpu=}
+command-line option.
+
+@end table
+
@end table
@node AVR Function Attributes
diff -ruN gnu_trunk.p3/gcc/gcc/doc/invoke.texi gnu_trunk.p4/gcc/gcc/doc/invoke.texi
--- gnu_trunk.p3/gcc/gcc/doc/invoke.texi 2015-09-21 13:35:49.274292268 +0200
+++ gnu_trunk.p4/gcc/gcc/doc/invoke.texi 2015-09-21 13:36:18.798358427 +0200
@@ -13386,6 +13386,8 @@
floating-point arithmetic (in particular denormal values are treated as
zero), so the use of NEON instructions may lead to a loss of precision.
+You can also set the fpu name at function level by using the @code{target("fpu=")} function attributes (@pxref{ARM Function Attributes}) or pragmas (@pxref{Function Specific Option Pragmas}).
+
@item -mfp16-format=@var{name}
@opindex mfp16-format
Specify the format of the @code{__fp16} half-precision floating-point type.
diff -ruN gnu_trunk.p3/gcc/gcc/testsuite/gcc.target/arm/attr-crypto.c gnu_trunk.p4/gcc/gcc/testsuite/gcc.target/arm/attr-crypto.c
--- gnu_trunk.p3/gcc/gcc/testsuite/gcc.target/arm/attr-crypto.c 1970-01-01 01:00:00.000000000 +0100
+++ gnu_trunk.p4/gcc/gcc/testsuite/gcc.target/arm/attr-crypto.c 2015-09-14 15:58:24.967898634 +0200
@@ -0,0 +1,38 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_crypto_ok } */
+
+#pragma GCC target ("fpu=crypto-neon-fp-armv8")
+
+#ifndef __ARM_FEATURE_CRYPTO
+#error __ARM_FEATURE_CRYPTO not defined.
+#endif
+
+#ifndef __ARM_NEON
+#error __ARM_NEON not defined.
+#endif
+
+#if !defined(__ARM_FP) || (__ARM_FP != 14)
+#error __ARM_FP
+#endif
+
+#include "arm_neon.h"
+
+int
+foo (void)
+{
+ uint32x4_t a = {0xd, 0xe, 0xa, 0xd};
+ uint32x4_t b = {0, 1, 2, 3};
+
+ uint32x4_t res = vsha256su0q_u32 (a, b);
+ return res[0];
+}
+
+#pragma GCC reset_options
+
+/* Check that the FP version is correctly reset. */
+
+#if !defined(__ARM_FP) || (__ARM_FP != 12)
+#error __ARM_FP
+#endif
+
+/* { dg-final { scan-assembler "sha256su0.32\tq\[0-9\]+, q\[0-9\]+" } } */
diff -ruN gnu_trunk.p3/gcc/gcc/testsuite/gcc.target/arm/attr-neon2.c gnu_trunk.p4/gcc/gcc/testsuite/gcc.target/arm/attr-neon2.c
--- gnu_trunk.p3/gcc/gcc/testsuite/gcc.target/arm/attr-neon2.c 1970-01-01 01:00:00.000000000 +0100
+++ gnu_trunk.p4/gcc/gcc/testsuite/gcc.target/arm/attr-neon2.c 2015-09-14 15:58:24.967898634 +0200
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-O2 -mfloat-abi=softfp -mfpu=vfp" } */
+
+#pragma GCC target ("fpu=neon")
+#include <arm_neon.h>
+
+/* Check that pragma target is used. */
+int8x8_t
+my (int8x8_t __a, int8x8_t __b)
+{
+ return __a + __b;
+}
+
+#pragma GCC reset_options
+
+/* Check that command line option is restored. */
+int8x8_t
+my1 (int8x8_t __a, int8x8_t __b)
+{
+ return __a + __b;
+}
+
+/* { dg-final { scan-assembler-times "\.fpu vfp" 1 } } */
+/* { dg-final { scan-assembler-times "\.fpu neon" 1 } } */
+/* { dg-final { scan-assembler "vadd" } } */
+
+
diff -ruN gnu_trunk.p3/gcc/gcc/testsuite/gcc.target/arm/attr-neon-builtin-fail.c gnu_trunk.p4/gcc/gcc/testsuite/gcc.target/arm/attr-neon-builtin-fail.c
--- gnu_trunk.p3/gcc/gcc/testsuite/gcc.target/arm/attr-neon-builtin-fail.c 1970-01-01 01:00:00.000000000 +0100
+++ gnu_trunk.p4/gcc/gcc/testsuite/gcc.target/arm/attr-neon-builtin-fail.c 2015-09-14 15:58:24.967898634 +0200
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-O2 -mfloat-abi=softfp -mfpu=neon" } */
+
+#include <arm_neon.h>
+
+void __attribute__ ((target ("fpu=vfp")))
+foo (uint8x16_t *p)
+{
+ *p = vmovq_n_u8 (3); /* { dg-error "called from here" } */
+
+}
+
+
+/* { dg-error "inlining failed in call to always_inline" "" { target *-*-* } 0 } */
+
+
+
diff -ruN gnu_trunk.p3/gcc/gcc/testsuite/gcc.target/arm/attr-neon.c gnu_trunk.p4/gcc/gcc/testsuite/gcc.target/arm/attr-neon.c
--- gnu_trunk.p3/gcc/gcc/testsuite/gcc.target/arm/attr-neon.c 1970-01-01 01:00:00.000000000 +0100
+++ gnu_trunk.p4/gcc/gcc/testsuite/gcc.target/arm/attr-neon.c 2015-09-21 13:43:45.983359388 +0200
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-O2 -mfloat-abi=softfp -ftree-vectorize" } */
+
+/* Verify that neon instructions are emitted once. */
+void __attribute__ ((target("fpu=neon")))
+ f1(int n, int x[], int y[]) {
+ int i;
+ for (i = 0; i < n; ++i)
+ y[i] = x[i] << 3;
+}
+
+void __attribute__ ((target("fpu=vfp")))
+f3(int n, int x[], int y[]) {
+ int i;
+ for (i = 0; i < n; ++i)
+ y[i] = x[i] << 3;
+}
+
+/* { dg-final { scan-assembler-times "\.fpu vfp" 1 } } */
+/* { dg-final { scan-assembler-times "\.fpu neon" 1 } } */
+/* { dg-final { scan-assembler-times "vshl" 1 } } */
+
+
+
+
diff -ruN gnu_trunk.p3/gcc/gcc/testsuite/gcc.target/arm/lto/pr65837_0.c gnu_trunk.p4/gcc/gcc/testsuite/gcc.target/arm/lto/pr65837_0.c
--- gnu_trunk.p3/gcc/gcc/testsuite/gcc.target/arm/lto/pr65837_0.c 1970-01-01 01:00:00.000000000 +0100
+++ gnu_trunk.p4/gcc/gcc/testsuite/gcc.target/arm/lto/pr65837_0.c 2015-09-14 15:58:13.899874587 +0200
@@ -0,0 +1,14 @@
+/* { dg-lto-do run } */
+/* { dg-lto-options {{-flto -mfpu=neon}} } */
+/* { dg-suppress-ld-options {-mfpu=neon} } */
+
+#include "arm_neon.h"
+
+float32x2_t a, b, c, e;
+
+int main()
+{
+ e = __builtin_neon_vmls_lanev2sf (a, b, c, 0);
+ return 0;
+}
+
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 4/4] [ARM] Add attribute/pragma target fpu=
2015-09-21 13:46 ` Christian Bruel
@ 2015-10-08 8:53 ` Kyrill Tkachov
2015-11-12 14:54 ` Christian Bruel
0 siblings, 1 reply; 9+ messages in thread
From: Kyrill Tkachov @ 2015-10-08 8:53 UTC (permalink / raw)
To: Christian Bruel; +Cc: Ramana Radhakrishnan, gcc-patches
Hi Christian,
On 21/09/15 14:43, Christian Bruel wrote:
> Hi Kyrill,
>
> Thanks for your comments. Answers interleaved and the new patch attached.
>
> On 09/18/2015 11:04 AM, Kyrill Tkachov wrote:
>> On 15/09/15 11:47, Christian Bruel wrote:
>>> On 09/14/2015 04:30 PM, Christian Bruel wrote:
>>>> Finally, the final part of the patch set does the attribute target
>>>> parsing and checking, redefines the preprocessor macros and implements
>>>> the inlining rules.
>>>>
>>>> testcases and documentation included.
>>>>
>>> new version to remove a shadowed remnant piece of code.
>>>
>>>
>>> > thanks
>>> >
>>> > Christian
>>> >
>> + /* OK to inline between different modes.
>> + Function with mode specific instructions, e.g using asm,
>> + must be explicitely protected with noinline. */
>>
>> s/explicitely/explicitly/
>>
> thanks
>
>> + const struct arm_fpu_desc *fpu_desc1
>> + = &all_fpus[caller_opts->x_arm_fpu_index];
>> + const struct arm_fpu_desc *fpu_desc2
>> + = &all_fpus[callee_opts->x_arm_fpu_index];
>>
>> Please call these caller_fpu and callee_fpu, it's much easier to reason about the inlining rules that way
> ok
>
>> +
>> + /* Can't inline NEON extension if the caller doesn't support it. */
>> + if (ARM_FPU_FSET_HAS (fpu_desc2->features, FPU_FL_NEON)
>> + && ! ARM_FPU_FSET_HAS (fpu_desc1->features, FPU_FL_NEON))
>> + return false;
>> +
>> + /* Can't inline CRYPTO extension if the caller doesn't support it. */
>> + if (ARM_FPU_FSET_HAS (fpu_desc2->features, FPU_FL_CRYPTO)
>> + && ! ARM_FPU_FSET_HAS (fpu_desc1->features, FPU_FL_CRYPTO))
>> + return false;
>> +
>>
>> We also need to take into account FPU_FL_FP16...
>> In general what we want is for the callee FPU features to be
>> a subset of the callers features, similar to the way we handle
>> the x_aarch64_isa_flags handling in aarch64_can_inline_p from the
>> aarch64 port. I think that's the way to go here rather than explicitly
>> writing down a check for each feature.
> ok, with FL_FP16 now,
>
>> @@ -242,6 +239,8 @@
>>
>> /* Update macros. */
>> gcc_assert (cur_opt->x_target_flags == target_flags);
>> + /* This one can be redefined by the pragma without warning. */
>> + cpp_undef (parse_in, "__ARM_FP");
>> arm_cpu_builtins (parse_in);
>>
>> Could you elaborate why the cpp_undef here?
>> If you want to undefine __ARM_FP so you can redefine it to a new value
>> in arm_cpu_builtins then I think you should just undefine it in that function.
> This is to avoid a warning: "__ARM_FP" redefined when creating a new
> pragma scope. (See the test attr-crypto.c).
>
> We cannot call the cpp_undef inside arm_cpu_builtins, because it is also
> used for the TARGET_CPU_CPP_BUILTINS hook and then would prevent real
> illegitimate redefinitions.
>
> Alternatively, I thought to reset the warn_builtin_macro_redefined flag,
> but that doesn't work as the macro is not NODE_BUILTIN (see the
> definition of warn_of_redefinition in libcpp).
> We might need to change this later : should target macros be marked as
> NOTE_BUILTIN ? We can discuss this separately (I can open a defect) as
> we have the cpp_undep solution for now, if you agree.
>
>>
>> diff -ruN gnu_trunk.p3/gcc/gcc/doc/invoke.texi gnu_trunk.p4/gcc/gcc/doc/invoke.texi
>> --- gnu_trunk.p3/gcc/gcc/doc/invoke.texi 2015-09-10 12:21:00.698911244 +0200
>> +++ gnu_trunk.p4/gcc/gcc/doc/invoke.texi 2015-09-14 10:27:20.281932581 +0200
>> @@ -13360,6 +13363,8 @@
>> floating-point arithmetic (in particular denormal values are treated as
>> zero), so the use of NEON instructions may lead to a loss of precision.
>>
>> +You can also set the fpu name at function level by using the @code{target("mfpu=")} function attributes (@pxref{ARM Function Attributes}) or pragmas (@pxref{Function Specific Option Pragmas}).
>> +
>>
>> s/"mfpu="/"fpu="
>>
> thanks
>
>> --- gnu_trunk.p3/gcc/gcc/testsuite/gcc.target/arm/attr-neon.c 1970-01-01 01:00:00.000000000 +0100
>> +++ gnu_trunk.p4/gcc/gcc/testsuite/gcc.target/arm/attr-neon.c 2015-09-14 16:12:08.449698268 +0200
>> @@ -0,0 +1,26 @@
>> +/* { dg-do compile } */
>> +/* { dg-require-effective-target arm_neon_ok } */
>> +/* { dg-options "-O3 -mfloat-abi=softfp -ftree-vectorize" } */
>> +
>> +void
>> +f3(int n, int x[], int y[]) {
>> + int i;
>> + for (i = 0; i < n; ++i)
>> + y[i] = x[i] << 3;
>> +}
>> +
>>
>> What if GCC has been configured with --with-fpu=neon?
>> Then f3 will be compiled assuming NEON. You should add a -mfpu=vfp to the dg-options.
> Ah yes. I've added ((target("fpu=vfp")) instead, since we are testing
> the attribute.
>
2015-05-26 Christian Bruel<christian.bruel@st.com>
PR target/65837
* config/arm/arm-c.c (arm_cpu_builtins): Set or reset
__ARM_FEATURE_CRYPTO, __VFP_FP__, __ARM_NEON__
(arm_pragma_target_parse): Change check for arm_cpu_builtins.
undefine __ARM_FP.
* config/arm/arm.c (arm_can_inline_p): Check FPUs.
(arm_valid_target_attribute_rec): Handle -mfpu attribute target.
* doc/invoke.texi (-mfpu=): Mention attribute and pragma.
* doc/extend.texi (-mfpu=): Describe attribute.
2015-09-14 Christian Bruel<christian.bruel@st.com>
PR target/65837
gcc.target/arm/lto/pr65837_0.c
gcc.target/arm/attr-neon2.c
gcc.target/arm/attr-neon.c
gcc.target/arm/attr-neon-builtin-fail.c
gcc.target/arm/attr-crypto.c
The parts in this patch look ok to me.
However, I think we need some more functionality.
In aarch64 we support compiling a file with no simd, including arm_neon.h and using arm_neon.h intrinsics
within functions tagged with simd support.
We want to support such functionality on arm, i.e. compile a file with -mfpu=vfp and use arm_neon.h intrinsics
in a function tagged with an fpu=neon attribute.
For that we'd need to wrap the intrinsics in arm_neon.h in appropriate pragmas, like in the aarch64 version of arm_neon.h.
Thanks,
Kyrill
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 4/4] [ARM] Add attribute/pragma target fpu=
2015-10-08 8:53 ` Kyrill Tkachov
@ 2015-11-12 14:54 ` Christian Bruel
2015-11-13 11:49 ` Kyrill Tkachov
0 siblings, 1 reply; 9+ messages in thread
From: Christian Bruel @ 2015-11-12 14:54 UTC (permalink / raw)
To: Kyrill Tkachov; +Cc: Ramana Radhakrishnan, gcc-patches
[-- Attachment #1: Type: text/plain, Size: 2023 bytes --]
Hi Kyrill,
> ...
> The parts in this patch look ok to me.
> However, I think we need some more functionality
> In aarch64 we support compiling a file with no simd, including arm_neon.h and using arm_neon.h intrinsics
> within functions tagged with simd support.
> We want to support such functionality on arm i.e. compile a file with -mfpu=vfp and use arm_neon.h intrinsics
> in a function tagged with an fpu=neon attribute.
> For that we'd need to wrap the intrinsics in arm_neon.h in appropriate pragmas, like in the aarch64 version of arm_neon.h
As discussed, here is arm_neon.h for aarch32/neon with the same
programming model as aarch64/simd. As you said, let's use one of the
fpu=neon attributes even if the file is compiled with -mfpu=vfp.
The drawback is that we now unconditionally make every neon
intrinsic available, introducing a small legacy change with regard to
error checking (that you didn't have with aarch64). So it's worth
stressing that:
- One cannot use #include "arm_neon.h" to check whether the compiler can
use neon instructions. Instead use #ifndef __ARM_NEON__. (Found in
target-supports.exp)
- Types cannot be checked. For instance:
#include <arm_neon.h>
poly128_t
foo (poly128_t* ptr)
{
return vldrq_p128 (ptr);
}
compiled with -mfpu=neon used to be rejected with
error: unknown type name 'poly128_t' ...
Now the error, as a side effect from the inlining rules between
incompatible modes, becomes
error: inlining failed in call to always_inline 'vldrq_p128': target
specific option mismatch ...
I found this more confusing, so I was a little bit reluctant to
implement this, but the code is correctly rejected and the message makes
sense, after all. Just a different check.
This patch applies on top of the preceding attribute/pragma target fpu=
series. Tested with arm-none-eabi configured with default and
--with-cpu=cortex-a9 --with-fp --with-float=hard
Also fixes a few macros that depend on fpu= and that I forgot to redefine.
Christian
[-- Attachment #2: arm_neon.patch --]
[-- Type: text/x-patch, Size: 55802 bytes --]
2015-11-12 Christian Bruel <christian.bruel@st.com>
* config/arm/arm_neon.h: Remove #ifndef check on __ARM_NEON.
Replace #ifdef __ARM_FEATURE_CRYPTO, __ARM_FEATURE_FMA, __ARM_FP
with appropriate pragma GCC target.
* config/arm/arm-c.c (arm_cpu_builtins): Conditionally set and reset
__ARM_FEATURE_FMA and __ARM_NEON_FP, __ARM_FP.
2015-11-12 Christian Bruel <christian.bruel@st.com>
* lib/target-supports.exp (check_effective_target_arm_neon_ok_nocache):
Check __ARM_NEON__ instead of "arm_neon.h".
* gcc.target/arm/attr-neon3.c: New test.
* gcc.target/arm/attr-neon-fp16.c: Likewise.
diff -ruN '--exclude=#*#' '--exclude=.svn' '--exclude=*~' -ruN gnu_trunk.p4/gcc/gcc/config/arm/arm-c.c gnu_trunk.p5/gcc/gcc/config/arm/arm-c.c
--- gnu_trunk.p4/gcc/gcc/config/arm/arm-c.c 2015-11-10 14:52:22.968943246 +0100
+++ gnu_trunk.p5/gcc/gcc/config/arm/arm-c.c 2015-11-09 14:33:08.395442761 +0100
@@ -129,18 +129,22 @@
if (TARGET_ARM_FP)
builtin_define_with_int_value ("__ARM_FP", TARGET_ARM_FP);
+ else
+ cpp_undef (pfile, "__ARM_FP");
+
if (arm_fp16_format == ARM_FP16_FORMAT_IEEE)
builtin_define ("__ARM_FP16_FORMAT_IEEE");
if (arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE)
builtin_define ("__ARM_FP16_FORMAT_ALTERNATIVE");
- if (TARGET_FMA)
- builtin_define ("__ARM_FEATURE_FMA");
+ def_or_undef_macro (pfile, "__ARM_FEATURE_FMA", TARGET_FMA);
def_or_undef_macro (pfile, "__ARM_NEON__", TARGET_NEON);
def_or_undef_macro (pfile, "__ARM_NEON", TARGET_NEON);
if (TARGET_NEON_FP)
builtin_define_with_int_value ("__ARM_NEON_FP", TARGET_NEON_FP);
+ else
+ cpp_undef (pfile, "__ARM_NEON_FP");
/* Add a define for interworking. Needed when building libgcc.a. */
if (arm_cpp_interwork)
diff -ruN '--exclude=#*#' '--exclude=.svn' '--exclude=*~' -ruN gnu_trunk.p4/gcc/gcc/config/arm/arm_neon.h gnu_trunk.p5/gcc/gcc/config/arm/arm_neon.h
--- gnu_trunk.p4/gcc/gcc/config/arm/arm_neon.h 2015-09-10 14:57:15.363897373 +0200
+++ gnu_trunk.p5/gcc/gcc/config/arm/arm_neon.h 2015-11-12 14:22:23.071626491 +0100
@@ -27,9 +27,8 @@
#ifndef _GCC_ARM_NEON_H
#define _GCC_ARM_NEON_H 1
-#ifndef __ARM_NEON__
-#error You must enable NEON instructions (e.g. -mfloat-abi=softfp -mfpu=neon) to use arm_neon.h
-#else
+#pragma GCC push_options
+#pragma GCC target ("fpu=neon")
#ifdef __cplusplus
extern "C" {
@@ -48,9 +47,10 @@
typedef __simd64_float32_t float32x2_t;
typedef __simd64_poly8_t poly8x8_t;
typedef __simd64_poly16_t poly16x4_t;
-#ifdef __ARM_FEATURE_CRYPTO
+#pragma GCC push_options
+#pragma GCC target ("fpu=crypto-neon-fp-armv8")
typedef __builtin_neon_poly64 poly64x1_t;
-#endif
+#pragma GCC pop_options
typedef __simd64_uint8_t uint8x8_t;
typedef __simd64_uint16_t uint16x4_t;
typedef __simd64_uint32_t uint32x2_t;
@@ -66,9 +66,10 @@
typedef __simd128_float32_t float32x4_t;
typedef __simd128_poly8_t poly8x16_t;
typedef __simd128_poly16_t poly16x8_t;
-#ifdef __ARM_FEATURE_CRYPTO
+#pragma GCC push_options
+#pragma GCC target ("fpu=crypto-neon-fp-armv8")
typedef __builtin_neon_poly64 poly64x2_t __attribute__ ((__vector_size__ (16)));
-#endif
+#pragma GCC pop_options
typedef __simd128_uint8_t uint8x16_t;
typedef __simd128_uint16_t uint16x8_t;
@@ -81,10 +82,11 @@
keep them that way. */
typedef __builtin_neon_poly8 poly8_t;
typedef __builtin_neon_poly16 poly16_t;
-#ifdef __ARM_FEATURE_CRYPTO
+#pragma GCC push_options
+#pragma GCC target ("fpu=crypto-neon-fp-armv8")
typedef __builtin_neon_poly64 poly64_t;
typedef __builtin_neon_poly128 poly128_t;
-#endif
+#pragma GCC pop_options
typedef struct int8x8x2_t
{
@@ -210,20 +212,19 @@
poly16x8_t val[2];
} poly16x8x2_t;
-#ifdef __ARM_FEATURE_CRYPTO
+#pragma GCC push_options
+#pragma GCC target ("fpu=crypto-neon-fp-armv8")
typedef struct poly64x1x2_t
{
poly64x1_t val[2];
} poly64x1x2_t;
-#endif
-#ifdef __ARM_FEATURE_CRYPTO
typedef struct poly64x2x2_t
{
poly64x2_t val[2];
} poly64x2x2_t;
-#endif
+#pragma GCC pop_options
typedef struct int8x8x3_t
@@ -350,20 +351,19 @@
poly16x8_t val[3];
} poly16x8x3_t;
-#ifdef __ARM_FEATURE_CRYPTO
+#pragma GCC push_options
+#pragma GCC target ("fpu=crypto-neon-fp-armv8")
typedef struct poly64x1x3_t
{
poly64x1_t val[3];
} poly64x1x3_t;
-#endif
-#ifdef __ARM_FEATURE_CRYPTO
typedef struct poly64x2x3_t
{
poly64x2_t val[3];
} poly64x2x3_t;
-#endif
+#pragma GCC pop_options
typedef struct int8x8x4_t
@@ -490,20 +490,19 @@
poly16x8_t val[4];
} poly16x8x4_t;
-#ifdef __ARM_FEATURE_CRYPTO
+#pragma GCC push_options
+#pragma GCC target ("fpu=crypto-neon-fp-armv8")
typedef struct poly64x1x4_t
{
poly64x1_t val[4];
} poly64x1x4_t;
-#endif
-#ifdef __ARM_FEATURE_CRYPTO
typedef struct poly64x2x4_t
{
poly64x2_t val[4];
} poly64x2x4_t;
-#endif
+#pragma GCC pop_options
/* vadd */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
@@ -1477,38 +1476,33 @@
return (int64x2_t)__builtin_neon_vqdmlslv2si (__a, __b, __c);
}
-#ifdef __ARM_FEATURE_FMA
+#pragma GCC push_options
+#pragma GCC target ("fpu=neon-vfpv4")
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vfma_f32 (float32x2_t __a, float32x2_t __b, float32x2_t __c)
{
return (float32x2_t)__builtin_neon_vfmav2sf (__a, __b, __c);
}
-#endif
-#ifdef __ARM_FEATURE_FMA
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vfmaq_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c)
{
return (float32x4_t)__builtin_neon_vfmav4sf (__a, __b, __c);
}
-#endif
-#ifdef __ARM_FEATURE_FMA
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vfms_f32 (float32x2_t __a, float32x2_t __b, float32x2_t __c)
{
return (float32x2_t)__builtin_neon_vfmsv2sf (__a, __b, __c);
}
-#endif
-#ifdef __ARM_FEATURE_FMA
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vfmsq_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c)
{
return (float32x4_t)__builtin_neon_vfmsv4sf (__a, __b, __c);
}
+#pragma GCC pop_options
-#endif
#if __ARM_ARCH >= 8
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vrndn_f32 (float32x2_t __a)
@@ -4515,14 +4509,15 @@
return (uint64x2_t)__builtin_neon_vrsrau_nv2di ((int64x2_t) __a, (int64x2_t) __b, __c);
}
-#ifdef __ARM_FEATURE_CRYPTO
+#pragma GCC push_options
+#pragma GCC target ("fpu=crypto-neon-fp-armv8")
__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__))
vsri_n_p64 (poly64x1_t __a, poly64x1_t __b, const int __c)
{
return (poly64x1_t)__builtin_neon_vsri_ndi (__a, __b, __c);
}
-#endif
+#pragma GCC pop_options
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vsri_n_s8 (int8x8_t __a, int8x8_t __b, const int __c)
{
@@ -4583,14 +4578,15 @@
return (poly16x4_t)__builtin_neon_vsri_nv4hi ((int16x4_t) __a, (int16x4_t) __b, __c);
}
-#ifdef __ARM_FEATURE_CRYPTO
+#pragma GCC push_options
+#pragma GCC target ("fpu=crypto-neon-fp-armv8")
__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__))
vsriq_n_p64 (poly64x2_t __a, poly64x2_t __b, const int __c)
{
return (poly64x2_t)__builtin_neon_vsri_nv2di ((int64x2_t) __a, (int64x2_t) __b, __c);
}
-#endif
+#pragma GCC pop_options
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vsriq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c)
{
@@ -4651,14 +4647,15 @@
return (poly16x8_t)__builtin_neon_vsri_nv8hi ((int16x8_t) __a, (int16x8_t) __b, __c);
}
-#ifdef __ARM_FEATURE_CRYPTO
+#pragma GCC push_options
+#pragma GCC target ("fpu=crypto-neon-fp-armv8")
__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__))
vsli_n_p64 (poly64x1_t __a, poly64x1_t __b, const int __c)
{
return (poly64x1_t)__builtin_neon_vsli_ndi (__a, __b, __c);
}
-#endif
+#pragma GCC pop_options
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vsli_n_s8 (int8x8_t __a, int8x8_t __b, const int __c)
{
@@ -4719,14 +4716,15 @@
return (poly16x4_t)__builtin_neon_vsli_nv4hi ((int16x4_t) __a, (int16x4_t) __b, __c);
}
-#ifdef __ARM_FEATURE_CRYPTO
+#pragma GCC push_options
+#pragma GCC target ("fpu=crypto-neon-fp-armv8")
__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__))
vsliq_n_p64 (poly64x2_t __a, poly64x2_t __b, const int __c)
{
return (poly64x2_t)__builtin_neon_vsli_nv2di ((int64x2_t) __a, (int64x2_t) __b, __c);
}
-#endif
+#pragma GCC pop_options
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vsliq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c)
{
@@ -5545,14 +5543,15 @@
return (uint64x2_t)__builtin_neon_vset_lanev2di ((__builtin_neon_di) __a, (int64x2_t) __b, __c);
}
-#ifdef __ARM_FEATURE_CRYPTO
+#pragma GCC push_options
+#pragma GCC target ("fpu=crypto-neon-fp-armv8")
__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__))
vcreate_p64 (uint64_t __a)
{
return (poly64x1_t)__builtin_neon_vcreatedi ((__builtin_neon_di) __a);
}
-#endif
+#pragma GCC pop_options
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vcreate_s8 (uint64_t __a)
{
@@ -5681,14 +5680,15 @@
return (poly16x4_t)__builtin_neon_vdup_nv4hi ((__builtin_neon_hi) __a);
}
-#ifdef __ARM_FEATURE_CRYPTO
+#pragma GCC push_options
+#pragma GCC target ("fpu=crypto-neon-fp-armv8")
__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__))
vdup_n_p64 (poly64_t __a)
{
return (poly64x1_t)__builtin_neon_vdup_ndi ((__builtin_neon_di) __a);
}
-#endif
+#pragma GCC pop_options
__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vdup_n_s64 (int64_t __a)
{
@@ -5701,14 +5701,15 @@
return (uint64x1_t)__builtin_neon_vdup_ndi ((__builtin_neon_di) __a);
}
-#ifdef __ARM_FEATURE_CRYPTO
+#pragma GCC push_options
+#pragma GCC target ("fpu=crypto-neon-fp-armv8")
__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__))
vdupq_n_p64 (poly64_t __a)
{
return (poly64x2_t)__builtin_neon_vdup_nv2di ((__builtin_neon_di) __a);
}
-#endif
+#pragma GCC pop_options
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vdupq_n_s8 (int8_t __a)
{
@@ -5961,14 +5962,15 @@
return (poly16x4_t)__builtin_neon_vdup_lanev4hi ((int16x4_t) __a, __b);
}
-#ifdef __ARM_FEATURE_CRYPTO
+#pragma GCC push_options
+#pragma GCC target ("fpu=crypto-neon-fp-armv8")
__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__))
vdup_lane_p64 (poly64x1_t __a, const int __b)
{
return (poly64x1_t)__builtin_neon_vdup_lanedi (__a, __b);
}
-#endif
+#pragma GCC pop_options
__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vdup_lane_s64 (int64x1_t __a, const int __b)
{
@@ -6035,14 +6037,15 @@
return (poly16x8_t)__builtin_neon_vdup_lanev8hi ((int16x4_t) __a, __b);
}
-#ifdef __ARM_FEATURE_CRYPTO
+#pragma GCC push_options
+#pragma GCC target ("fpu=crypto-neon-fp-armv8")
__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__))
vdupq_lane_p64 (poly64x1_t __a, const int __b)
{
return (poly64x2_t)__builtin_neon_vdup_lanev2di (__a, __b);
}
-#endif
+#pragma GCC pop_options
__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vdupq_lane_s64 (int64x1_t __a, const int __b)
{
@@ -6055,14 +6058,15 @@
return (uint64x2_t)__builtin_neon_vdup_lanev2di ((int64x1_t) __a, __b);
}
-#ifdef __ARM_FEATURE_CRYPTO
+#pragma GCC push_options
+#pragma GCC target ("fpu=crypto-neon-fp-armv8")
__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__))
vcombine_p64 (poly64x1_t __a, poly64x1_t __b)
{
return (poly64x2_t)__builtin_neon_vcombinedi (__a, __b);
}
-#endif
+#pragma GCC pop_options
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vcombine_s8 (int8x8_t __a, int8x8_t __b)
{
@@ -6137,14 +6141,15 @@
return (poly16x8_t)__builtin_neon_vcombinev4hi ((int16x4_t) __a, (int16x4_t) __b);
}
-#ifdef __ARM_FEATURE_CRYPTO
+#pragma GCC push_options
+#pragma GCC target ("fpu=crypto-neon-fp-armv8")
__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__))
vget_high_p64 (poly64x2_t __a)
{
return (poly64x1_t)__builtin_neon_vget_highv2di ((int64x2_t) __a);
}
-#endif
+#pragma GCC pop_options
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vget_high_s8 (int8x16_t __a)
{
@@ -6281,14 +6286,15 @@
return (poly16x4_t)__builtin_neon_vget_lowv8hi ((int16x8_t) __a);
}
-#ifdef __ARM_FEATURE_CRYPTO
+#pragma GCC push_options
+#pragma GCC target ("fpu=crypto-neon-fp-armv8")
__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__))
vget_low_p64 (poly64x2_t __a)
{
return (poly64x1_t)__builtin_neon_vget_lowv2di ((int64x2_t) __a);
}
-#endif
+#pragma GCC pop_options
__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vget_low_s64 (int64x2_t __a)
{
@@ -6349,7 +6355,8 @@
return (uint32x4_t)__builtin_neon_vcvtuv4sf (__a);
}
-#if ((__ARM_FP & 0x2) != 0)
+#pragma GCC push_options
+#pragma GCC target ("fpu=neon-fp16")
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
vcvt_f16_f32 (float32x4_t __a)
@@ -6357,9 +6364,7 @@
return (float16x4_t)__builtin_neon_vcvtv4hfv4sf (__a);
}
#endif
-#endif
-#if ((__ARM_FP & 0x2) != 0)
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vcvt_f32_f16 (float16x4_t __a)
@@ -6367,7 +6372,7 @@
return (float32x4_t)__builtin_neon_vcvtv4sfv4hf (__a);
}
#endif
-#endif
+#pragma GCC pop_options
__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vcvt_n_s32_f32 (float32x2_t __a, const int __b)
@@ -7377,14 +7382,15 @@
return (int64x2_t)__builtin_neon_vqdmlsl_nv2si (__a, __b, (__builtin_neon_si) __c);
}
-#ifdef __ARM_FEATURE_CRYPTO
+#pragma GCC push_options
+#pragma GCC target ("fpu=crypto-neon-fp-armv8")
__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__))
vext_p64 (poly64x1_t __a, poly64x1_t __b, const int __c)
{
return (poly64x1_t)__builtin_neon_vextdi (__a, __b, __c);
}
-#endif
+#pragma GCC pop_options
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vext_s8 (int8x8_t __a, int8x8_t __b, const int __c)
{
@@ -7451,14 +7457,15 @@
return (poly16x4_t)__builtin_neon_vextv4hi ((int16x4_t) __a, (int16x4_t) __b, __c);
}
-#ifdef __ARM_FEATURE_CRYPTO
+#pragma GCC push_options
+#pragma GCC target ("fpu=crypto-neon-fp-armv8")
__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__))
vextq_p64 (poly64x2_t __a, poly64x2_t __b, const int __c)
{
return (poly64x2_t)__builtin_neon_vextv2di ((int64x2_t) __a, (int64x2_t) __b, __c);
}
-#endif
+#pragma GCC pop_options
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vextq_s8 (int8x16_t __a, int8x16_t __b, const int __c)
{
@@ -7741,14 +7748,15 @@
return (poly8x16_t) __builtin_shuffle (__a, (uint8x16_t) { 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14 });
}
-#ifdef __ARM_FEATURE_CRYPTO
+#pragma GCC push_options
+#pragma GCC target ("fpu=crypto-neon-fp-armv8")
__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__))
vbsl_p64 (uint64x1_t __a, poly64x1_t __b, poly64x1_t __c)
{
return (poly64x1_t)__builtin_neon_vbsldi ((int64x1_t) __a, __b, __c);
}
-#endif
+#pragma GCC pop_options
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vbsl_s8 (uint8x8_t __a, int8x8_t __b, int8x8_t __c)
{
@@ -7815,14 +7823,15 @@
return (poly16x4_t)__builtin_neon_vbslv4hi ((int16x4_t) __a, (int16x4_t) __b, (int16x4_t) __c);
}
-#ifdef __ARM_FEATURE_CRYPTO
+#pragma GCC push_options
+#pragma GCC target ("fpu=crypto-neon-fp-armv8")
__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__))
vbslq_p64 (uint64x2_t __a, poly64x2_t __b, poly64x2_t __c)
{
return (poly64x2_t)__builtin_neon_vbslv2di ((int64x2_t) __a, (int64x2_t) __b, (int64x2_t) __c);
}
-#endif
+#pragma GCC pop_options
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vbslq_s8 (uint8x16_t __a, int8x16_t __b, int8x16_t __c)
{
@@ -8764,14 +8773,15 @@
return __rv;
}
-#ifdef __ARM_FEATURE_CRYPTO
+#pragma GCC push_options
+#pragma GCC target ("fpu=crypto-neon-fp-armv8")
__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__))
vld1_p64 (const poly64_t * __a)
{
return (poly64x1_t)__builtin_neon_vld1di ((const __builtin_neon_di *) __a);
}
-#endif
+#pragma GCC pop_options
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vld1_s8 (const int8_t * __a)
{
@@ -8846,14 +8856,15 @@
return (poly16x4_t)__builtin_neon_vld1v4hi ((const __builtin_neon_hi *) __a);
}
-#ifdef __ARM_FEATURE_CRYPTO
+#pragma GCC push_options
+#pragma GCC target ("fpu=crypto-neon-fp-armv8")
__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__))
vld1q_p64 (const poly64_t * __a)
{
return (poly64x2_t)__builtin_neon_vld1v2di ((const __builtin_neon_di *) __a);
}
-#endif
+#pragma GCC pop_options
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vld1q_s8 (const int8_t * __a)
{
@@ -8990,14 +9001,15 @@
return (poly16x4_t)__builtin_neon_vld1_lanev4hi ((const __builtin_neon_hi *) __a, (int16x4_t) __b, __c);
}
-#ifdef __ARM_FEATURE_CRYPTO
+#pragma GCC push_options
+#pragma GCC target ("fpu=crypto-neon-fp-armv8")
__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__))
vld1_lane_p64 (const poly64_t * __a, poly64x1_t __b, const int __c)
{
return (poly64x1_t)__builtin_neon_vld1_lanedi ((const __builtin_neon_di *) __a, __b, __c);
}
-#endif
+#pragma GCC pop_options
__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vld1_lane_s64 (const int64_t * __a, int64x1_t __b, const int __c)
{
@@ -9072,14 +9084,15 @@
return (poly16x8_t)__builtin_neon_vld1_lanev8hi ((const __builtin_neon_hi *) __a, (int16x8_t) __b, __c);
}
-#ifdef __ARM_FEATURE_CRYPTO
+#pragma GCC push_options
+#pragma GCC target ("fpu=crypto-neon-fp-armv8")
__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__))
vld1q_lane_p64 (const poly64_t * __a, poly64x2_t __b, const int __c)
{
return (poly64x2_t)__builtin_neon_vld1_lanev2di ((const __builtin_neon_di *) __a, (int64x2_t) __b, __c);
}
-#endif
+#pragma GCC pop_options
__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vld1q_lane_s64 (const int64_t * __a, int64x2_t __b, const int __c)
{
@@ -9155,14 +9168,15 @@
return (poly16x4_t)__builtin_neon_vld1_dupv4hi ((const __builtin_neon_hi *) __a);
}
-#ifdef __ARM_FEATURE_CRYPTO
+#pragma GCC push_options
+#pragma GCC target ("fpu=crypto-neon-fp-armv8")
__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__))
vld1_dup_p64 (const poly64_t * __a)
{
return (poly64x1_t)__builtin_neon_vld1_dupdi ((const __builtin_neon_di *) __a);
}
-#endif
+#pragma GCC pop_options
__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vld1_dup_s64 (const int64_t * __a)
{
@@ -9238,14 +9252,15 @@
return (poly16x8_t)__builtin_neon_vld1_dupv8hi ((const __builtin_neon_hi *) __a);
}
-#ifdef __ARM_FEATURE_CRYPTO
+#pragma GCC push_options
+#pragma GCC target ("fpu=crypto-neon-fp-armv8")
__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__))
vld1q_dup_p64 (const poly64_t * __a)
{
return (poly64x2_t)__builtin_neon_vld1_dupv2di ((const __builtin_neon_di *) __a);
}
-#endif
+#pragma GCC pop_options
__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vld1q_dup_s64 (const int64_t * __a)
{
@@ -9258,14 +9273,15 @@
return (uint64x2_t)__builtin_neon_vld1_dupv2di ((const __builtin_neon_di *) __a);
}
-#ifdef __ARM_FEATURE_CRYPTO
+#pragma GCC push_options
+#pragma GCC target ("fpu=crypto-neon-fp-armv8")
__extension__ static __inline void __attribute__ ((__always_inline__))
vst1_p64 (poly64_t * __a, poly64x1_t __b)
{
__builtin_neon_vst1di ((__builtin_neon_di *) __a, __b);
}
-#endif
+#pragma GCC pop_options
__extension__ static __inline void __attribute__ ((__always_inline__))
vst1_s8 (int8_t * __a, int8x8_t __b)
{
@@ -9340,14 +9356,15 @@
__builtin_neon_vst1v4hi ((__builtin_neon_hi *) __a, (int16x4_t) __b);
}
-#ifdef __ARM_FEATURE_CRYPTO
+#pragma GCC push_options
+#pragma GCC target ("fpu=crypto-neon-fp-armv8")
__extension__ static __inline void __attribute__ ((__always_inline__))
vst1q_p64 (poly64_t * __a, poly64x2_t __b)
{
__builtin_neon_vst1v2di ((__builtin_neon_di *) __a, (int64x2_t) __b);
}
-#endif
+#pragma GCC pop_options
__extension__ static __inline void __attribute__ ((__always_inline__))
vst1q_s8 (int8_t * __a, int8x16_t __b)
{
@@ -9484,14 +9501,15 @@
__builtin_neon_vst1_lanev4hi ((__builtin_neon_hi *) __a, (int16x4_t) __b, __c);
}
-#ifdef __ARM_FEATURE_CRYPTO
+#pragma GCC push_options
+#pragma GCC target ("fpu=crypto-neon-fp-armv8")
__extension__ static __inline void __attribute__ ((__always_inline__))
vst1_lane_p64 (poly64_t * __a, poly64x1_t __b, const int __c)
{
__builtin_neon_vst1_lanedi ((__builtin_neon_di *) __a, __b, __c);
}
-#endif
+#pragma GCC pop_options
__extension__ static __inline void __attribute__ ((__always_inline__))
vst1_lane_s64 (int64_t * __a, int64x1_t __b, const int __c)
{
@@ -9566,14 +9584,15 @@
__builtin_neon_vst1_lanev8hi ((__builtin_neon_hi *) __a, (int16x8_t) __b, __c);
}
-#ifdef __ARM_FEATURE_CRYPTO
+#pragma GCC push_options
+#pragma GCC target ("fpu=crypto-neon-fp-armv8")
__extension__ static __inline void __attribute__ ((__always_inline__))
vst1q_lane_p64 (poly64_t * __a, poly64x2_t __b, const int __c)
{
__builtin_neon_vst1_lanev2di ((__builtin_neon_di *) __a, (int64x2_t) __b, __c);
}
-#endif
+#pragma GCC pop_options
__extension__ static __inline void __attribute__ ((__always_inline__))
vst1q_lane_s64 (int64_t * __a, int64x2_t __b, const int __c)
{
@@ -9668,7 +9687,8 @@
return __rv.__i;
}
-#ifdef __ARM_FEATURE_CRYPTO
+#pragma GCC push_options
+#pragma GCC target ("fpu=crypto-neon-fp-armv8")
__extension__ static __inline poly64x1x2_t __attribute__ ((__always_inline__))
vld2_p64 (const poly64_t * __a)
{
@@ -9677,7 +9697,7 @@
return __rv.__i;
}
-#endif
+#pragma GCC pop_options
__extension__ static __inline int64x1x2_t __attribute__ ((__always_inline__))
vld2_s64 (const int64_t * __a)
{
@@ -10015,7 +10035,8 @@
return __rv.__i;
}
-#ifdef __ARM_FEATURE_CRYPTO
+#pragma GCC push_options
+#pragma GCC target ("fpu=crypto-neon-fp-armv8")
__extension__ static __inline poly64x1x2_t __attribute__ ((__always_inline__))
vld2_dup_p64 (const poly64_t * __a)
{
@@ -10024,7 +10045,7 @@
return __rv.__i;
}
-#endif
+#pragma GCC pop_options
__extension__ static __inline int64x1x2_t __attribute__ ((__always_inline__))
vld2_dup_s64 (const int64_t * __a)
{
@@ -10113,7 +10134,8 @@
__builtin_neon_vst2v4hi ((__builtin_neon_hi *) __a, __bu.__o);
}
-#ifdef __ARM_FEATURE_CRYPTO
+#pragma GCC push_options
+#pragma GCC target ("fpu=crypto-neon-fp-armv8")
__extension__ static __inline void __attribute__ ((__always_inline__))
vst2_p64 (poly64_t * __a, poly64x1x2_t __b)
{
@@ -10121,7 +10143,7 @@
__builtin_neon_vst2di ((__builtin_neon_di *) __a, __bu.__o);
}
-#endif
+#pragma GCC pop_options
__extension__ static __inline void __attribute__ ((__always_inline__))
vst2_s64 (int64_t * __a, int64x1x2_t __b)
{
@@ -10413,7 +10435,8 @@
return __rv.__i;
}
-#ifdef __ARM_FEATURE_CRYPTO
+#pragma GCC push_options
+#pragma GCC target ("fpu=crypto-neon-fp-armv8")
__extension__ static __inline poly64x1x3_t __attribute__ ((__always_inline__))
vld3_p64 (const poly64_t * __a)
{
@@ -10422,7 +10445,7 @@
return __rv.__i;
}
-#endif
+#pragma GCC pop_options
__extension__ static __inline int64x1x3_t __attribute__ ((__always_inline__))
vld3_s64 (const int64_t * __a)
{
@@ -10760,7 +10783,8 @@
return __rv.__i;
}
-#ifdef __ARM_FEATURE_CRYPTO
+#pragma GCC push_options
+#pragma GCC target ("fpu=crypto-neon-fp-armv8")
__extension__ static __inline poly64x1x3_t __attribute__ ((__always_inline__))
vld3_dup_p64 (const poly64_t * __a)
{
@@ -10769,7 +10793,7 @@
return __rv.__i;
}
-#endif
+#pragma GCC pop_options
__extension__ static __inline int64x1x3_t __attribute__ ((__always_inline__))
vld3_dup_s64 (const int64_t * __a)
{
@@ -10858,7 +10882,8 @@
__builtin_neon_vst3v4hi ((__builtin_neon_hi *) __a, __bu.__o);
}
-#ifdef __ARM_FEATURE_CRYPTO
+#pragma GCC push_options
+#pragma GCC target ("fpu=crypto-neon-fp-armv8")
__extension__ static __inline void __attribute__ ((__always_inline__))
vst3_p64 (poly64_t * __a, poly64x1x3_t __b)
{
@@ -10866,7 +10891,7 @@
__builtin_neon_vst3di ((__builtin_neon_di *) __a, __bu.__o);
}
-#endif
+#pragma GCC pop_options
__extension__ static __inline void __attribute__ ((__always_inline__))
vst3_s64 (int64_t * __a, int64x1x3_t __b)
{
@@ -11158,7 +11183,8 @@
return __rv.__i;
}
-#ifdef __ARM_FEATURE_CRYPTO
+#pragma GCC push_options
+#pragma GCC target ("fpu=crypto-neon-fp-armv8")
__extension__ static __inline poly64x1x4_t __attribute__ ((__always_inline__))
vld4_p64 (const poly64_t * __a)
{
@@ -11167,7 +11193,7 @@
return __rv.__i;
}
-#endif
+#pragma GCC pop_options
__extension__ static __inline int64x1x4_t __attribute__ ((__always_inline__))
vld4_s64 (const int64_t * __a)
{
@@ -11507,7 +11533,8 @@
return __rv.__i;
}
-#ifdef __ARM_FEATURE_CRYPTO
+#pragma GCC push_options
+#pragma GCC target ("fpu=crypto-neon-fp-armv8")
__extension__ static __inline poly64x1x4_t __attribute__ ((__always_inline__))
vld4_dup_p64 (const poly64_t * __a)
{
@@ -11516,7 +11543,7 @@
return __rv.__i;
}
-#endif
+#pragma GCC pop_options
__extension__ static __inline int64x1x4_t __attribute__ ((__always_inline__))
vld4_dup_s64 (const int64_t * __a)
{
@@ -11605,7 +11632,8 @@
__builtin_neon_vst4v4hi ((__builtin_neon_hi *) __a, __bu.__o);
}
-#ifdef __ARM_FEATURE_CRYPTO
+#pragma GCC push_options
+#pragma GCC target ("fpu=crypto-neon-fp-armv8")
__extension__ static __inline void __attribute__ ((__always_inline__))
vst4_p64 (poly64_t * __a, poly64x1x4_t __b)
{
@@ -11613,7 +11641,7 @@
__builtin_neon_vst4di ((__builtin_neon_di *) __a, __bu.__o);
}
-#endif
+#pragma GCC pop_options
__extension__ static __inline void __attribute__ ((__always_inline__))
vst4_s64 (int64_t * __a, int64x1x4_t __b)
{
@@ -12323,14 +12351,15 @@
return (poly8x8_t)__builtin_neon_vreinterpretv8qiv2sf (__a);
}
-#ifdef __ARM_FEATURE_CRYPTO
+#pragma GCC push_options
+#pragma GCC target ("fpu=crypto-neon-fp-armv8")
__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vreinterpret_p8_p64 (poly64x1_t __a)
{
return (poly8x8_t)__builtin_neon_vreinterpretv8qidi (__a);
}
-#endif
+#pragma GCC pop_options
__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vreinterpret_p8_s64 (int64x1_t __a)
{
@@ -12399,14 +12428,15 @@
return (poly16x4_t)__builtin_neon_vreinterpretv4hiv2sf (__a);
}
-#ifdef __ARM_FEATURE_CRYPTO
+#pragma GCC push_options
+#pragma GCC target ("fpu=crypto-neon-fp-armv8")
__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vreinterpret_p16_p64 (poly64x1_t __a)
{
return (poly16x4_t)__builtin_neon_vreinterpretv4hidi (__a);
}
-#endif
+#pragma GCC pop_options
__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vreinterpret_p16_s64 (int64x1_t __a)
{
@@ -12479,14 +12509,15 @@
}
#endif
-#ifdef __ARM_FEATURE_CRYPTO
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+#pragma GCC push_options
+#pragma GCC target ("fpu=crypto-neon-fp-armv8")
__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
vreinterpret_f16_p64 (poly64x1_t __a)
{
return (float16x4_t) __a;
}
-#endif
+#pragma GCC pop_options
#endif
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
@@ -12573,14 +12604,15 @@
}
#endif
-#ifdef __ARM_FEATURE_CRYPTO
+#pragma GCC push_options
+#pragma GCC target ("fpu=crypto-neon-fp-armv8")
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vreinterpret_f32_p64 (poly64x1_t __a)
{
return (float32x2_t)__builtin_neon_vreinterpretv2sfdi (__a);
}
-#endif
+#pragma GCC pop_options
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vreinterpret_f32_s64 (int64x1_t __a)
{
@@ -12629,105 +12661,83 @@
return (float32x2_t)__builtin_neon_vreinterpretv2sfv2si ((int32x2_t) __a);
}
-#ifdef __ARM_FEATURE_CRYPTO
+#pragma GCC push_options
+#pragma GCC target ("fpu=crypto-neon-fp-armv8")
__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__))
vreinterpret_p64_p8 (poly8x8_t __a)
{
return (poly64x1_t)__builtin_neon_vreinterpretdiv8qi ((int8x8_t) __a);
}
-#endif
-#ifdef __ARM_FEATURE_CRYPTO
__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__))
vreinterpret_p64_p16 (poly16x4_t __a)
{
return (poly64x1_t)__builtin_neon_vreinterpretdiv4hi ((int16x4_t) __a);
}
-#endif
-
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
-#ifdef __ARM_FEATURE_CRYPTO
__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__))
vreinterpret_p64_f16 (float16x4_t __a)
{
return (poly64x1_t) __a;
}
#endif
-#endif
-#ifdef __ARM_FEATURE_CRYPTO
__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__))
vreinterpret_p64_f32 (float32x2_t __a)
{
return (poly64x1_t)__builtin_neon_vreinterpretdiv2sf (__a);
}
-#endif
-#ifdef __ARM_FEATURE_CRYPTO
__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__))
vreinterpret_p64_s64 (int64x1_t __a)
{
return (poly64x1_t)__builtin_neon_vreinterpretdidi (__a);
}
-#endif
-#ifdef __ARM_FEATURE_CRYPTO
__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__))
vreinterpret_p64_u64 (uint64x1_t __a)
{
return (poly64x1_t)__builtin_neon_vreinterpretdidi ((int64x1_t) __a);
}
-#endif
-#ifdef __ARM_FEATURE_CRYPTO
__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__))
vreinterpret_p64_s8 (int8x8_t __a)
{
return (poly64x1_t)__builtin_neon_vreinterpretdiv8qi (__a);
}
-#endif
-#ifdef __ARM_FEATURE_CRYPTO
__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__))
vreinterpret_p64_s16 (int16x4_t __a)
{
return (poly64x1_t)__builtin_neon_vreinterpretdiv4hi (__a);
}
-#endif
-#ifdef __ARM_FEATURE_CRYPTO
__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__))
vreinterpret_p64_s32 (int32x2_t __a)
{
return (poly64x1_t)__builtin_neon_vreinterpretdiv2si (__a);
}
-#endif
-#ifdef __ARM_FEATURE_CRYPTO
__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__))
vreinterpret_p64_u8 (uint8x8_t __a)
{
return (poly64x1_t)__builtin_neon_vreinterpretdiv8qi ((int8x8_t) __a);
}
-#endif
-#ifdef __ARM_FEATURE_CRYPTO
__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__))
vreinterpret_p64_u16 (uint16x4_t __a)
{
return (poly64x1_t)__builtin_neon_vreinterpretdiv4hi ((int16x4_t) __a);
}
-#endif
-#ifdef __ARM_FEATURE_CRYPTO
__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__))
vreinterpret_p64_u32 (uint32x2_t __a)
{
return (poly64x1_t)__builtin_neon_vreinterpretdiv2si ((int32x2_t) __a);
}
-#endif
+#pragma GCC pop_options
__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vreinterpret_s64_p8 (poly8x8_t __a)
{
@@ -12754,14 +12764,15 @@
return (int64x1_t)__builtin_neon_vreinterpretdiv2sf (__a);
}
-#ifdef __ARM_FEATURE_CRYPTO
+#pragma GCC push_options
+#pragma GCC target ("fpu=crypto-neon-fp-armv8")
__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vreinterpret_s64_p64 (poly64x1_t __a)
{
return (int64x1_t)__builtin_neon_vreinterpretdidi (__a);
}
-#endif
+#pragma GCC pop_options
__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vreinterpret_s64_u64 (uint64x1_t __a)
{
@@ -12830,14 +12841,15 @@
return (uint64x1_t)__builtin_neon_vreinterpretdiv2sf (__a);
}
-#ifdef __ARM_FEATURE_CRYPTO
+#pragma GCC push_options
+#pragma GCC target ("fpu=crypto-neon-fp-armv8")
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vreinterpret_u64_p64 (poly64x1_t __a)
{
return (uint64x1_t)__builtin_neon_vreinterpretdidi (__a);
}
-#endif
+#pragma GCC pop_options
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vreinterpret_u64_s64 (int64x1_t __a)
{
@@ -12906,14 +12918,15 @@
return (int8x8_t)__builtin_neon_vreinterpretv8qiv2sf (__a);
}
-#ifdef __ARM_FEATURE_CRYPTO
+#pragma GCC push_options
+#pragma GCC target ("fpu=crypto-neon-fp-armv8")
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vreinterpret_s8_p64 (poly64x1_t __a)
{
return (int8x8_t)__builtin_neon_vreinterpretv8qidi (__a);
}
-#endif
+#pragma GCC pop_options
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vreinterpret_s8_s64 (int64x1_t __a)
{
@@ -12982,14 +12995,15 @@
return (int16x4_t)__builtin_neon_vreinterpretv4hiv2sf (__a);
}
-#ifdef __ARM_FEATURE_CRYPTO
+#pragma GCC push_options
+#pragma GCC target ("fpu=crypto-neon-fp-armv8")
__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vreinterpret_s16_p64 (poly64x1_t __a)
{
return (int16x4_t)__builtin_neon_vreinterpretv4hidi (__a);
}
-#endif
+#pragma GCC pop_options
__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vreinterpret_s16_s64 (int64x1_t __a)
{
@@ -13058,14 +13072,15 @@
return (int32x2_t)__builtin_neon_vreinterpretv2siv2sf (__a);
}
-#ifdef __ARM_FEATURE_CRYPTO
+#pragma GCC push_options
+#pragma GCC target ("fpu=crypto-neon-fp-armv8")
__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vreinterpret_s32_p64 (poly64x1_t __a)
{
return (int32x2_t)__builtin_neon_vreinterpretv2sidi (__a);
}
-#endif
+#pragma GCC pop_options
__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vreinterpret_s32_s64 (int64x1_t __a)
{
@@ -13134,14 +13149,15 @@
return (uint8x8_t)__builtin_neon_vreinterpretv8qiv2sf (__a);
}
-#ifdef __ARM_FEATURE_CRYPTO
+#pragma GCC push_options
+#pragma GCC target ("fpu=crypto-neon-fp-armv8")
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vreinterpret_u8_p64 (poly64x1_t __a)
{
return (uint8x8_t)__builtin_neon_vreinterpretv8qidi (__a);
}
-#endif
+#pragma GCC pop_options
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vreinterpret_u8_s64 (int64x1_t __a)
{
@@ -13210,14 +13226,15 @@
return (uint16x4_t)__builtin_neon_vreinterpretv4hiv2sf (__a);
}
-#ifdef __ARM_FEATURE_CRYPTO
+#pragma GCC push_options
+#pragma GCC target ("fpu=crypto-neon-fp-armv8")
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vreinterpret_u16_p64 (poly64x1_t __a)
{
return (uint16x4_t)__builtin_neon_vreinterpretv4hidi (__a);
}
-#endif
+#pragma GCC pop_options
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vreinterpret_u16_s64 (int64x1_t __a)
{
@@ -13286,14 +13303,15 @@
return (uint32x2_t)__builtin_neon_vreinterpretv2siv2sf (__a);
}
-#ifdef __ARM_FEATURE_CRYPTO
+#pragma GCC push_options
+#pragma GCC target ("fpu=crypto-neon-fp-armv8")
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vreinterpret_u32_p64 (poly64x1_t __a)
{
return (uint32x2_t)__builtin_neon_vreinterpretv2sidi (__a);
}
-#endif
+#pragma GCC pop_options
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vreinterpret_u32_s64 (int64x1_t __a)
{
@@ -13356,22 +13374,22 @@
return (poly8x16_t)__builtin_neon_vreinterpretv16qiv4sf (__a);
}
-#ifdef __ARM_FEATURE_CRYPTO
+#pragma GCC push_options
+#pragma GCC target ("fpu=crypto-neon-fp-armv8")
__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vreinterpretq_p8_p64 (poly64x2_t __a)
{
return (poly8x16_t)__builtin_neon_vreinterpretv16qiv2di ((int64x2_t) __a);
}
-#endif
-#ifdef __ARM_FEATURE_CRYPTO
+
__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vreinterpretq_p8_p128 (poly128_t __a)
{
return (poly8x16_t)__builtin_neon_vreinterpretv16qiti ((__builtin_neon_ti) __a);
}
-#endif
+#pragma GCC pop_options
__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vreinterpretq_p8_s64 (int64x2_t __a)
{
@@ -13440,22 +13458,21 @@
return (poly16x8_t)__builtin_neon_vreinterpretv8hiv4sf (__a);
}
-#ifdef __ARM_FEATURE_CRYPTO
+#pragma GCC push_options
+#pragma GCC target ("fpu=crypto-neon-fp-armv8")
__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vreinterpretq_p16_p64 (poly64x2_t __a)
{
return (poly16x8_t)__builtin_neon_vreinterpretv8hiv2di ((int64x2_t) __a);
}
-#endif
-#ifdef __ARM_FEATURE_CRYPTO
__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vreinterpretq_p16_p128 (poly128_t __a)
{
return (poly16x8_t)__builtin_neon_vreinterpretv8hiti ((__builtin_neon_ti) __a);
}
-#endif
+#pragma GCC pop_options
__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vreinterpretq_p16_s64 (int64x2_t __a)
{
@@ -13528,25 +13545,26 @@
}
#endif
+#pragma GCC push_options
+#pragma GCC target ("fpu=crypto-neon-fp-armv8")
+
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
-#ifdef __ARM_FEATURE_CRYPTO
__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
vreinterpretq_f16_p64 (poly64x2_t __a)
{
return (float16x8_t) __a;
}
#endif
-#endif
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
-#ifdef __ARM_FEATURE_CRYPTO
__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
vreinterpretq_f16_p128 (poly128_t __a)
{
return (float16x8_t) __a;
}
#endif
-#endif
+
+#pragma GCC pop_options
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
@@ -13632,22 +13650,21 @@
}
#endif
-#ifdef __ARM_FEATURE_CRYPTO
+#pragma GCC push_options
+#pragma GCC target ("fpu=crypto-neon-fp-armv8")
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vreinterpretq_f32_p64 (poly64x2_t __a)
{
return (float32x4_t)__builtin_neon_vreinterpretv4sfv2di ((int64x2_t) __a);
}
-#endif
-#ifdef __ARM_FEATURE_CRYPTO
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vreinterpretq_f32_p128 (poly128_t __a)
{
return (float32x4_t)__builtin_neon_vreinterpretv4sfti ((__builtin_neon_ti) __a);
}
-#endif
+#pragma GCC pop_options
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vreinterpretq_f32_s64 (int64x2_t __a)
{
@@ -13696,24 +13713,20 @@
return (float32x4_t)__builtin_neon_vreinterpretv4sfv4si ((int32x4_t) __a);
}
-#ifdef __ARM_FEATURE_CRYPTO
+#pragma GCC push_options
+#pragma GCC target ("fpu=crypto-neon-fp-armv8")
__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__))
vreinterpretq_p64_p8 (poly8x16_t __a)
{
return (poly64x2_t)__builtin_neon_vreinterpretv2div16qi ((int8x16_t) __a);
}
-#endif
-#ifdef __ARM_FEATURE_CRYPTO
__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__))
vreinterpretq_p64_p16 (poly16x8_t __a)
{
return (poly64x2_t)__builtin_neon_vreinterpretv2div8hi ((int16x8_t) __a);
}
-#endif
-
-#ifdef __ARM_FEATURE_CRYPTO
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__))
vreinterpretq_p64_f16 (float16x8_t __a)
@@ -13721,105 +13734,79 @@
return (poly64x2_t) __a;
}
#endif
-#endif
-#ifdef __ARM_FEATURE_CRYPTO
__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__))
vreinterpretq_p64_f32 (float32x4_t __a)
{
return (poly64x2_t)__builtin_neon_vreinterpretv2div4sf (__a);
}
-#endif
-#ifdef __ARM_FEATURE_CRYPTO
__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__))
vreinterpretq_p64_p128 (poly128_t __a)
{
return (poly64x2_t)__builtin_neon_vreinterpretv2diti ((__builtin_neon_ti) __a);
}
-#endif
-#ifdef __ARM_FEATURE_CRYPTO
__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__))
vreinterpretq_p64_s64 (int64x2_t __a)
{
return (poly64x2_t)__builtin_neon_vreinterpretv2div2di (__a);
}
-#endif
-#ifdef __ARM_FEATURE_CRYPTO
__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__))
vreinterpretq_p64_u64 (uint64x2_t __a)
{
return (poly64x2_t)__builtin_neon_vreinterpretv2div2di ((int64x2_t) __a);
}
-#endif
-#ifdef __ARM_FEATURE_CRYPTO
__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__))
vreinterpretq_p64_s8 (int8x16_t __a)
{
return (poly64x2_t)__builtin_neon_vreinterpretv2div16qi (__a);
}
-#endif
-#ifdef __ARM_FEATURE_CRYPTO
__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__))
vreinterpretq_p64_s16 (int16x8_t __a)
{
return (poly64x2_t)__builtin_neon_vreinterpretv2div8hi (__a);
}
-#endif
-#ifdef __ARM_FEATURE_CRYPTO
__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__))
vreinterpretq_p64_s32 (int32x4_t __a)
{
return (poly64x2_t)__builtin_neon_vreinterpretv2div4si (__a);
}
-#endif
-#ifdef __ARM_FEATURE_CRYPTO
__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__))
vreinterpretq_p64_u8 (uint8x16_t __a)
{
return (poly64x2_t)__builtin_neon_vreinterpretv2div16qi ((int8x16_t) __a);
}
-#endif
-#ifdef __ARM_FEATURE_CRYPTO
__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__))
vreinterpretq_p64_u16 (uint16x8_t __a)
{
return (poly64x2_t)__builtin_neon_vreinterpretv2div8hi ((int16x8_t) __a);
}
-#endif
-#ifdef __ARM_FEATURE_CRYPTO
__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__))
vreinterpretq_p64_u32 (uint32x4_t __a)
{
return (poly64x2_t)__builtin_neon_vreinterpretv2div4si ((int32x4_t) __a);
}
-#endif
-#ifdef __ARM_FEATURE_CRYPTO
__extension__ static __inline poly128_t __attribute__ ((__always_inline__))
vreinterpretq_p128_p8 (poly8x16_t __a)
{
return (poly128_t)__builtin_neon_vreinterprettiv16qi ((int8x16_t) __a);
}
-#endif
-#ifdef __ARM_FEATURE_CRYPTO
__extension__ static __inline poly128_t __attribute__ ((__always_inline__))
vreinterpretq_p128_p16 (poly16x8_t __a)
{
return (poly128_t)__builtin_neon_vreinterprettiv8hi ((int16x8_t) __a);
}
-#endif
-#ifdef __ARM_FEATURE_CRYPTO
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
__extension__ static __inline poly128_t __attribute__ ((__always_inline__))
vreinterpretq_p128_f16 (float16x8_t __a)
@@ -13827,88 +13814,68 @@
return (poly128_t) __a;
}
#endif
-#endif
-#ifdef __ARM_FEATURE_CRYPTO
__extension__ static __inline poly128_t __attribute__ ((__always_inline__))
vreinterpretq_p128_f32 (float32x4_t __a)
{
return (poly128_t)__builtin_neon_vreinterprettiv4sf (__a);
}
-#endif
-#ifdef __ARM_FEATURE_CRYPTO
__extension__ static __inline poly128_t __attribute__ ((__always_inline__))
vreinterpretq_p128_p64 (poly64x2_t __a)
{
return (poly128_t)__builtin_neon_vreinterprettiv2di ((int64x2_t) __a);
}
-#endif
-#ifdef __ARM_FEATURE_CRYPTO
__extension__ static __inline poly128_t __attribute__ ((__always_inline__))
vreinterpretq_p128_s64 (int64x2_t __a)
{
return (poly128_t)__builtin_neon_vreinterprettiv2di (__a);
}
-#endif
-#ifdef __ARM_FEATURE_CRYPTO
__extension__ static __inline poly128_t __attribute__ ((__always_inline__))
vreinterpretq_p128_u64 (uint64x2_t __a)
{
return (poly128_t)__builtin_neon_vreinterprettiv2di ((int64x2_t) __a);
}
-#endif
-#ifdef __ARM_FEATURE_CRYPTO
__extension__ static __inline poly128_t __attribute__ ((__always_inline__))
vreinterpretq_p128_s8 (int8x16_t __a)
{
return (poly128_t)__builtin_neon_vreinterprettiv16qi (__a);
}
-#endif
-#ifdef __ARM_FEATURE_CRYPTO
__extension__ static __inline poly128_t __attribute__ ((__always_inline__))
vreinterpretq_p128_s16 (int16x8_t __a)
{
return (poly128_t)__builtin_neon_vreinterprettiv8hi (__a);
}
-#endif
-#ifdef __ARM_FEATURE_CRYPTO
__extension__ static __inline poly128_t __attribute__ ((__always_inline__))
vreinterpretq_p128_s32 (int32x4_t __a)
{
return (poly128_t)__builtin_neon_vreinterprettiv4si (__a);
}
-#endif
-#ifdef __ARM_FEATURE_CRYPTO
__extension__ static __inline poly128_t __attribute__ ((__always_inline__))
vreinterpretq_p128_u8 (uint8x16_t __a)
{
return (poly128_t)__builtin_neon_vreinterprettiv16qi ((int8x16_t) __a);
}
-#endif
-#ifdef __ARM_FEATURE_CRYPTO
__extension__ static __inline poly128_t __attribute__ ((__always_inline__))
vreinterpretq_p128_u16 (uint16x8_t __a)
{
return (poly128_t)__builtin_neon_vreinterprettiv8hi ((int16x8_t) __a);
}
-#endif
-#ifdef __ARM_FEATURE_CRYPTO
__extension__ static __inline poly128_t __attribute__ ((__always_inline__))
vreinterpretq_p128_u32 (uint32x4_t __a)
{
return (poly128_t)__builtin_neon_vreinterprettiv4si ((int32x4_t) __a);
}
-#endif
+#pragma GCC pop_options
__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vreinterpretq_s64_p8 (poly8x16_t __a)
{
@@ -13935,22 +13902,21 @@
return (int64x2_t)__builtin_neon_vreinterpretv2div4sf (__a);
}
-#ifdef __ARM_FEATURE_CRYPTO
+#pragma GCC push_options
+#pragma GCC target ("fpu=crypto-neon-fp-armv8")
__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vreinterpretq_s64_p64 (poly64x2_t __a)
{
return (int64x2_t)__builtin_neon_vreinterpretv2div2di ((int64x2_t) __a);
}
-#endif
-#ifdef __ARM_FEATURE_CRYPTO
__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vreinterpretq_s64_p128 (poly128_t __a)
{
return (int64x2_t)__builtin_neon_vreinterpretv2diti ((__builtin_neon_ti) __a);
}
-#endif
+#pragma GCC pop_options
__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vreinterpretq_s64_u64 (uint64x2_t __a)
{
@@ -14019,22 +13985,21 @@
return (uint64x2_t)__builtin_neon_vreinterpretv2div4sf (__a);
}
-#ifdef __ARM_FEATURE_CRYPTO
+#pragma GCC push_options
+#pragma GCC target ("fpu=crypto-neon-fp-armv8")
__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vreinterpretq_u64_p64 (poly64x2_t __a)
{
return (uint64x2_t)__builtin_neon_vreinterpretv2div2di ((int64x2_t) __a);
}
-#endif
-#ifdef __ARM_FEATURE_CRYPTO
__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vreinterpretq_u64_p128 (poly128_t __a)
{
return (uint64x2_t)__builtin_neon_vreinterpretv2diti ((__builtin_neon_ti) __a);
}
-#endif
+#pragma GCC pop_options
__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vreinterpretq_u64_s64 (int64x2_t __a)
{
@@ -14103,22 +14068,21 @@
return (int8x16_t)__builtin_neon_vreinterpretv16qiv4sf (__a);
}
-#ifdef __ARM_FEATURE_CRYPTO
+#pragma GCC push_options
+#pragma GCC target ("fpu=crypto-neon-fp-armv8")
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vreinterpretq_s8_p64 (poly64x2_t __a)
{
return (int8x16_t)__builtin_neon_vreinterpretv16qiv2di ((int64x2_t) __a);
}
-#endif
-#ifdef __ARM_FEATURE_CRYPTO
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vreinterpretq_s8_p128 (poly128_t __a)
{
return (int8x16_t)__builtin_neon_vreinterpretv16qiti ((__builtin_neon_ti) __a);
}
-#endif
+#pragma GCC pop_options
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vreinterpretq_s8_s64 (int64x2_t __a)
{
@@ -14187,22 +14151,21 @@
return (int16x8_t)__builtin_neon_vreinterpretv8hiv4sf (__a);
}
-#ifdef __ARM_FEATURE_CRYPTO
+#pragma GCC push_options
+#pragma GCC target ("fpu=crypto-neon-fp-armv8")
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vreinterpretq_s16_p64 (poly64x2_t __a)
{
return (int16x8_t)__builtin_neon_vreinterpretv8hiv2di ((int64x2_t) __a);
}
-#endif
-#ifdef __ARM_FEATURE_CRYPTO
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vreinterpretq_s16_p128 (poly128_t __a)
{
return (int16x8_t)__builtin_neon_vreinterpretv8hiti ((__builtin_neon_ti) __a);
}
-#endif
+#pragma GCC pop_options
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vreinterpretq_s16_s64 (int64x2_t __a)
{
@@ -14271,22 +14234,21 @@
return (int32x4_t)__builtin_neon_vreinterpretv4siv4sf (__a);
}
-#ifdef __ARM_FEATURE_CRYPTO
+#pragma GCC push_options
+#pragma GCC target ("fpu=crypto-neon-fp-armv8")
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vreinterpretq_s32_p64 (poly64x2_t __a)
{
return (int32x4_t)__builtin_neon_vreinterpretv4siv2di ((int64x2_t) __a);
}
-#endif
-#ifdef __ARM_FEATURE_CRYPTO
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vreinterpretq_s32_p128 (poly128_t __a)
{
return (int32x4_t)__builtin_neon_vreinterpretv4siti ((__builtin_neon_ti) __a);
}
-#endif
+#pragma GCC pop_options
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vreinterpretq_s32_s64 (int64x2_t __a)
{
@@ -14355,22 +14317,21 @@
return (uint8x16_t)__builtin_neon_vreinterpretv16qiv4sf (__a);
}
-#ifdef __ARM_FEATURE_CRYPTO
+#pragma GCC push_options
+#pragma GCC target ("fpu=crypto-neon-fp-armv8")
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vreinterpretq_u8_p64 (poly64x2_t __a)
{
return (uint8x16_t)__builtin_neon_vreinterpretv16qiv2di ((int64x2_t) __a);
}
-#endif
-#ifdef __ARM_FEATURE_CRYPTO
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vreinterpretq_u8_p128 (poly128_t __a)
{
return (uint8x16_t)__builtin_neon_vreinterpretv16qiti ((__builtin_neon_ti) __a);
}
-#endif
+#pragma GCC pop_options
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vreinterpretq_u8_s64 (int64x2_t __a)
{
@@ -14439,22 +14400,21 @@
return (uint16x8_t)__builtin_neon_vreinterpretv8hiv4sf (__a);
}
-#ifdef __ARM_FEATURE_CRYPTO
+#pragma GCC push_options
+#pragma GCC target ("fpu=crypto-neon-fp-armv8")
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vreinterpretq_u16_p64 (poly64x2_t __a)
{
return (uint16x8_t)__builtin_neon_vreinterpretv8hiv2di ((int64x2_t) __a);
}
-#endif
-#ifdef __ARM_FEATURE_CRYPTO
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vreinterpretq_u16_p128 (poly128_t __a)
{
return (uint16x8_t)__builtin_neon_vreinterpretv8hiti ((__builtin_neon_ti) __a);
}
-#endif
+#pragma GCC pop_options
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vreinterpretq_u16_s64 (int64x2_t __a)
{
@@ -14523,22 +14483,21 @@
return (uint32x4_t)__builtin_neon_vreinterpretv4siv4sf (__a);
}
-#ifdef __ARM_FEATURE_CRYPTO
+#pragma GCC push_options
+#pragma GCC target ("fpu=crypto-neon-fp-armv8")
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vreinterpretq_u32_p64 (poly64x2_t __a)
{
return (uint32x4_t)__builtin_neon_vreinterpretv4siv2di ((int64x2_t) __a);
}
-#endif
-#ifdef __ARM_FEATURE_CRYPTO
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vreinterpretq_u32_p128 (poly128_t __a)
{
return (uint32x4_t)__builtin_neon_vreinterpretv4siti ((__builtin_neon_ti) __a);
}
-#endif
+#pragma GCC pop_options
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vreinterpretq_u32_s64 (int64x2_t __a)
{
@@ -14582,8 +14541,8 @@
}
-#ifdef __ARM_FEATURE_CRYPTO
-
+#pragma GCC push_options
+#pragma GCC target ("fpu=crypto-neon-fp-armv8")
__extension__ static __inline poly128_t __attribute__ ((__always_inline__))
vldrq_p128 (poly128_t const * __ptr)
{
@@ -14753,9 +14712,12 @@
return (poly128_t) __builtin_arm_crypto_vmullp64 ((uint64_t) __t1, (uint64_t) __t2);
}
-#endif
+#pragma GCC pop_options
+
#ifdef __cplusplus
}
#endif
-#endif
+
+#pragma GCC pop_options
+
#endif
diff -ruN '--exclude=#*#' '--exclude=.svn' '--exclude=*~' -ruN gnu_trunk.p4/gcc/gcc/testsuite/gcc.target/arm/attr-neon3.c gnu_trunk.p5/gcc/gcc/testsuite/gcc.target/arm/attr-neon3.c
--- gnu_trunk.p4/gcc/gcc/testsuite/gcc.target/arm/attr-neon3.c 1970-01-01 01:00:00.000000000 +0100
+++ gnu_trunk.p5/gcc/gcc/testsuite/gcc.target/arm/attr-neon3.c 2015-10-26 13:44:11.790368746 +0100
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_crypto_ok } */
+/* { dg-options "-O2 -mfloat-abi=softfp -mfpu=vfp" } */
+
+#include <arm_neon.h>
+
+/* Check that neon is used. */
+int8x8_t __attribute__ ((target("fpu=neon")))
+my (int8x8_t __a, int8x8_t __b)
+{
+ return __a + __b;
+}
+
+/* Check that crypto builtins are recognized. */
+poly128_t __attribute__ ((target("fpu=crypto-neon-fp-armv8")))
+foo (poly128_t* ptr)
+{
+ return vldrq_p128 (ptr);
+}
+
+/* Check that default mode is restored. */
+int8x8_t
+my1 (int8x8_t __a, int8x8_t __b)
+{
+ return __a + __b;
+}
+
+/* { dg-final { scan-assembler-times "\.fpu vfp" 1 } } */
+/* { dg-final { scan-assembler-times "\.fpu neon" 1 } } */
+/* { dg-final { scan-assembler-times "\.fpu crypto-neon-fp-armv8" 1 } } */
+/* { dg-final { scan-assembler-times "vld1" 1 } } */
+/* { dg-final { scan-assembler-times "vadd" 1} } */
diff -ruN '--exclude=#*#' '--exclude=.svn' '--exclude=*~' -ruN gnu_trunk.p4/gcc/gcc/testsuite/gcc.target/arm/attr-neon-fp16.c gnu_trunk.p5/gcc/gcc/testsuite/gcc.target/arm/attr-neon-fp16.c
--- gnu_trunk.p4/gcc/gcc/testsuite/gcc.target/arm/attr-neon-fp16.c 1970-01-01 01:00:00.000000000 +0100
+++ gnu_trunk.p5/gcc/gcc/testsuite/gcc.target/arm/attr-neon-fp16.c 2015-11-04 13:30:23.006138103 +0100
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-mfp16-format=ieee -mfloat-abi=softfp" } */
+
+#include "arm_neon.h"
+
+float16x4_t __attribute__((target("fpu=neon-fp16")))
+foo (float32x4_t arg)
+{
+ return vcvt_f16_f32 (arg);
+}
diff -ruN '--exclude=#*#' '--exclude=.svn' '--exclude=*~' -ruN gnu_trunk.p4/gcc/gcc/testsuite/lib/target-supports.exp gnu_trunk.p5/gcc/gcc/testsuite/lib/target-supports.exp
--- gnu_trunk.p4/gcc/gcc/testsuite/lib/target-supports.exp 2015-11-10 13:39:31.689982689 +0100
+++ gnu_trunk.p5/gcc/gcc/testsuite/lib/target-supports.exp 2015-11-12 11:06:27.062849846 +0100
@@ -2850,8 +2850,10 @@
if { [check_effective_target_arm32] } {
foreach flags {"" "-mfloat-abi=softfp" "-mfpu=neon" "-mfpu=neon -mfloat-abi=softfp"} {
if { [check_no_compiler_messages_nocache arm_neon_ok object {
- #include "arm_neon.h"
int dummy;
+ #ifndef __ARM_NEON__
+ #error not NEON
+ #endif
/* Avoid the case where a test adds -mfpu=neon, but the toolchain is
configured for -mcpu=arm926ej-s, for example. */
#if __ARM_ARCH < 7
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 4/4] [ARM] Add attribute/pragma target fpu=
2015-11-12 14:54 ` Christian Bruel
@ 2015-11-13 11:49 ` Kyrill Tkachov
0 siblings, 0 replies; 9+ messages in thread
From: Kyrill Tkachov @ 2015-11-13 11:49 UTC (permalink / raw)
To: Christian Bruel; +Cc: Ramana Radhakrishnan, gcc-patches
Hi Christian,
On 12/11/15 14:54, Christian Bruel wrote:
> Hi Kyrill,
>
>> ...
>> The parts in this patch look ok to me.
>> However, I think we need some more functionality
>> In aarch64 we support compiling a file with no simd, including arm_neon.h and using arm_neon.h intrinsics
>> within functions tagged with simd support.
>> We want to support such functionality on arm i.e. compile a file with -mfpu=vfp and use arm_neon.h intrinsics
>> in a function tagged with an fpu=neon attribute.
>> For that we'd need to wrap the intrinsics in arm_neon.h in appropriate pragmas, like in the aarch64 version of arm_neon.h
>
> As discussed, here is arm_neon.h for aarch32/neon with the same programming model as aarch64/simd. As you said, let's use one of the fpu=neon attributes even if the file is compiled with -mfpu=vfp.
>
> The drawback of this is that we now unconditionally make every NEON intrinsic available, introducing a small legacy change with regard to error checking (that you didn't have with aarch64). So it's worth stressing that:
>
> - One can no longer use #include "arm_neon.h" to check whether the compiler can use NEON instructions. Instead, use #ifndef __ARM_NEON__. (Found in target-supports.exp)
Checking the macro is the 'canonical' way to check for NEON support,
so I reckon we can live with that.
>
>
> - Types cannot be checked. For instance:
>
> #include <arm_neon.h>
>
> poly128_t
> foo (poly128_t* ptr)
> {
> return vldrq_p128 (ptr);
> }
>
> compiled with -mfpu=neon used to be rejected with
>
> error: unknown type name 'poly128_t' ...
>
> Now the error, as a side effect of the inlining rules between incompatible modes, becomes
>
> error: inlining failed in call to always_inline 'vldrq_p128': target specific option mismatch ...
Well, the previous message is misleading anyway since the user error there is not a type issue
but a failure to specify the correct -mfpu option.
>
> I found this more confusing, so I was a little bit reluctant to implement this, but the code is correctly rejected and the message makes sense, after all. Just a different check.
>
> This patch applies on top of the preceding attribute/pragma target fpu= series. Tested with arm-none-eabi configured with default and --with-cpu=cortex-a9 --with-fp --with-float=hard
Do you mean --with-fpu=<something>?
>
> Also fixes a few macros that depend on fpu=, which I forgot to redefine.
Can you please split those changes into a separate patch and ChangeLog and commit them separately?
That part is preapproved.
This patch is ok then with above comment about splitting the arm-c.c changes separately.
Thanks for doing this!
I believe all patches in this series are approved then
so you can go ahead and start committing.
Kyrill
>
> Christian
>
^ permalink raw reply [flat|nested] 9+ messages in thread
end of thread, other threads:[~2015-11-13 11:49 UTC | newest]
Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-09-14 14:38 [PATCH 4/4] [ARM] Add attribute/pragma target fpu= Christian Bruel
2015-09-14 19:50 ` Bernhard Reutner-Fischer
2015-09-15 10:07 ` Christian Bruel
2015-09-15 10:48 ` Christian Bruel
2015-09-18 9:13 ` Kyrill Tkachov
2015-09-21 13:46 ` Christian Bruel
2015-10-08 8:53 ` Kyrill Tkachov
2015-11-12 14:54 ` Christian Bruel
2015-11-13 11:49 ` Kyrill Tkachov
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).