public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] aarch64: Remove architecture dependencies from intrinsics
@ 2023-06-26 13:55 Andrew Carlotti
  2023-06-27  6:23 ` Richard Sandiford
  0 siblings, 1 reply; 9+ messages in thread
From: Andrew Carlotti @ 2023-06-26 13:55 UTC (permalink / raw)
  To: gcc-patches

Many intrinsics currently depend on both an architecture version and a
feature, despite the corresponding instructions being available within
GCC at lower architecture versions.

LLVM has already removed these explicit architecture version
dependences; this patch does the same for GCC, as well as removing an
unnecessary simd dependency for the scalar fp16 intrinsics.

Binutils does not support all of these architecture+feature combinations
yet, but this is an existing problem that is already reachable from GCC.
For example, compiling the test gcc.target/aarch64/usadv16qi-dotprod.c
with -O3 -march=armv8-a+dotprod has resulted in an assembler error since
GCC 10. I intend to patch this in binutils.

This patch retains explicit architecture version dependencies for
features that do not currently have a separate feature flag.

Ok for master, and backport to GCC 13?

gcc/ChangeLog:

 * config/aarch64/aarch64.h (TARGET_MEMTAG): Remove armv8.5
 dependency.
 * config/aarch64/arm_acle.h: Remove unnecessary armv8.x
 dependencies from target pragmas.
 * config/aarch64/arm_fp16.h (target): Likewise.
 * config/aarch64/arm_neon.h (target): Likewise.

gcc/testsuite/ChangeLog:

 * gcc.target/aarch64/feature-bf16-backport.c: New test.
 * gcc.target/aarch64/feature-dotprod-backport.c: New test.
 * gcc.target/aarch64/feature-fp16-backport.c: New test.
 * gcc.target/aarch64/feature-fp16-scalar-backport.c: New test.
 * gcc.target/aarch64/feature-fp16fml-backport.c: New test.
 * gcc.target/aarch64/feature-i8mm-backport.c: New test.
 * gcc.target/aarch64/feature-memtag-backport.c: New test.
 * gcc.target/aarch64/feature-sha3-backport.c: New test.
 * gcc.target/aarch64/feature-sm4-backport.c: New test.


diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 7129ed1ff370d597895b3f46b56b1250da7fa190..cdb664eb8f7db820b6b06b2667bfad6dc14cb7a2 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -292,7 +292,7 @@ enum class aarch64_feature : unsigned char {
 #define TARGET_RNG (AARCH64_ISA_RNG)
 
 /* Memory Tagging instructions optional to Armv8.5 enabled through +memtag.  */
-#define TARGET_MEMTAG (AARCH64_ISA_V8_5A && AARCH64_ISA_MEMTAG)
+#define TARGET_MEMTAG (AARCH64_ISA_MEMTAG)
 
 /* I8MM instructions are enabled through +i8mm.  */
 #define TARGET_I8MM (AARCH64_ISA_I8MM)
diff --git a/gcc/config/aarch64/arm_acle.h b/gcc/config/aarch64/arm_acle.h
index e0ac591d2c8d6c4c4c8a074b2d9881c47b1db1ab..87fb42f47c5821adecbb0ea441e0a38c63972e77 100644
--- a/gcc/config/aarch64/arm_acle.h
+++ b/gcc/config/aarch64/arm_acle.h
@@ -325,7 +325,7 @@ __rndrrs (uint64_t *__res)
 #pragma GCC pop_options
 
 #pragma GCC push_options
-#pragma GCC target ("arch=armv8.5-a+memtag")
+#pragma GCC target ("+nothing+memtag")
 
 #define __arm_mte_create_random_tag(__ptr, __u64_mask) \
   __builtin_aarch64_memtag_irg(__ptr, __u64_mask)
diff --git a/gcc/config/aarch64/arm_fp16.h b/gcc/config/aarch64/arm_fp16.h
index a8fa4dbbdfe1bab4aa604bb311ef66d4e1de18ac..84b2ed66f9ba19fba6ccd8be33940d7239bfa22e 100644
--- a/gcc/config/aarch64/arm_fp16.h
+++ b/gcc/config/aarch64/arm_fp16.h
@@ -30,7 +30,7 @@
 #include <stdint.h>
 
 #pragma GCC push_options
-#pragma GCC target ("arch=armv8.2-a+fp16")
+#pragma GCC target ("+nothing+fp16+nosimd")
 
 typedef __fp16 float16_t;
 
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index eeec9f162e223df8cf7803b3227aef22e94227ac..a078674376af121c36bbebef76631c25a6815b1b 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -25590,7 +25590,7 @@ __INTERLEAVE_LIST (zip)
 #include "arm_fp16.h"
 
 #pragma GCC push_options
-#pragma GCC target ("arch=armv8.2-a+fp16")
+#pragma GCC target ("+nothing+fp16")
 
 /* ARMv8.2-A FP16 one operand vector intrinsics.  */
 
@@ -26753,7 +26753,7 @@ vminnmvq_f16 (float16x8_t __a)
 /* AdvSIMD Dot Product intrinsics.  */
 
 #pragma GCC push_options
-#pragma GCC target ("arch=armv8.2-a+dotprod")
+#pragma GCC target ("+nothing+dotprod")
 
 __extension__ extern __inline uint32x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
@@ -26844,7 +26844,7 @@ vdotq_laneq_s32 (int32x4_t __r, int8x16_t __a, int8x16_t __b, const int __index)
 #pragma GCC pop_options
 
 #pragma GCC push_options
-#pragma GCC target ("arch=armv8.2-a+sm4")
+#pragma GCC target ("+nothing+sm4")
 
 __extension__ extern __inline uint32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
@@ -26911,7 +26911,7 @@ vsm4ekeyq_u32 (uint32x4_t __a, uint32x4_t __b)
 #pragma GCC pop_options
 
 #pragma GCC push_options
-#pragma GCC target ("arch=armv8.2-a+sha3")
+#pragma GCC target ("+nothing+sha3")
 
 __extension__ extern __inline uint64x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
@@ -27547,7 +27547,7 @@ vcmlaq_rot270_laneq_f32 (float32x4_t __r, float32x4_t __a, float32x4_t __b,
 #pragma GCC pop_options
 
 #pragma GCC push_options
-#pragma GCC target ("arch=armv8.2-a+fp16fml")
+#pragma GCC target ("+nothing+fp16fml")
 
 __extension__ extern __inline float32x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
@@ -27856,7 +27856,7 @@ vrnd64xq_f64 (float64x2_t __a)
 #include "arm_bf16.h"
 
 #pragma GCC push_options
-#pragma GCC target ("arch=armv8.2-a+bf16")
+#pragma GCC target ("+nothing+bf16")
 
 __extension__ extern __inline bfloat16x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
@@ -28535,7 +28535,7 @@ vst4q_lane_bf16 (bfloat16_t *__ptr, bfloat16x8x4_t __val, const int __lane)
 /* AdvSIMD 8-bit Integer Matrix Multiply (I8MM) intrinsics.  */
 
 #pragma GCC push_options
-#pragma GCC target ("arch=armv8.2-a+i8mm")
+#pragma GCC target ("+nothing+i8mm")
 
 __extension__ extern __inline int32x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
diff --git a/gcc/testsuite/gcc.target/aarch64/feature-bf16-backport.c b/gcc/testsuite/gcc.target/aarch64/feature-bf16-backport.c
new file mode 100644
index 0000000000000000000000000000000000000000..3a03255bbb9a3e3da45c9f2ecafaf12685278057
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/feature-bf16-backport.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-march=armv8-a+bf16" } */
+
+#include <arm_neon.h>
+
+float32x4_t bar (float32x4_t r, bfloat16x8_t a, bfloat16x8_t b) {
+        return vbfmlalbq_f32 (r, a, b);
+}
+
+/* { dg-final { scan-assembler {\tbfmlalb\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/feature-dotprod-backport.c b/gcc/testsuite/gcc.target/aarch64/feature-dotprod-backport.c
new file mode 100644
index 0000000000000000000000000000000000000000..5f66fffa5f5b09a9c04da7b861055e3aa89bccec
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/feature-dotprod-backport.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-march=armv8-a+dotprod" } */
+
+#include <arm_neon.h>
+
+uint32x4_t bar (uint32x4_t r, uint8x16_t a, uint8x16_t b) {
+        return vdotq_u32(r, a, b);
+}
+
+/* { dg-final { scan-assembler {\tudot\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/feature-fp16-backport.c b/gcc/testsuite/gcc.target/aarch64/feature-fp16-backport.c
new file mode 100644
index 0000000000000000000000000000000000000000..eb94ae060664eef4275023440c0a18b52ae27b42
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/feature-fp16-backport.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-march=armv8-a+fp16" } */
+
+#include <arm_neon.h>
+
+float16x8_t bar (float16x8_t a, float16x8_t b) {
+        return vaddq_f16(a, b);
+}
+
+/* { dg-final { scan-assembler {\tfadd\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/feature-fp16-scalar-backport.c b/gcc/testsuite/gcc.target/aarch64/feature-fp16-scalar-backport.c
new file mode 100644
index 0000000000000000000000000000000000000000..9cde6b5558c2e3699620d0d35ebc3e679fdfe5eb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/feature-fp16-scalar-backport.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-march=armv8-a+fp16+nosimd" } */
+
+#include <arm_fp16.h>
+
+float16_t bar (float16_t a, float16_t b) {
+        return vaddh_f16(a, b);
+}
+
+/* { dg-final { scan-assembler {\tfadd\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/feature-fp16fml-backport.c b/gcc/testsuite/gcc.target/aarch64/feature-fp16fml-backport.c
new file mode 100644
index 0000000000000000000000000000000000000000..86c4748b86d9730d433cc206700d7789d996bb6d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/feature-fp16fml-backport.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-march=armv8-a+fp16fml" } */
+
+#include <arm_neon.h>
+
+float32x4_t bar (float32x4_t r, float16x8_t a, float16x8_t b) {
+        return vfmlalq_high_f16 (r, a, b);
+}
+
+/* { dg-final { scan-assembler {\tfmlal2\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/feature-i8mm-backport.c b/gcc/testsuite/gcc.target/aarch64/feature-i8mm-backport.c
new file mode 100644
index 0000000000000000000000000000000000000000..6dd0214faab9d470a4f363252d48ac0de92bfe7b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/feature-i8mm-backport.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-march=armv8-a+i8mm" } */
+
+#include <arm_neon.h>
+
+int32x4_t bar (int32x4_t r, int8x16_t a, int8x16_t b) {
+        return vmmlaq_s32 (r, a, b);
+}
+
+/* { dg-final { scan-assembler {\tsmmla\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/feature-memtag-backport.c b/gcc/testsuite/gcc.target/aarch64/feature-memtag-backport.c
new file mode 100644
index 0000000000000000000000000000000000000000..5cb071b77ccbd11039d4b8ee9e2cd78a708f2fad
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/feature-memtag-backport.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-march=armv8-a+memtag" } */
+
+#include <arm_acle.h>
+
+int *bar (int *src) {
+        return __arm_mte_create_random_tag(src, 2<<16-1);
+}
+
+/* { dg-final { scan-assembler {\tirg\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/feature-sha3-backport.c b/gcc/testsuite/gcc.target/aarch64/feature-sha3-backport.c
new file mode 100644
index 0000000000000000000000000000000000000000..e194f1a10ea12439195c8cf1aae08dae26607c14
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/feature-sha3-backport.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-march=armv8-a+sha3" } */
+
+#include <arm_neon.h>
+
+uint64x2_t bar (uint64x2_t a, uint64x2_t b, uint64x2_t c) {
+        return vsha512hq_u64(a, b, c);
+}
+
+/* { dg-final { scan-assembler {\tsha512h\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/feature-sm4-backport.c b/gcc/testsuite/gcc.target/aarch64/feature-sm4-backport.c
new file mode 100644
index 0000000000000000000000000000000000000000..604a58bcb68e5753d2eec661a374b4983ad29088
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/feature-sm4-backport.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-march=armv8-a+sm4" } */
+
+#include <arm_neon.h>
+
+uint32x4_t bar (uint32x4_t a, uint32x4_t b, uint32x4_t c) {
+        return vsm3tt1aq_u32(a, b, c, 2);
+}
+
+/* { dg-final { scan-assembler {\tsm3tt1a\t} } } */

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] aarch64: Remove architecture dependencies from intrinsics
  2023-06-26 13:55 [PATCH] aarch64: Remove architecture dependencies from intrinsics Andrew Carlotti
@ 2023-06-27  6:23 ` Richard Sandiford
  2023-06-29 18:24   ` Andrew Carlotti
  0 siblings, 1 reply; 9+ messages in thread
From: Richard Sandiford @ 2023-06-27  6:23 UTC (permalink / raw)
  To: Andrew Carlotti via Gcc-patches; +Cc: Andrew Carlotti

Andrew Carlotti via Gcc-patches <gcc-patches@gcc.gnu.org> writes:
> Many intrinsics currently depend on both an architecture version and a
> feature, despite the corresponding instructions being available within
> GCC at lower architecture versions.
>
> LLVM has already removed these explicit architecture version
> dependences; this patch does the same for GCC, as well as removing an
> unnecessary simd dependency for the scalar fp16 intrinsics.
>
> Binutils does not support all of these architecture+feature combinations
> yet, but this is an existing problem that is already reachable from GCC.
> For example, compiling the test gcc.target/aarch64/usadv16qi-dotprod.c
> with -O3 -march=armv8-a+dotprod has resulted in an assembler error since
> GCC 10. I intend to patch this in binutils.
>
> This patch retains explicit architecture version dependencies for
> features that do not currently have a separate feature flag.
>
> Ok for master, and backport to GCC 13?
>
> gcc/ChangeLog:
>
>  * config/aarch64/aarch64.h (TARGET_MEMTAG): Remove armv8.5
>  dependency.
>  * config/aarch64/arm_acle.h: Remove unnecessary armv8.x
>  dependencies from target pragmas.
>  * config/aarch64/arm_fp16.h (target): Likewise.

The change to this file is a bit different from the others,
since it's removing an implicit dependency on +simd, rather
than a dependency on an architecture level.  I think it'd be
worth mentioning that explicitly in the changelog.

OK with that change, thanks.

(Arguably we should add +nosimd to many of the other pragmas in
arm_acle.h, but that's logically a separate patch.)

Richard

>  * config/aarch64/arm_neon.h (target): Likewise.
>
> gcc/testsuite/ChangeLog:
>
>  * gcc.target/aarch64/feature-bf16-backport.c: New test.
>  * gcc.target/aarch64/feature-dotprod-backport.c: New test.
>  * gcc.target/aarch64/feature-fp16-backport.c: New test.
>  * gcc.target/aarch64/feature-fp16-scalar-backport.c: New test.
>  * gcc.target/aarch64/feature-fp16fml-backport.c: New test.
>  * gcc.target/aarch64/feature-i8mm-backport.c: New test.
>  * gcc.target/aarch64/feature-memtag-backport.c: New test.
>  * gcc.target/aarch64/feature-sha3-backport.c: New test.
>  * gcc.target/aarch64/feature-sm4-backport.c: New test.
>
>
> diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
> index 7129ed1ff370d597895b3f46b56b1250da7fa190..cdb664eb8f7db820b6b06b2667bfad6dc14cb7a2 100644
> --- a/gcc/config/aarch64/aarch64.h
> +++ b/gcc/config/aarch64/aarch64.h
> @@ -292,7 +292,7 @@ enum class aarch64_feature : unsigned char {
>  #define TARGET_RNG (AARCH64_ISA_RNG)
>  
>  /* Memory Tagging instructions optional to Armv8.5 enabled through +memtag.  */
> -#define TARGET_MEMTAG (AARCH64_ISA_V8_5A && AARCH64_ISA_MEMTAG)
> +#define TARGET_MEMTAG (AARCH64_ISA_MEMTAG)
>  
>  /* I8MM instructions are enabled through +i8mm.  */
>  #define TARGET_I8MM (AARCH64_ISA_I8MM)
> diff --git a/gcc/config/aarch64/arm_acle.h b/gcc/config/aarch64/arm_acle.h
> index e0ac591d2c8d6c4c4c8a074b2d9881c47b1db1ab..87fb42f47c5821adecbb0ea441e0a38c63972e77 100644
> --- a/gcc/config/aarch64/arm_acle.h
> +++ b/gcc/config/aarch64/arm_acle.h
> @@ -325,7 +325,7 @@ __rndrrs (uint64_t *__res)
>  #pragma GCC pop_options
>  
>  #pragma GCC push_options
> -#pragma GCC target ("arch=armv8.5-a+memtag")
> +#pragma GCC target ("+nothing+memtag")
>  
>  #define __arm_mte_create_random_tag(__ptr, __u64_mask) \
>    __builtin_aarch64_memtag_irg(__ptr, __u64_mask)
> diff --git a/gcc/config/aarch64/arm_fp16.h b/gcc/config/aarch64/arm_fp16.h
> index a8fa4dbbdfe1bab4aa604bb311ef66d4e1de18ac..84b2ed66f9ba19fba6ccd8be33940d7239bfa22e 100644
> --- a/gcc/config/aarch64/arm_fp16.h
> +++ b/gcc/config/aarch64/arm_fp16.h
> @@ -30,7 +30,7 @@
>  #include <stdint.h>
>  
>  #pragma GCC push_options
> -#pragma GCC target ("arch=armv8.2-a+fp16")
> +#pragma GCC target ("+nothing+fp16+nosimd")
>  
>  typedef __fp16 float16_t;
>  
> diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
> index eeec9f162e223df8cf7803b3227aef22e94227ac..a078674376af121c36bbebef76631c25a6815b1b 100644
> --- a/gcc/config/aarch64/arm_neon.h
> +++ b/gcc/config/aarch64/arm_neon.h
> @@ -25590,7 +25590,7 @@ __INTERLEAVE_LIST (zip)
>  #include "arm_fp16.h"
>  
>  #pragma GCC push_options
> -#pragma GCC target ("arch=armv8.2-a+fp16")
> +#pragma GCC target ("+nothing+fp16")
>  
>  /* ARMv8.2-A FP16 one operand vector intrinsics.  */
>  
> @@ -26753,7 +26753,7 @@ vminnmvq_f16 (float16x8_t __a)
>  /* AdvSIMD Dot Product intrinsics.  */
>  
>  #pragma GCC push_options
> -#pragma GCC target ("arch=armv8.2-a+dotprod")
> +#pragma GCC target ("+nothing+dotprod")
>  
>  __extension__ extern __inline uint32x2_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> @@ -26844,7 +26844,7 @@ vdotq_laneq_s32 (int32x4_t __r, int8x16_t __a, int8x16_t __b, const int __index)
>  #pragma GCC pop_options
>  
>  #pragma GCC push_options
> -#pragma GCC target ("arch=armv8.2-a+sm4")
> +#pragma GCC target ("+nothing+sm4")
>  
>  __extension__ extern __inline uint32x4_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> @@ -26911,7 +26911,7 @@ vsm4ekeyq_u32 (uint32x4_t __a, uint32x4_t __b)
>  #pragma GCC pop_options
>  
>  #pragma GCC push_options
> -#pragma GCC target ("arch=armv8.2-a+sha3")
> +#pragma GCC target ("+nothing+sha3")
>  
>  __extension__ extern __inline uint64x2_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> @@ -27547,7 +27547,7 @@ vcmlaq_rot270_laneq_f32 (float32x4_t __r, float32x4_t __a, float32x4_t __b,
>  #pragma GCC pop_options
>  
>  #pragma GCC push_options
> -#pragma GCC target ("arch=armv8.2-a+fp16fml")
> +#pragma GCC target ("+nothing+fp16fml")
>  
>  __extension__ extern __inline float32x2_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> @@ -27856,7 +27856,7 @@ vrnd64xq_f64 (float64x2_t __a)
>  #include "arm_bf16.h"
>  
>  #pragma GCC push_options
> -#pragma GCC target ("arch=armv8.2-a+bf16")
> +#pragma GCC target ("+nothing+bf16")
>  
>  __extension__ extern __inline bfloat16x4_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> @@ -28535,7 +28535,7 @@ vst4q_lane_bf16 (bfloat16_t *__ptr, bfloat16x8x4_t __val, const int __lane)
>  /* AdvSIMD 8-bit Integer Matrix Multiply (I8MM) intrinsics.  */
>  
>  #pragma GCC push_options
> -#pragma GCC target ("arch=armv8.2-a+i8mm")
> +#pragma GCC target ("+nothing+i8mm")
>  
>  __extension__ extern __inline int32x2_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> diff --git a/gcc/testsuite/gcc.target/aarch64/feature-bf16-backport.c b/gcc/testsuite/gcc.target/aarch64/feature-bf16-backport.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..3a03255bbb9a3e3da45c9f2ecafaf12685278057
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/feature-bf16-backport.c
> @@ -0,0 +1,10 @@
> +/* { dg-do compile } */
> +/* { dg-additional-options "-march=armv8-a+bf16" } */
> +
> +#include <arm_neon.h>
> +
> +float32x4_t bar (float32x4_t r, bfloat16x8_t a, bfloat16x8_t b) {
> +        return vbfmlalbq_f32 (r, a, b);
> +}
> +
> +/* { dg-final { scan-assembler {\tbfmlalb\t} } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/feature-dotprod-backport.c b/gcc/testsuite/gcc.target/aarch64/feature-dotprod-backport.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..5f66fffa5f5b09a9c04da7b861055e3aa89bccec
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/feature-dotprod-backport.c
> @@ -0,0 +1,10 @@
> +/* { dg-do compile } */
> +/* { dg-additional-options "-march=armv8-a+dotprod" } */
> +
> +#include <arm_neon.h>
> +
> +uint32x4_t bar (uint32x4_t r, uint8x16_t a, uint8x16_t b) {
> +        return vdotq_u32(r, a, b);
> +}
> +
> +/* { dg-final { scan-assembler {\tudot\t} } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/feature-fp16-backport.c b/gcc/testsuite/gcc.target/aarch64/feature-fp16-backport.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..eb94ae060664eef4275023440c0a18b52ae27b42
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/feature-fp16-backport.c
> @@ -0,0 +1,10 @@
> +/* { dg-do compile } */
> +/* { dg-additional-options "-march=armv8-a+fp16" } */
> +
> +#include <arm_neon.h>
> +
> +float16x8_t bar (float16x8_t a, float16x8_t b) {
> +        return vaddq_f16(a, b);
> +}
> +
> +/* { dg-final { scan-assembler {\tfadd\t} } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/feature-fp16-scalar-backport.c b/gcc/testsuite/gcc.target/aarch64/feature-fp16-scalar-backport.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..9cde6b5558c2e3699620d0d35ebc3e679fdfe5eb
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/feature-fp16-scalar-backport.c
> @@ -0,0 +1,10 @@
> +/* { dg-do compile } */
> +/* { dg-additional-options "-march=armv8-a+fp16+nosimd" } */
> +
> +#include <arm_fp16.h>
> +
> +float16_t bar (float16_t a, float16_t b) {
> +        return vaddh_f16(a, b);
> +}
> +
> +/* { dg-final { scan-assembler {\tfadd\t} } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/feature-fp16fml-backport.c b/gcc/testsuite/gcc.target/aarch64/feature-fp16fml-backport.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..86c4748b86d9730d433cc206700d7789d996bb6d
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/feature-fp16fml-backport.c
> @@ -0,0 +1,10 @@
> +/* { dg-do compile } */
> +/* { dg-additional-options "-march=armv8-a+fp16fml" } */
> +
> +#include <arm_neon.h>
> +
> +float32x4_t bar (float32x4_t r, float16x8_t a, float16x8_t b) {
> +        return vfmlalq_high_f16 (r, a, b);
> +}
> +
> +/* { dg-final { scan-assembler {\tfmlal2\t} } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/feature-i8mm-backport.c b/gcc/testsuite/gcc.target/aarch64/feature-i8mm-backport.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..6dd0214faab9d470a4f363252d48ac0de92bfe7b
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/feature-i8mm-backport.c
> @@ -0,0 +1,10 @@
> +/* { dg-do compile } */
> +/* { dg-additional-options "-march=armv8-a+i8mm" } */
> +
> +#include <arm_neon.h>
> +
> +int32x4_t bar (int32x4_t r, int8x16_t a, int8x16_t b) {
> +        return vmmlaq_s32 (r, a, b);
> +}
> +
> +/* { dg-final { scan-assembler {\tsmmla\t} } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/feature-memtag-backport.c b/gcc/testsuite/gcc.target/aarch64/feature-memtag-backport.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..5cb071b77ccbd11039d4b8ee9e2cd78a708f2fad
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/feature-memtag-backport.c
> @@ -0,0 +1,10 @@
> +/* { dg-do compile } */
> +/* { dg-additional-options "-march=armv8-a+memtag" } */
> +
> +#include <arm_acle.h>
> +
> +int *bar (int *src) {
> +        return __arm_mte_create_random_tag(src, 2<<16-1);
> +}
> +
> +/* { dg-final { scan-assembler {\tirg\t} } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/feature-sha3-backport.c b/gcc/testsuite/gcc.target/aarch64/feature-sha3-backport.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..e194f1a10ea12439195c8cf1aae08dae26607c14
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/feature-sha3-backport.c
> @@ -0,0 +1,10 @@
> +/* { dg-do compile } */
> +/* { dg-additional-options "-march=armv8-a+sha3" } */
> +
> +#include <arm_neon.h>
> +
> +uint64x2_t bar (uint64x2_t a, uint64x2_t b, uint64x2_t c) {
> +        return vsha512hq_u64(a, b, c);
> +}
> +
> +/* { dg-final { scan-assembler {\tsha512h\t} } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/feature-sm4-backport.c b/gcc/testsuite/gcc.target/aarch64/feature-sm4-backport.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..604a58bcb68e5753d2eec661a374b4983ad29088
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/feature-sm4-backport.c
> @@ -0,0 +1,10 @@
> +/* { dg-do compile } */
> +/* { dg-additional-options "-march=armv8-a+sm4" } */
> +
> +#include <arm_neon.h>
> +
> +uint32x4_t bar (uint32x4_t a, uint32x4_t b, uint32x4_t c) {
> +        return vsm3tt1aq_u32(a, b, c, 2);
> +}
> +
> +/* { dg-final { scan-assembler {\tsm3tt1a\t} } } */

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] aarch64: Remove architecture dependencies from intrinsics
  2023-06-27  6:23 ` Richard Sandiford
@ 2023-06-29 18:24   ` Andrew Carlotti
  2023-07-19 16:44     ` [GCC 13 PATCH] " Andrew Carlotti
  0 siblings, 1 reply; 9+ messages in thread
From: Andrew Carlotti @ 2023-06-29 18:24 UTC (permalink / raw)
  To: gcc-patches, richard.sandiford

On Tue, Jun 27, 2023 at 07:23:32AM +0100, Richard Sandiford wrote:
> Andrew Carlotti via Gcc-patches <gcc-patches@gcc.gnu.org> writes:
> > Many intrinsics currently depend on both an architecture version and a
> > feature, despite the corresponding instructions being available within
> > GCC at lower architecture versions.
> >
> > LLVM has already removed these explicit architecture version
> > dependences; this patch does the same for GCC, as well as removing an
> > unnecessary simd dependency for the scalar fp16 intrinsics.
> >
> > Binutils does not support all of these architecture+feature combinations
> > yet, but this is an existing problem that is already reachable from GCC.
> > For example, compiling the test gcc.target/aarch64/usadv16qi-dotprod.c
> > with -O3 -march=armv8-a+dotprod has resulted in an assembler error since
> > GCC 10. I intend to patch this in binutils.
> >
> > This patch retains explicit architecture version dependencies for
> > features that do not currently have a separate feature flag.
> >
> > Ok for master, and backport to GCC 13?
> >
> > gcc/ChangeLog:
> >
> >  * config/aarch64/aarch64.h (TARGET_MEMTAG): Remove armv8.5
> >  dependency.
> >  * config/aarch64/arm_acle.h: Remove unnecessary armv8.x
> >  dependencies from target pragmas.
> >  * config/aarch64/arm_fp16.h (target): Likewise.
> 
> The change to this file is a bit different from the others,
> since it's removing an implicit dependency on +simd, rather
> than a dependency on an architecture level.  I think it'd be
> worth mentioning that explicitly in the changelog.
> 
> OK with that change, thanks.
> 
> (Arguably we should add +nosimd to many of the other pragmas in
> arm_acle.h, but that's logically a separate patch.)
> 
> Richard

Actually, I think I should just remove the +nosimd from the patch, because
+fp16 doesn't enable simd (unlike +bf16, which has simd as an 'explicit on'
implication).

Aside from +bf16, the only other feature with simd as an 'explicit on' is
+rdma. However, there appear to be no non-simd rdma instructions, so
+nothing+rdma+nosimd is effectively the same as +nothing.

> > ...
> >
> > diff --git a/gcc/config/aarch64/arm_fp16.h b/gcc/config/aarch64/arm_fp16.h
> > index a8fa4dbbdfe1bab4aa604bb311ef66d4e1de18ac..84b2ed66f9ba19fba6ccd8be33940d7239bfa22e 100644
> > --- a/gcc/config/aarch64/arm_fp16.h
> > +++ b/gcc/config/aarch64/arm_fp16.h
> > @@ -30,7 +30,7 @@
> >  #include <stdint.h>
> >  
> >  #pragma GCC push_options
> > -#pragma GCC target ("arch=armv8.2-a+fp16")
> > +#pragma GCC target ("+nothing+fp16+nosimd")
> >  
> >  typedef __fp16 float16_t;
> >  

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [GCC 13 PATCH] aarch64: Remove architecture dependencies from intrinsics
  2023-06-29 18:24   ` Andrew Carlotti
@ 2023-07-19 16:44     ` Andrew Carlotti
  2023-07-19 18:35       ` Ramana Radhakrishnan
  2023-07-20  6:48       ` Richard Sandiford
  0 siblings, 2 replies; 9+ messages in thread
From: Andrew Carlotti @ 2023-07-19 16:44 UTC (permalink / raw)
  To: gcc-patches, richard.sandiford

Updated patch to fix the fp16 intrinsic pragmas, and pushed to master.
OK to backport to GCC 13?


Many intrinsics currently depend on both an architecture version and a
feature, despite the corresponding instructions being available within
GCC at lower architecture versions.

LLVM has already removed these explicit architecture version
dependences; this patch does the same for GCC. Note that +fp16 does not
imply +simd, so we need to add an explicit +simd for the Neon fp16
intrinsics.

Binutils did not previously support all of these architecture+feature
combinations, but this problem is already reachable from GCC.  For
example, compiling the test gcc.target/aarch64/usadv16qi-dotprod.c
with -O3 -march=armv8-a+dotprod has resulted in an assembler error since
GCC 10.  This is fixed in Binutils 2.41.

This patch retains explicit architecture version dependencies for
features that do not currently have a separate feature flag.

gcc/ChangeLog:

 * config/aarch64/aarch64.h (TARGET_MEMTAG): Remove armv8.5
 dependency.
 * config/aarch64/arm_acle.h: Remove unnecessary armv8.x
 dependencies from target pragmas.
 * config/aarch64/arm_fp16.h (target): Likewise.
 * config/aarch64/arm_neon.h (target): Likewise.

gcc/testsuite/ChangeLog:

 * gcc.target/aarch64/feature-bf16-backport.c: New test.
 * gcc.target/aarch64/feature-dotprod-backport.c: New test.
 * gcc.target/aarch64/feature-fp16-backport.c: New test.
 * gcc.target/aarch64/feature-fp16-scalar-backport.c: New test.
 * gcc.target/aarch64/feature-fp16fml-backport.c: New test.
 * gcc.target/aarch64/feature-i8mm-backport.c: New test.
 * gcc.target/aarch64/feature-memtag-backport.c: New test.
 * gcc.target/aarch64/feature-sha3-backport.c: New test.
 * gcc.target/aarch64/feature-sm4-backport.c: New test.

---

diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index a01f1ee99d85917941ffba55bc3b4dcac87b41f6..2b0fc97bb71e9d560ae26035c7d7142682e46c38 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -292,7 +292,7 @@ enum class aarch64_feature : unsigned char {
 #define TARGET_RNG (AARCH64_ISA_RNG)
 
 /* Memory Tagging instructions optional to Armv8.5 enabled through +memtag.  */
-#define TARGET_MEMTAG (AARCH64_ISA_V8_5A && AARCH64_ISA_MEMTAG)
+#define TARGET_MEMTAG (AARCH64_ISA_MEMTAG)
 
 /* I8MM instructions are enabled through +i8mm.  */
 #define TARGET_I8MM (AARCH64_ISA_I8MM)
diff --git a/gcc/config/aarch64/arm_acle.h b/gcc/config/aarch64/arm_acle.h
index 3b6b63e6805432b5f1686745f987c52d2967c7c1..7599a32301dadf80760d3cb40a8685d2e6a476fb 100644
--- a/gcc/config/aarch64/arm_acle.h
+++ b/gcc/config/aarch64/arm_acle.h
@@ -292,7 +292,7 @@ __rndrrs (uint64_t *__res)
 #pragma GCC pop_options
 
 #pragma GCC push_options
-#pragma GCC target ("arch=armv8.5-a+memtag")
+#pragma GCC target ("+nothing+memtag")
 
 #define __arm_mte_create_random_tag(__ptr, __u64_mask) \
   __builtin_aarch64_memtag_irg(__ptr, __u64_mask)
diff --git a/gcc/config/aarch64/arm_fp16.h b/gcc/config/aarch64/arm_fp16.h
index 350f8cc33d99e16137e9d70fa7958b10924dc67f..c10f9dcf7e097ded1740955addcd73348649dc56 100644
--- a/gcc/config/aarch64/arm_fp16.h
+++ b/gcc/config/aarch64/arm_fp16.h
@@ -30,7 +30,7 @@
 #include <stdint.h>
 
 #pragma GCC push_options
-#pragma GCC target ("arch=armv8.2-a+fp16")
+#pragma GCC target ("+nothing+fp16")
 
 typedef __fp16 float16_t;
 
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index 0ace1eeddb97443433c091d2363403fcf2907654..349f3167699447eb397af482eaeadf8a07617025 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -25590,7 +25590,7 @@ __INTERLEAVE_LIST (zip)
 #include "arm_fp16.h"
 
 #pragma GCC push_options
-#pragma GCC target ("arch=armv8.2-a+fp16")
+#pragma GCC target ("+nothing+simd+fp16")
 
 /* ARMv8.2-A FP16 one operand vector intrinsics.  */
 
@@ -26753,7 +26753,7 @@ vminnmvq_f16 (float16x8_t __a)
 /* AdvSIMD Dot Product intrinsics.  */
 
 #pragma GCC push_options
-#pragma GCC target ("arch=armv8.2-a+dotprod")
+#pragma GCC target ("+nothing+dotprod")
 
 __extension__ extern __inline uint32x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
@@ -26844,7 +26844,7 @@ vdotq_laneq_s32 (int32x4_t __r, int8x16_t __a, int8x16_t __b, const int __index)
 #pragma GCC pop_options
 
 #pragma GCC push_options
-#pragma GCC target ("arch=armv8.2-a+sm4")
+#pragma GCC target ("+nothing+sm4")
 
 __extension__ extern __inline uint32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
@@ -26911,7 +26911,7 @@ vsm4ekeyq_u32 (uint32x4_t __a, uint32x4_t __b)
 #pragma GCC pop_options
 
 #pragma GCC push_options
-#pragma GCC target ("arch=armv8.2-a+sha3")
+#pragma GCC target ("+nothing+sha3")
 
 __extension__ extern __inline uint64x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
@@ -27547,7 +27547,7 @@ vcmlaq_rot270_laneq_f32 (float32x4_t __r, float32x4_t __a, float32x4_t __b,
 #pragma GCC pop_options
 
 #pragma GCC push_options
-#pragma GCC target ("arch=armv8.2-a+fp16fml")
+#pragma GCC target ("+nothing+fp16fml")
 
 __extension__ extern __inline float32x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
@@ -27856,7 +27856,7 @@ vrnd64xq_f64 (float64x2_t __a)
 #include "arm_bf16.h"
 
 #pragma GCC push_options
-#pragma GCC target ("arch=armv8.2-a+bf16")
+#pragma GCC target ("+nothing+bf16")
 
 __extension__ extern __inline bfloat16x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
@@ -28535,7 +28535,7 @@ vst4q_lane_bf16 (bfloat16_t *__ptr, bfloat16x8x4_t __val, const int __lane)
 /* AdvSIMD 8-bit Integer Matrix Multiply (I8MM) intrinsics.  */
 
 #pragma GCC push_options
-#pragma GCC target ("arch=armv8.2-a+i8mm")
+#pragma GCC target ("+nothing+i8mm")
 
 __extension__ extern __inline int32x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
diff --git a/gcc/testsuite/gcc.target/aarch64/feature-bf16-backport.c b/gcc/testsuite/gcc.target/aarch64/feature-bf16-backport.c
new file mode 100644
index 0000000000000000000000000000000000000000..3a03255bbb9a3e3da45c9f2ecafaf12685278057
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/feature-bf16-backport.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-march=armv8-a+bf16" } */
+
+#include <arm_neon.h>
+
+float32x4_t bar (float32x4_t r, bfloat16x8_t a, bfloat16x8_t b) {
+        return vbfmlalbq_f32 (r, a, b);
+}
+
+/* { dg-final { scan-assembler {\tbfmlalb\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/feature-dotprod-backport.c b/gcc/testsuite/gcc.target/aarch64/feature-dotprod-backport.c
new file mode 100644
index 0000000000000000000000000000000000000000..5f66fffa5f5b09a9c04da7b861055e3aa89bccec
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/feature-dotprod-backport.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-march=armv8-a+dotprod" } */
+
+#include <arm_neon.h>
+
+uint32x4_t bar (uint32x4_t r, uint8x16_t a, uint8x16_t b) {
+        return vdotq_u32(r, a, b);
+}
+
+/* { dg-final { scan-assembler {\tudot\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/feature-fp16-backport.c b/gcc/testsuite/gcc.target/aarch64/feature-fp16-backport.c
new file mode 100644
index 0000000000000000000000000000000000000000..eb94ae060664eef4275023440c0a18b52ae27b42
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/feature-fp16-backport.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-march=armv8-a+fp16" } */
+
+#include <arm_neon.h>
+
+float16x8_t bar (float16x8_t a, float16x8_t b) {
+        return vaddq_f16(a, b);
+}
+
+/* { dg-final { scan-assembler {\tfadd\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/feature-fp16-scalar-backport.c b/gcc/testsuite/gcc.target/aarch64/feature-fp16-scalar-backport.c
new file mode 100644
index 0000000000000000000000000000000000000000..9cde6b5558c2e3699620d0d35ebc3e679fdfe5eb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/feature-fp16-scalar-backport.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-march=armv8-a+fp16+nosimd" } */
+
+#include <arm_fp16.h>
+
+float16_t bar (float16_t a, float16_t b) {
+        return vaddh_f16(a, b);
+}
+
+/* { dg-final { scan-assembler {\tfadd\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/feature-fp16fml-backport.c b/gcc/testsuite/gcc.target/aarch64/feature-fp16fml-backport.c
new file mode 100644
index 0000000000000000000000000000000000000000..86c4748b86d9730d433cc206700d7789d996bb6d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/feature-fp16fml-backport.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-march=armv8-a+fp16fml" } */
+
+#include <arm_neon.h>
+
+float32x4_t bar (float32x4_t r, float16x8_t a, float16x8_t b) {
+        return vfmlalq_high_f16 (r, a, b);
+}
+
+/* { dg-final { scan-assembler {\tfmlal2\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/feature-i8mm-backport.c b/gcc/testsuite/gcc.target/aarch64/feature-i8mm-backport.c
new file mode 100644
index 0000000000000000000000000000000000000000..6dd0214faab9d470a4f363252d48ac0de92bfe7b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/feature-i8mm-backport.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-march=armv8-a+i8mm" } */
+
+#include <arm_neon.h>
+
+int32x4_t bar (int32x4_t r, int8x16_t a, int8x16_t b) {
+        return vmmlaq_s32 (r, a, b);
+}
+
+/* { dg-final { scan-assembler {\tsmmla\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/feature-memtag-backport.c b/gcc/testsuite/gcc.target/aarch64/feature-memtag-backport.c
new file mode 100644
index 0000000000000000000000000000000000000000..5cb071b77ccbd11039d4b8ee9e2cd78a708f2fad
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/feature-memtag-backport.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-march=armv8-a+memtag" } */
+
+#include <arm_acle.h>
+
+int *bar (int *src) {
+        return __arm_mte_create_random_tag(src, 2<<16-1);
+}
+
+/* { dg-final { scan-assembler {\tirg\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/feature-sha3-backport.c b/gcc/testsuite/gcc.target/aarch64/feature-sha3-backport.c
new file mode 100644
index 0000000000000000000000000000000000000000..e194f1a10ea12439195c8cf1aae08dae26607c14
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/feature-sha3-backport.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-march=armv8-a+sha3" } */
+
+#include <arm_neon.h>
+
+uint64x2_t bar (uint64x2_t a, uint64x2_t b, uint64x2_t c) {
+        return vsha512hq_u64(a, b, c);
+}
+
+/* { dg-final { scan-assembler {\tsha512h\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/feature-sm4-backport.c b/gcc/testsuite/gcc.target/aarch64/feature-sm4-backport.c
new file mode 100644
index 0000000000000000000000000000000000000000..604a58bcb68e5753d2eec661a374b4983ad29088
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/feature-sm4-backport.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-march=armv8-a+sm4" } */
+
+#include <arm_neon.h>
+
+uint32x4_t bar (uint32x4_t a, uint32x4_t b, uint32x4_t c) {
+        return vsm3tt1aq_u32(a, b, c, 2);
+}
+
+/* { dg-final { scan-assembler {\tsm3tt1a\t} } } */

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [GCC 13 PATCH] aarch64: Remove architecture dependencies from intrinsics
  2023-07-19 16:44     ` [GCC 13 PATCH] " Andrew Carlotti
@ 2023-07-19 18:35       ` Ramana Radhakrishnan
  2023-07-19 23:27         ` Andrew Carlotti
  2023-07-20  6:48       ` Richard Sandiford
  1 sibling, 1 reply; 9+ messages in thread
From: Ramana Radhakrishnan @ 2023-07-19 18:35 UTC (permalink / raw)
  To: Andrew Carlotti; +Cc: gcc-patches, richard.sandiford, Richard Earnshaw

On Wed, Jul 19, 2023 at 5:44 PM Andrew Carlotti via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> Updated patch to fix the fp16 intrinsic pragmas, and pushed to master.
> OK to backport to GCC 13?
>
>
> Many intrinsics currently depend on both an architecture version and a
> feature, despite the corresponding instructions being available within
> GCC at lower architecture versions.
>
> LLVM has already removed these explicit architecture version
> dependences; this patch does the same for GCC. Note that +fp16 does not
> imply +simd, so we need to add an explicit +simd for the Neon fp16
> intrinsics.
>
> Binutils did not previously support all of these architecture+feature
> combinations, but this problem is already reachable from GCC.  For
> example, compiling the test gcc.target/aarch64/usadv16qi-dotprod.c
> with -O3 -march=armv8-a+dotprod has resulted in an assembler error since
> GCC 10.  This is fixed in Binutils 2.41.

Are there any implementations that actually implement v8-a + dotprod?
As far as I'm aware, v8.2-A was the base architecture where this was
allowed. Has this changed recently?


regards
Ramana

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [GCC 13 PATCH] aarch64: Remove architecture dependencies from intrinsics
  2023-07-19 18:35       ` Ramana Radhakrishnan
@ 2023-07-19 23:27         ` Andrew Carlotti
  0 siblings, 0 replies; 9+ messages in thread
From: Andrew Carlotti @ 2023-07-19 23:27 UTC (permalink / raw)
  To: Ramana Radhakrishnan; +Cc: gcc-patches, richard.sandiford, Richard Earnshaw

On Wed, Jul 19, 2023 at 07:35:26PM +0100, Ramana Radhakrishnan wrote:
> On Wed, Jul 19, 2023 at 5:44 PM Andrew Carlotti via Gcc-patches
> <gcc-patches@gcc.gnu.org> wrote:
> >
> > Updated patch to fix the fp16 intrinsic pragmas, and pushed to master.
> > OK to backport to GCC 13?
> >
> >
> > Many intrinsics currently depend on both an architecture version and a
> > feature, despite the corresponding instructions being available within
> > GCC at lower architecture versions.
> >
> > LLVM has already removed these explicit architecture version
> > dependences; this patch does the same for GCC. Note that +fp16 does not
> > imply +simd, so we need to add an explicit +simd for the Neon fp16
> > intrinsics.
> >
> > Binutils did not previously support all of these architecture+feature
> > combinations, but this problem is already reachable from GCC.  For
> > example, compiling the test gcc.target/aarch64/usadv16qi-dotprod.c
> > with -O3 -march=armv8-a+dotprod has resulted in an assembler error since
> > GCC 10.  This is fixed in Binutils 2.41.
> 
> Are there any implementations that actually implement v8-a + dotprod?
> As far as I'm aware, v8.2-A was the base architecture where this was
> allowed. Has this changed recently?
> 
> 
> regards
> Ramana

I don't recall whether there are any physical implementations of DotProd
without Armv8.2, but similar situations have already occurred with other
features.

There are also situations where developers wish to enable only a subset of
available features.  For example, the existing restrictions in GCC have forced
Chromium to disable their memtag support when building with GCC [1]; with this
patch, they will be able to re-enable memtag support from GCC 14 (and GCC 13.x
when this is backported).

I don't see any advantages to trying to enforce minimum architecture versions
for features in GCC, except perhaps maintaining the status quo.  But the status
quo is already rather inconsistent, and these changes only make GCC more
permissive (and only for options that currently don't work).


[1] https://chromium-review.googlesource.com/c/chromium/src/+/3238466

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [GCC 13 PATCH] aarch64: Remove architecture dependencies from intrinsics
  2023-07-19 16:44     ` [GCC 13 PATCH] " Andrew Carlotti
  2023-07-19 18:35       ` Ramana Radhakrishnan
@ 2023-07-20  6:48       ` Richard Sandiford
  2023-07-20  7:37         ` Richard Biener
  1 sibling, 1 reply; 9+ messages in thread
From: Richard Sandiford @ 2023-07-20  6:48 UTC (permalink / raw)
  To: Andrew Carlotti; +Cc: gcc-patches

Andrew Carlotti <andrew.carlotti@arm.com> writes:
> Updated patch to fix the fp16 intrinsic pragmas, and pushed to master.
> OK to backport to GCC 13?

OK, thanks.

Richard

> Many intrinsics currently depend on both an architecture version and a
> feature, despite the corresponding instructions being available within
> GCC at lower architecture versions.
>
> LLVM has already removed these explicit architecture version
> dependences; this patch does the same for GCC. Note that +fp16 does not
> imply +simd, so we need to add an explicit +simd for the Neon fp16
> intrinsics.
>
> Binutils did not previously support all of these architecture+feature
> combinations, but this problem is already reachable from GCC.  For
> example, compiling the test gcc.target/aarch64/usadv16qi-dotprod.c
> with -O3 -march=armv8-a+dotprod has resulted in an assembler error since
> GCC 10.  This is fixed in Binutils 2.41.
>
> This patch retains explicit architecture version dependencies for
> features that do not currently have a separate feature flag.
>
> gcc/ChangeLog:
>
>  * config/aarch64/aarch64.h (TARGET_MEMTAG): Remove armv8.5
>  dependency.
>  * config/aarch64/arm_acle.h: Remove unnecessary armv8.x
>  dependencies from target pragmas.
>  * config/aarch64/arm_fp16.h (target): Likewise.
>  * config/aarch64/arm_neon.h (target): Likewise.
>
> gcc/testsuite/ChangeLog:
>
>  * gcc.target/aarch64/feature-bf16-backport.c: New test.
>  * gcc.target/aarch64/feature-dotprod-backport.c: New test.
>  * gcc.target/aarch64/feature-fp16-backport.c: New test.
>  * gcc.target/aarch64/feature-fp16-scalar-backport.c: New test.
>  * gcc.target/aarch64/feature-fp16fml-backport.c: New test.
>  * gcc.target/aarch64/feature-i8mm-backport.c: New test.
>  * gcc.target/aarch64/feature-memtag-backport.c: New test.
>  * gcc.target/aarch64/feature-sha3-backport.c: New test.
>  * gcc.target/aarch64/feature-sm4-backport.c: New test.
>
> ---
>
> diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
> index a01f1ee99d85917941ffba55bc3b4dcac87b41f6..2b0fc97bb71e9d560ae26035c7d7142682e46c38 100644
> --- a/gcc/config/aarch64/aarch64.h
> +++ b/gcc/config/aarch64/aarch64.h
> @@ -292,7 +292,7 @@ enum class aarch64_feature : unsigned char {
>  #define TARGET_RNG (AARCH64_ISA_RNG)
>  
>  /* Memory Tagging instructions optional to Armv8.5 enabled through +memtag.  */
> -#define TARGET_MEMTAG (AARCH64_ISA_V8_5A && AARCH64_ISA_MEMTAG)
> +#define TARGET_MEMTAG (AARCH64_ISA_MEMTAG)
>  
>  /* I8MM instructions are enabled through +i8mm.  */
>  #define TARGET_I8MM (AARCH64_ISA_I8MM)
> diff --git a/gcc/config/aarch64/arm_acle.h b/gcc/config/aarch64/arm_acle.h
> index 3b6b63e6805432b5f1686745f987c52d2967c7c1..7599a32301dadf80760d3cb40a8685d2e6a476fb 100644
> --- a/gcc/config/aarch64/arm_acle.h
> +++ b/gcc/config/aarch64/arm_acle.h
> @@ -292,7 +292,7 @@ __rndrrs (uint64_t *__res)
>  #pragma GCC pop_options
>  
>  #pragma GCC push_options
> -#pragma GCC target ("arch=armv8.5-a+memtag")
> +#pragma GCC target ("+nothing+memtag")
>  
>  #define __arm_mte_create_random_tag(__ptr, __u64_mask) \
>    __builtin_aarch64_memtag_irg(__ptr, __u64_mask)
> diff --git a/gcc/config/aarch64/arm_fp16.h b/gcc/config/aarch64/arm_fp16.h
> index 350f8cc33d99e16137e9d70fa7958b10924dc67f..c10f9dcf7e097ded1740955addcd73348649dc56 100644
> --- a/gcc/config/aarch64/arm_fp16.h
> +++ b/gcc/config/aarch64/arm_fp16.h
> @@ -30,7 +30,7 @@
>  #include <stdint.h>
>  
>  #pragma GCC push_options
> -#pragma GCC target ("arch=armv8.2-a+fp16")
> +#pragma GCC target ("+nothing+fp16")
>  
>  typedef __fp16 float16_t;
>  
> diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
> index 0ace1eeddb97443433c091d2363403fcf2907654..349f3167699447eb397af482eaeadf8a07617025 100644
> --- a/gcc/config/aarch64/arm_neon.h
> +++ b/gcc/config/aarch64/arm_neon.h
> @@ -25590,7 +25590,7 @@ __INTERLEAVE_LIST (zip)
>  #include "arm_fp16.h"
>  
>  #pragma GCC push_options
> -#pragma GCC target ("arch=armv8.2-a+fp16")
> +#pragma GCC target ("+nothing+simd+fp16")
>  
>  /* ARMv8.2-A FP16 one operand vector intrinsics.  */
>  
> @@ -26753,7 +26753,7 @@ vminnmvq_f16 (float16x8_t __a)
>  /* AdvSIMD Dot Product intrinsics.  */
>  
>  #pragma GCC push_options
> -#pragma GCC target ("arch=armv8.2-a+dotprod")
> +#pragma GCC target ("+nothing+dotprod")
>  
>  __extension__ extern __inline uint32x2_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> @@ -26844,7 +26844,7 @@ vdotq_laneq_s32 (int32x4_t __r, int8x16_t __a, int8x16_t __b, const int __index)
>  #pragma GCC pop_options
>  
>  #pragma GCC push_options
> -#pragma GCC target ("arch=armv8.2-a+sm4")
> +#pragma GCC target ("+nothing+sm4")
>  
>  __extension__ extern __inline uint32x4_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> @@ -26911,7 +26911,7 @@ vsm4ekeyq_u32 (uint32x4_t __a, uint32x4_t __b)
>  #pragma GCC pop_options
>  
>  #pragma GCC push_options
> -#pragma GCC target ("arch=armv8.2-a+sha3")
> +#pragma GCC target ("+nothing+sha3")
>  
>  __extension__ extern __inline uint64x2_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> @@ -27547,7 +27547,7 @@ vcmlaq_rot270_laneq_f32 (float32x4_t __r, float32x4_t __a, float32x4_t __b,
>  #pragma GCC pop_options
>  
>  #pragma GCC push_options
> -#pragma GCC target ("arch=armv8.2-a+fp16fml")
> +#pragma GCC target ("+nothing+fp16fml")
>  
>  __extension__ extern __inline float32x2_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> @@ -27856,7 +27856,7 @@ vrnd64xq_f64 (float64x2_t __a)
>  #include "arm_bf16.h"
>  
>  #pragma GCC push_options
> -#pragma GCC target ("arch=armv8.2-a+bf16")
> +#pragma GCC target ("+nothing+bf16")
>  
>  __extension__ extern __inline bfloat16x4_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> @@ -28535,7 +28535,7 @@ vst4q_lane_bf16 (bfloat16_t *__ptr, bfloat16x8x4_t __val, const int __lane)
>  /* AdvSIMD 8-bit Integer Matrix Multiply (I8MM) intrinsics.  */
>  
>  #pragma GCC push_options
> -#pragma GCC target ("arch=armv8.2-a+i8mm")
> +#pragma GCC target ("+nothing+i8mm")
>  
>  __extension__ extern __inline int32x2_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> diff --git a/gcc/testsuite/gcc.target/aarch64/feature-bf16-backport.c b/gcc/testsuite/gcc.target/aarch64/feature-bf16-backport.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..3a03255bbb9a3e3da45c9f2ecafaf12685278057
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/feature-bf16-backport.c
> @@ -0,0 +1,10 @@
> +/* { dg-do compile } */
> +/* { dg-additional-options "-march=armv8-a+bf16" } */
> +
> +#include <arm_neon.h>
> +
> +float32x4_t bar (float32x4_t r, bfloat16x8_t a, bfloat16x8_t b) {
> +        return vbfmlalbq_f32 (r, a, b);
> +}
> +
> +/* { dg-final { scan-assembler {\tbfmlalb\t} } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/feature-dotprod-backport.c b/gcc/testsuite/gcc.target/aarch64/feature-dotprod-backport.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..5f66fffa5f5b09a9c04da7b861055e3aa89bccec
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/feature-dotprod-backport.c
> @@ -0,0 +1,10 @@
> +/* { dg-do compile } */
> +/* { dg-additional-options "-march=armv8-a+dotprod" } */
> +
> +#include <arm_neon.h>
> +
> +uint32x4_t bar (uint32x4_t r, uint8x16_t a, uint8x16_t b) {
> +        return vdotq_u32(r, a, b);
> +}
> +
> +/* { dg-final { scan-assembler {\tudot\t} } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/feature-fp16-backport.c b/gcc/testsuite/gcc.target/aarch64/feature-fp16-backport.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..eb94ae060664eef4275023440c0a18b52ae27b42
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/feature-fp16-backport.c
> @@ -0,0 +1,10 @@
> +/* { dg-do compile } */
> +/* { dg-additional-options "-march=armv8-a+fp16" } */
> +
> +#include <arm_neon.h>
> +
> +float16x8_t bar (float16x8_t a, float16x8_t b) {
> +        return vaddq_f16(a, b);
> +}
> +
> +/* { dg-final { scan-assembler {\tfadd\t} } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/feature-fp16-scalar-backport.c b/gcc/testsuite/gcc.target/aarch64/feature-fp16-scalar-backport.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..9cde6b5558c2e3699620d0d35ebc3e679fdfe5eb
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/feature-fp16-scalar-backport.c
> @@ -0,0 +1,10 @@
> +/* { dg-do compile } */
> +/* { dg-additional-options "-march=armv8-a+fp16+nosimd" } */
> +
> +#include <arm_fp16.h>
> +
> +float16_t bar (float16_t a, float16_t b) {
> +        return vaddh_f16(a, b);
> +}
> +
> +/* { dg-final { scan-assembler {\tfadd\t} } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/feature-fp16fml-backport.c b/gcc/testsuite/gcc.target/aarch64/feature-fp16fml-backport.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..86c4748b86d9730d433cc206700d7789d996bb6d
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/feature-fp16fml-backport.c
> @@ -0,0 +1,10 @@
> +/* { dg-do compile } */
> +/* { dg-additional-options "-march=armv8-a+fp16fml" } */
> +
> +#include <arm_neon.h>
> +
> +float32x4_t bar (float32x4_t r, float16x8_t a, float16x8_t b) {
> +        return vfmlalq_high_f16 (r, a, b);
> +}
> +
> +/* { dg-final { scan-assembler {\tfmlal2\t} } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/feature-i8mm-backport.c b/gcc/testsuite/gcc.target/aarch64/feature-i8mm-backport.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..6dd0214faab9d470a4f363252d48ac0de92bfe7b
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/feature-i8mm-backport.c
> @@ -0,0 +1,10 @@
> +/* { dg-do compile } */
> +/* { dg-additional-options "-march=armv8-a+i8mm" } */
> +
> +#include <arm_neon.h>
> +
> +int32x4_t bar (int32x4_t r, int8x16_t a, int8x16_t b) {
> +        return vmmlaq_s32 (r, a, b);
> +}
> +
> +/* { dg-final { scan-assembler {\tsmmla\t} } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/feature-memtag-backport.c b/gcc/testsuite/gcc.target/aarch64/feature-memtag-backport.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..5cb071b77ccbd11039d4b8ee9e2cd78a708f2fad
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/feature-memtag-backport.c
> @@ -0,0 +1,10 @@
> +/* { dg-do compile } */
> +/* { dg-additional-options "-march=armv8-a+memtag" } */
> +
> +#include <arm_acle.h>
> +
> +int *bar (int *src) {
> +        return __arm_mte_create_random_tag(src, 2<<16-1);
> +}
> +
> +/* { dg-final { scan-assembler {\tirg\t} } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/feature-sha3-backport.c b/gcc/testsuite/gcc.target/aarch64/feature-sha3-backport.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..e194f1a10ea12439195c8cf1aae08dae26607c14
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/feature-sha3-backport.c
> @@ -0,0 +1,10 @@
> +/* { dg-do compile } */
> +/* { dg-additional-options "-march=armv8-a+sha3" } */
> +
> +#include <arm_neon.h>
> +
> +uint64x2_t bar (uint64x2_t a, uint64x2_t b, uint64x2_t c) {
> +        return vsha512hq_u64(a, b, c);
> +}
> +
> +/* { dg-final { scan-assembler {\tsha512h\t} } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/feature-sm4-backport.c b/gcc/testsuite/gcc.target/aarch64/feature-sm4-backport.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..604a58bcb68e5753d2eec661a374b4983ad29088
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/feature-sm4-backport.c
> @@ -0,0 +1,10 @@
> +/* { dg-do compile } */
> +/* { dg-additional-options "-march=armv8-a+sm4" } */
> +
> +#include <arm_neon.h>
> +
> +uint32x4_t bar (uint32x4_t a, uint32x4_t b, uint32x4_t c) {
> +        return vsm3tt1aq_u32(a, b, c, 2);
> +}
> +
> +/* { dg-final { scan-assembler {\tsm3tt1a\t} } } */

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [GCC 13 PATCH] aarch64: Remove architecture dependencies from intrinsics
  2023-07-20  6:48       ` Richard Sandiford
@ 2023-07-20  7:37         ` Richard Biener
  2023-07-20  8:22           ` Andrew Carlotti
  0 siblings, 1 reply; 9+ messages in thread
From: Richard Biener @ 2023-07-20  7:37 UTC (permalink / raw)
  To: Richard Sandiford, Andrew Carlotti, gcc-patches

On Thu, Jul 20, 2023 at 8:49 AM Richard Sandiford via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> Andrew Carlotti <andrew.carlotti@arm.com> writes:
> > Updated patch to fix the fp16 intrinsic pragmas, and pushed to master.
> > OK to backport to GCC 13?
>
> OK, thanks.

If you want it in 13.2, please push it really soon; we want to do 13.2 RC1
today.

Richard.

> Richard
>
> > Many intrinsics currently depend on both an architecture version and a
> > feature, despite the corresponding instructions being available within
> > GCC at lower architecture versions.
> >
> > LLVM has already removed these explicit architecture version
> > dependences; this patch does the same for GCC. Note that +fp16 does not
> > imply +simd, so we need to add an explicit +simd for the Neon fp16
> > intrinsics.
> >
> > Binutils did not previously support all of these architecture+feature
> > combinations, but this problem is already reachable from GCC.  For
> > example, compiling the test gcc.target/aarch64/usadv16qi-dotprod.c
> > with -O3 -march=armv8-a+dotprod has resulted in an assembler error since
> > GCC 10.  This is fixed in Binutils 2.41.
> >
> > This patch retains explicit architecture version dependencies for
> > features that do not currently have a separate feature flag.
> >
> > gcc/ChangeLog:
> >
> >  * config/aarch64/aarch64.h (TARGET_MEMTAG): Remove armv8.5
> >  dependency.
> >  * config/aarch64/arm_acle.h: Remove unnecessary armv8.x
> >  dependencies from target pragmas.
> >  * config/aarch64/arm_fp16.h (target): Likewise.
> >  * config/aarch64/arm_neon.h (target): Likewise.
> >
> > gcc/testsuite/ChangeLog:
> >
> >  * gcc.target/aarch64/feature-bf16-backport.c: New test.
> >  * gcc.target/aarch64/feature-dotprod-backport.c: New test.
> >  * gcc.target/aarch64/feature-fp16-backport.c: New test.
> >  * gcc.target/aarch64/feature-fp16-scalar-backport.c: New test.
> >  * gcc.target/aarch64/feature-fp16fml-backport.c: New test.
> >  * gcc.target/aarch64/feature-i8mm-backport.c: New test.
> >  * gcc.target/aarch64/feature-memtag-backport.c: New test.
> >  * gcc.target/aarch64/feature-sha3-backport.c: New test.
> >  * gcc.target/aarch64/feature-sm4-backport.c: New test.
> >
> > ---
> >
> > diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
> > index a01f1ee99d85917941ffba55bc3b4dcac87b41f6..2b0fc97bb71e9d560ae26035c7d7142682e46c38 100644
> > --- a/gcc/config/aarch64/aarch64.h
> > +++ b/gcc/config/aarch64/aarch64.h
> > @@ -292,7 +292,7 @@ enum class aarch64_feature : unsigned char {
> >  #define TARGET_RNG (AARCH64_ISA_RNG)
> >
> >  /* Memory Tagging instructions optional to Armv8.5 enabled through +memtag.  */
> > -#define TARGET_MEMTAG (AARCH64_ISA_V8_5A && AARCH64_ISA_MEMTAG)
> > +#define TARGET_MEMTAG (AARCH64_ISA_MEMTAG)
> >
> >  /* I8MM instructions are enabled through +i8mm.  */
> >  #define TARGET_I8MM (AARCH64_ISA_I8MM)
> > diff --git a/gcc/config/aarch64/arm_acle.h b/gcc/config/aarch64/arm_acle.h
> > index 3b6b63e6805432b5f1686745f987c52d2967c7c1..7599a32301dadf80760d3cb40a8685d2e6a476fb 100644
> > --- a/gcc/config/aarch64/arm_acle.h
> > +++ b/gcc/config/aarch64/arm_acle.h
> > @@ -292,7 +292,7 @@ __rndrrs (uint64_t *__res)
> >  #pragma GCC pop_options
> >
> >  #pragma GCC push_options
> > -#pragma GCC target ("arch=armv8.5-a+memtag")
> > +#pragma GCC target ("+nothing+memtag")
> >
> >  #define __arm_mte_create_random_tag(__ptr, __u64_mask) \
> >    __builtin_aarch64_memtag_irg(__ptr, __u64_mask)
> > diff --git a/gcc/config/aarch64/arm_fp16.h b/gcc/config/aarch64/arm_fp16.h
> > index 350f8cc33d99e16137e9d70fa7958b10924dc67f..c10f9dcf7e097ded1740955addcd73348649dc56 100644
> > --- a/gcc/config/aarch64/arm_fp16.h
> > +++ b/gcc/config/aarch64/arm_fp16.h
> > @@ -30,7 +30,7 @@
> >  #include <stdint.h>
> >
> >  #pragma GCC push_options
> > -#pragma GCC target ("arch=armv8.2-a+fp16")
> > +#pragma GCC target ("+nothing+fp16")
> >
> >  typedef __fp16 float16_t;
> >
> > diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
> > index 0ace1eeddb97443433c091d2363403fcf2907654..349f3167699447eb397af482eaeadf8a07617025 100644
> > --- a/gcc/config/aarch64/arm_neon.h
> > +++ b/gcc/config/aarch64/arm_neon.h
> > @@ -25590,7 +25590,7 @@ __INTERLEAVE_LIST (zip)
> >  #include "arm_fp16.h"
> >
> >  #pragma GCC push_options
> > -#pragma GCC target ("arch=armv8.2-a+fp16")
> > +#pragma GCC target ("+nothing+simd+fp16")
> >
> >  /* ARMv8.2-A FP16 one operand vector intrinsics.  */
> >
> > @@ -26753,7 +26753,7 @@ vminnmvq_f16 (float16x8_t __a)
> >  /* AdvSIMD Dot Product intrinsics.  */
> >
> >  #pragma GCC push_options
> > -#pragma GCC target ("arch=armv8.2-a+dotprod")
> > +#pragma GCC target ("+nothing+dotprod")
> >
> >  __extension__ extern __inline uint32x2_t
> >  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> > @@ -26844,7 +26844,7 @@ vdotq_laneq_s32 (int32x4_t __r, int8x16_t __a, int8x16_t __b, const int __index)
> >  #pragma GCC pop_options
> >
> >  #pragma GCC push_options
> > -#pragma GCC target ("arch=armv8.2-a+sm4")
> > +#pragma GCC target ("+nothing+sm4")
> >
> >  __extension__ extern __inline uint32x4_t
> >  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> > @@ -26911,7 +26911,7 @@ vsm4ekeyq_u32 (uint32x4_t __a, uint32x4_t __b)
> >  #pragma GCC pop_options
> >
> >  #pragma GCC push_options
> > -#pragma GCC target ("arch=armv8.2-a+sha3")
> > +#pragma GCC target ("+nothing+sha3")
> >
> >  __extension__ extern __inline uint64x2_t
> >  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> > @@ -27547,7 +27547,7 @@ vcmlaq_rot270_laneq_f32 (float32x4_t __r, float32x4_t __a, float32x4_t __b,
> >  #pragma GCC pop_options
> >
> >  #pragma GCC push_options
> > -#pragma GCC target ("arch=armv8.2-a+fp16fml")
> > +#pragma GCC target ("+nothing+fp16fml")
> >
> >  __extension__ extern __inline float32x2_t
> >  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> > @@ -27856,7 +27856,7 @@ vrnd64xq_f64 (float64x2_t __a)
> >  #include "arm_bf16.h"
> >
> >  #pragma GCC push_options
> > -#pragma GCC target ("arch=armv8.2-a+bf16")
> > +#pragma GCC target ("+nothing+bf16")
> >
> >  __extension__ extern __inline bfloat16x4_t
> >  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> > @@ -28535,7 +28535,7 @@ vst4q_lane_bf16 (bfloat16_t *__ptr, bfloat16x8x4_t __val, const int __lane)
> >  /* AdvSIMD 8-bit Integer Matrix Multiply (I8MM) intrinsics.  */
> >
> >  #pragma GCC push_options
> > -#pragma GCC target ("arch=armv8.2-a+i8mm")
> > +#pragma GCC target ("+nothing+i8mm")
> >
> >  __extension__ extern __inline int32x2_t
> >  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> > diff --git a/gcc/testsuite/gcc.target/aarch64/feature-bf16-backport.c b/gcc/testsuite/gcc.target/aarch64/feature-bf16-backport.c
> > new file mode 100644
> > index 0000000000000000000000000000000000000000..3a03255bbb9a3e3da45c9f2ecafaf12685278057
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/aarch64/feature-bf16-backport.c
> > @@ -0,0 +1,10 @@
> > +/* { dg-do compile } */
> > +/* { dg-additional-options "-march=armv8-a+bf16" } */
> > +
> > +#include <arm_neon.h>
> > +
> > +float32x4_t bar (float32x4_t r, bfloat16x8_t a, bfloat16x8_t b) {
> > +        return vbfmlalbq_f32 (r, a, b);
> > +}
> > +
> > +/* { dg-final { scan-assembler {\tbfmlalb\t} } } */
> > diff --git a/gcc/testsuite/gcc.target/aarch64/feature-dotprod-backport.c b/gcc/testsuite/gcc.target/aarch64/feature-dotprod-backport.c
> > new file mode 100644
> > index 0000000000000000000000000000000000000000..5f66fffa5f5b09a9c04da7b861055e3aa89bccec
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/aarch64/feature-dotprod-backport.c
> > @@ -0,0 +1,10 @@
> > +/* { dg-do compile } */
> > +/* { dg-additional-options "-march=armv8-a+dotprod" } */
> > +
> > +#include <arm_neon.h>
> > +
> > +uint32x4_t bar (uint32x4_t r, uint8x16_t a, uint8x16_t b) {
> > +        return vdotq_u32(r, a, b);
> > +}
> > +
> > +/* { dg-final { scan-assembler {\tudot\t} } } */
> > diff --git a/gcc/testsuite/gcc.target/aarch64/feature-fp16-backport.c b/gcc/testsuite/gcc.target/aarch64/feature-fp16-backport.c
> > new file mode 100644
> > index 0000000000000000000000000000000000000000..eb94ae060664eef4275023440c0a18b52ae27b42
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/aarch64/feature-fp16-backport.c
> > @@ -0,0 +1,10 @@
> > +/* { dg-do compile } */
> > +/* { dg-additional-options "-march=armv8-a+fp16" } */
> > +
> > +#include <arm_neon.h>
> > +
> > +float16x8_t bar (float16x8_t a, float16x8_t b) {
> > +        return vaddq_f16(a, b);
> > +}
> > +
> > +/* { dg-final { scan-assembler {\tfadd\t} } } */
> > diff --git a/gcc/testsuite/gcc.target/aarch64/feature-fp16-scalar-backport.c b/gcc/testsuite/gcc.target/aarch64/feature-fp16-scalar-backport.c
> > new file mode 100644
> > index 0000000000000000000000000000000000000000..9cde6b5558c2e3699620d0d35ebc3e679fdfe5eb
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/aarch64/feature-fp16-scalar-backport.c
> > @@ -0,0 +1,10 @@
> > +/* { dg-do compile } */
> > +/* { dg-additional-options "-march=armv8-a+fp16+nosimd" } */
> > +
> > +#include <arm_fp16.h>
> > +
> > +float16_t bar (float16_t a, float16_t b) {
> > +        return vaddh_f16(a, b);
> > +}
> > +
> > +/* { dg-final { scan-assembler {\tfadd\t} } } */
> > diff --git a/gcc/testsuite/gcc.target/aarch64/feature-fp16fml-backport.c b/gcc/testsuite/gcc.target/aarch64/feature-fp16fml-backport.c
> > new file mode 100644
> > index 0000000000000000000000000000000000000000..86c4748b86d9730d433cc206700d7789d996bb6d
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/aarch64/feature-fp16fml-backport.c
> > @@ -0,0 +1,10 @@
> > +/* { dg-do compile } */
> > +/* { dg-additional-options "-march=armv8-a+fp16fml" } */
> > +
> > +#include <arm_neon.h>
> > +
> > +float32x4_t bar (float32x4_t r, float16x8_t a, float16x8_t b) {
> > +        return vfmlalq_high_f16 (r, a, b);
> > +}
> > +
> > +/* { dg-final { scan-assembler {\tfmlal2\t} } } */
> > diff --git a/gcc/testsuite/gcc.target/aarch64/feature-i8mm-backport.c b/gcc/testsuite/gcc.target/aarch64/feature-i8mm-backport.c
> > new file mode 100644
> > index 0000000000000000000000000000000000000000..6dd0214faab9d470a4f363252d48ac0de92bfe7b
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/aarch64/feature-i8mm-backport.c
> > @@ -0,0 +1,10 @@
> > +/* { dg-do compile } */
> > +/* { dg-additional-options "-march=armv8-a+i8mm" } */
> > +
> > +#include <arm_neon.h>
> > +
> > +int32x4_t bar (int32x4_t r, int8x16_t a, int8x16_t b) {
> > +        return vmmlaq_s32 (r, a, b);
> > +}
> > +
> > +/* { dg-final { scan-assembler {\tsmmla\t} } } */
> > diff --git a/gcc/testsuite/gcc.target/aarch64/feature-memtag-backport.c b/gcc/testsuite/gcc.target/aarch64/feature-memtag-backport.c
> > new file mode 100644
> > index 0000000000000000000000000000000000000000..5cb071b77ccbd11039d4b8ee9e2cd78a708f2fad
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/aarch64/feature-memtag-backport.c
> > @@ -0,0 +1,10 @@
> > +/* { dg-do compile } */
> > +/* { dg-additional-options "-march=armv8-a+memtag" } */
> > +
> > +#include <arm_acle.h>
> > +
> > +int *bar (int *src) {
> > +        return __arm_mte_create_random_tag(src, 2<<16-1);
> > +}
> > +
> > +/* { dg-final { scan-assembler {\tirg\t} } } */
> > diff --git a/gcc/testsuite/gcc.target/aarch64/feature-sha3-backport.c b/gcc/testsuite/gcc.target/aarch64/feature-sha3-backport.c
> > new file mode 100644
> > index 0000000000000000000000000000000000000000..e194f1a10ea12439195c8cf1aae08dae26607c14
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/aarch64/feature-sha3-backport.c
> > @@ -0,0 +1,10 @@
> > +/* { dg-do compile } */
> > +/* { dg-additional-options "-march=armv8-a+sha3" } */
> > +
> > +#include <arm_neon.h>
> > +
> > +uint64x2_t bar (uint64x2_t a, uint64x2_t b, uint64x2_t c) {
> > +        return vsha512hq_u64(a, b, c);
> > +}
> > +
> > +/* { dg-final { scan-assembler {\tsha512h\t} } } */
> > diff --git a/gcc/testsuite/gcc.target/aarch64/feature-sm4-backport.c b/gcc/testsuite/gcc.target/aarch64/feature-sm4-backport.c
> > new file mode 100644
> > index 0000000000000000000000000000000000000000..604a58bcb68e5753d2eec661a374b4983ad29088
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/aarch64/feature-sm4-backport.c
> > @@ -0,0 +1,10 @@
> > +/* { dg-do compile } */
> > +/* { dg-additional-options "-march=armv8-a+sm4" } */
> > +
> > +#include <arm_neon.h>
> > +
> > +uint32x4_t bar (uint32x4_t a, uint32x4_t b, uint32x4_t c) {
> > +        return vsm3tt1aq_u32(a, b, c, 2);
> > +}
> > +
> > +/* { dg-final { scan-assembler {\tsm3tt1a\t} } } */

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [GCC 13 PATCH] aarch64: Remove architecture dependencies from intrinsics
  2023-07-20  7:37         ` Richard Biener
@ 2023-07-20  8:22           ` Andrew Carlotti
  0 siblings, 0 replies; 9+ messages in thread
From: Andrew Carlotti @ 2023-07-20  8:22 UTC (permalink / raw)
  To: Richard Biener; +Cc: Richard Sandiford, gcc-patches

On Thu, Jul 20, 2023 at 09:37:14AM +0200, Richard Biener wrote:
> On Thu, Jul 20, 2023 at 8:49 AM Richard Sandiford via Gcc-patches
> <gcc-patches@gcc.gnu.org> wrote:
> >
> > Andrew Carlotti <andrew.carlotti@arm.com> writes:
> > > Updated patch to fix the fp16 intrinsic pragmas, and pushed to master.
> > > OK to backport to GCC 13?
> >
> > OK, thanks.
> 
> In case you want it in 13.2 please push it really soon, we want to do 13.2 RC1
> today.
> 
> Richard.

Pushed, thanks.

> 
> > Richard
> >
> > > Many intrinsics currently depend on both an architecture version and a
> > > feature, despite the corresponding instructions being available within
> > > GCC at lower architecture versions.
> > >
> > > LLVM has already removed these explicit architecture version
> > > dependencies; this patch does the same for GCC. Note that +fp16 does not
> > > imply +simd, so we need to add an explicit +simd for the Neon fp16
> > > intrinsics.
> > >
> > > Binutils did not previously support all of these architecture+feature
> > > combinations, but this problem is already reachable from GCC.  For
> > > example, compiling the test gcc.target/aarch64/usadv16qi-dotprod.c
> > > with -O3 -march=armv8-a+dotprod has resulted in an assembler error since
> > > GCC 10.  This is fixed in Binutils 2.41.
> > >
> > > This patch retains explicit architecture version dependencies for
> > > features that do not currently have a separate feature flag.
> > >
> > > gcc/ChangeLog:
> > >
> > >  * config/aarch64/aarch64.h (TARGET_MEMTAG): Remove armv8.5
> > >  dependency.
> > >  * config/aarch64/arm_acle.h: Remove unnecessary armv8.x
> > >  dependencies from target pragmas.
> > >  * config/aarch64/arm_fp16.h (target): Likewise.
> > >  * config/aarch64/arm_neon.h (target): Likewise.
> > >
> > > gcc/testsuite/ChangeLog:
> > >
> > >  * gcc.target/aarch64/feature-bf16-backport.c: New test.
> > >  * gcc.target/aarch64/feature-dotprod-backport.c: New test.
> > >  * gcc.target/aarch64/feature-fp16-backport.c: New test.
> > >  * gcc.target/aarch64/feature-fp16-scalar-backport.c: New test.
> > >  * gcc.target/aarch64/feature-fp16fml-backport.c: New test.
> > >  * gcc.target/aarch64/feature-i8mm-backport.c: New test.
> > >  * gcc.target/aarch64/feature-memtag-backport.c: New test.
> > >  * gcc.target/aarch64/feature-sha3-backport.c: New test.
> > >  * gcc.target/aarch64/feature-sm4-backport.c: New test.
> > >
> > > ---
> > >
> > > diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
> > > index a01f1ee99d85917941ffba55bc3b4dcac87b41f6..2b0fc97bb71e9d560ae26035c7d7142682e46c38 100644
> > > --- a/gcc/config/aarch64/aarch64.h
> > > +++ b/gcc/config/aarch64/aarch64.h
> > > @@ -292,7 +292,7 @@ enum class aarch64_feature : unsigned char {
> > >  #define TARGET_RNG (AARCH64_ISA_RNG)
> > >
> > >  /* Memory Tagging instructions optional to Armv8.5 enabled through +memtag.  */
> > > -#define TARGET_MEMTAG (AARCH64_ISA_V8_5A && AARCH64_ISA_MEMTAG)
> > > +#define TARGET_MEMTAG (AARCH64_ISA_MEMTAG)
> > >
> > >  /* I8MM instructions are enabled through +i8mm.  */
> > >  #define TARGET_I8MM (AARCH64_ISA_I8MM)
> > > diff --git a/gcc/config/aarch64/arm_acle.h b/gcc/config/aarch64/arm_acle.h
> > > index 3b6b63e6805432b5f1686745f987c52d2967c7c1..7599a32301dadf80760d3cb40a8685d2e6a476fb 100644
> > > --- a/gcc/config/aarch64/arm_acle.h
> > > +++ b/gcc/config/aarch64/arm_acle.h
> > > @@ -292,7 +292,7 @@ __rndrrs (uint64_t *__res)
> > >  #pragma GCC pop_options
> > >
> > >  #pragma GCC push_options
> > > -#pragma GCC target ("arch=armv8.5-a+memtag")
> > > +#pragma GCC target ("+nothing+memtag")
> > >
> > >  #define __arm_mte_create_random_tag(__ptr, __u64_mask) \
> > >    __builtin_aarch64_memtag_irg(__ptr, __u64_mask)
> > > diff --git a/gcc/config/aarch64/arm_fp16.h b/gcc/config/aarch64/arm_fp16.h
> > > index 350f8cc33d99e16137e9d70fa7958b10924dc67f..c10f9dcf7e097ded1740955addcd73348649dc56 100644
> > > --- a/gcc/config/aarch64/arm_fp16.h
> > > +++ b/gcc/config/aarch64/arm_fp16.h
> > > @@ -30,7 +30,7 @@
> > >  #include <stdint.h>
> > >
> > >  #pragma GCC push_options
> > > -#pragma GCC target ("arch=armv8.2-a+fp16")
> > > +#pragma GCC target ("+nothing+fp16")
> > >
> > >  typedef __fp16 float16_t;
> > >
> > > diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
> > > index 0ace1eeddb97443433c091d2363403fcf2907654..349f3167699447eb397af482eaeadf8a07617025 100644
> > > --- a/gcc/config/aarch64/arm_neon.h
> > > +++ b/gcc/config/aarch64/arm_neon.h
> > > @@ -25590,7 +25590,7 @@ __INTERLEAVE_LIST (zip)
> > >  #include "arm_fp16.h"
> > >
> > >  #pragma GCC push_options
> > > -#pragma GCC target ("arch=armv8.2-a+fp16")
> > > +#pragma GCC target ("+nothing+simd+fp16")
> > >
> > >  /* ARMv8.2-A FP16 one operand vector intrinsics.  */
> > >
> > > @@ -26753,7 +26753,7 @@ vminnmvq_f16 (float16x8_t __a)
> > >  /* AdvSIMD Dot Product intrinsics.  */
> > >
> > >  #pragma GCC push_options
> > > -#pragma GCC target ("arch=armv8.2-a+dotprod")
> > > +#pragma GCC target ("+nothing+dotprod")
> > >
> > >  __extension__ extern __inline uint32x2_t
> > >  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> > > @@ -26844,7 +26844,7 @@ vdotq_laneq_s32 (int32x4_t __r, int8x16_t __a, int8x16_t __b, const int __index)
> > >  #pragma GCC pop_options
> > >
> > >  #pragma GCC push_options
> > > -#pragma GCC target ("arch=armv8.2-a+sm4")
> > > +#pragma GCC target ("+nothing+sm4")
> > >
> > >  __extension__ extern __inline uint32x4_t
> > >  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> > > @@ -26911,7 +26911,7 @@ vsm4ekeyq_u32 (uint32x4_t __a, uint32x4_t __b)
> > >  #pragma GCC pop_options
> > >
> > >  #pragma GCC push_options
> > > -#pragma GCC target ("arch=armv8.2-a+sha3")
> > > +#pragma GCC target ("+nothing+sha3")
> > >
> > >  __extension__ extern __inline uint64x2_t
> > >  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> > > @@ -27547,7 +27547,7 @@ vcmlaq_rot270_laneq_f32 (float32x4_t __r, float32x4_t __a, float32x4_t __b,
> > >  #pragma GCC pop_options
> > >
> > >  #pragma GCC push_options
> > > -#pragma GCC target ("arch=armv8.2-a+fp16fml")
> > > +#pragma GCC target ("+nothing+fp16fml")
> > >
> > >  __extension__ extern __inline float32x2_t
> > >  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> > > @@ -27856,7 +27856,7 @@ vrnd64xq_f64 (float64x2_t __a)
> > >  #include "arm_bf16.h"
> > >
> > >  #pragma GCC push_options
> > > -#pragma GCC target ("arch=armv8.2-a+bf16")
> > > +#pragma GCC target ("+nothing+bf16")
> > >
> > >  __extension__ extern __inline bfloat16x4_t
> > >  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> > > @@ -28535,7 +28535,7 @@ vst4q_lane_bf16 (bfloat16_t *__ptr, bfloat16x8x4_t __val, const int __lane)
> > >  /* AdvSIMD 8-bit Integer Matrix Multiply (I8MM) intrinsics.  */
> > >
> > >  #pragma GCC push_options
> > > -#pragma GCC target ("arch=armv8.2-a+i8mm")
> > > +#pragma GCC target ("+nothing+i8mm")
> > >
> > >  __extension__ extern __inline int32x2_t
> > >  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> > > diff --git a/gcc/testsuite/gcc.target/aarch64/feature-bf16-backport.c b/gcc/testsuite/gcc.target/aarch64/feature-bf16-backport.c
> > > new file mode 100644
> > > index 0000000000000000000000000000000000000000..3a03255bbb9a3e3da45c9f2ecafaf12685278057
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.target/aarch64/feature-bf16-backport.c
> > > @@ -0,0 +1,10 @@
> > > +/* { dg-do compile } */
> > > +/* { dg-additional-options "-march=armv8-a+bf16" } */
> > > +
> > > +#include <arm_neon.h>
> > > +
> > > +float32x4_t bar (float32x4_t r, bfloat16x8_t a, bfloat16x8_t b) {
> > > +        return vbfmlalbq_f32 (r, a, b);
> > > +}
> > > +
> > > +/* { dg-final { scan-assembler {\tbfmlalb\t} } } */
> > > diff --git a/gcc/testsuite/gcc.target/aarch64/feature-dotprod-backport.c b/gcc/testsuite/gcc.target/aarch64/feature-dotprod-backport.c
> > > new file mode 100644
> > > index 0000000000000000000000000000000000000000..5f66fffa5f5b09a9c04da7b861055e3aa89bccec
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.target/aarch64/feature-dotprod-backport.c
> > > @@ -0,0 +1,10 @@
> > > +/* { dg-do compile } */
> > > +/* { dg-additional-options "-march=armv8-a+dotprod" } */
> > > +
> > > +#include <arm_neon.h>
> > > +
> > > +uint32x4_t bar (uint32x4_t r, uint8x16_t a, uint8x16_t b) {
> > > +        return vdotq_u32(r, a, b);
> > > +}
> > > +
> > > +/* { dg-final { scan-assembler {\tudot\t} } } */
> > > diff --git a/gcc/testsuite/gcc.target/aarch64/feature-fp16-backport.c b/gcc/testsuite/gcc.target/aarch64/feature-fp16-backport.c
> > > new file mode 100644
> > > index 0000000000000000000000000000000000000000..eb94ae060664eef4275023440c0a18b52ae27b42
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.target/aarch64/feature-fp16-backport.c
> > > @@ -0,0 +1,10 @@
> > > +/* { dg-do compile } */
> > > +/* { dg-additional-options "-march=armv8-a+fp16" } */
> > > +
> > > +#include <arm_neon.h>
> > > +
> > > +float16x8_t bar (float16x8_t a, float16x8_t b) {
> > > +        return vaddq_f16(a, b);
> > > +}
> > > +
> > > +/* { dg-final { scan-assembler {\tfadd\t} } } */
> > > diff --git a/gcc/testsuite/gcc.target/aarch64/feature-fp16-scalar-backport.c b/gcc/testsuite/gcc.target/aarch64/feature-fp16-scalar-backport.c
> > > new file mode 100644
> > > index 0000000000000000000000000000000000000000..9cde6b5558c2e3699620d0d35ebc3e679fdfe5eb
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.target/aarch64/feature-fp16-scalar-backport.c
> > > @@ -0,0 +1,10 @@
> > > +/* { dg-do compile } */
> > > +/* { dg-additional-options "-march=armv8-a+fp16+nosimd" } */
> > > +
> > > +#include <arm_fp16.h>
> > > +
> > > +float16_t bar (float16_t a, float16_t b) {
> > > +        return vaddh_f16(a, b);
> > > +}
> > > +
> > > +/* { dg-final { scan-assembler {\tfadd\t} } } */
> > > diff --git a/gcc/testsuite/gcc.target/aarch64/feature-fp16fml-backport.c b/gcc/testsuite/gcc.target/aarch64/feature-fp16fml-backport.c
> > > new file mode 100644
> > > index 0000000000000000000000000000000000000000..86c4748b86d9730d433cc206700d7789d996bb6d
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.target/aarch64/feature-fp16fml-backport.c
> > > @@ -0,0 +1,10 @@
> > > +/* { dg-do compile } */
> > > +/* { dg-additional-options "-march=armv8-a+fp16fml" } */
> > > +
> > > +#include <arm_neon.h>
> > > +
> > > +float32x4_t bar (float32x4_t r, float16x8_t a, float16x8_t b) {
> > > +        return vfmlalq_high_f16 (r, a, b);
> > > +}
> > > +
> > > +/* { dg-final { scan-assembler {\tfmlal2\t} } } */
> > > diff --git a/gcc/testsuite/gcc.target/aarch64/feature-i8mm-backport.c b/gcc/testsuite/gcc.target/aarch64/feature-i8mm-backport.c
> > > new file mode 100644
> > > index 0000000000000000000000000000000000000000..6dd0214faab9d470a4f363252d48ac0de92bfe7b
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.target/aarch64/feature-i8mm-backport.c
> > > @@ -0,0 +1,10 @@
> > > +/* { dg-do compile } */
> > > +/* { dg-additional-options "-march=armv8-a+i8mm" } */
> > > +
> > > +#include <arm_neon.h>
> > > +
> > > +int32x4_t bar (int32x4_t r, int8x16_t a, int8x16_t b) {
> > > +        return vmmlaq_s32 (r, a, b);
> > > +}
> > > +
> > > +/* { dg-final { scan-assembler {\tsmmla\t} } } */
> > > diff --git a/gcc/testsuite/gcc.target/aarch64/feature-memtag-backport.c b/gcc/testsuite/gcc.target/aarch64/feature-memtag-backport.c
> > > new file mode 100644
> > > index 0000000000000000000000000000000000000000..5cb071b77ccbd11039d4b8ee9e2cd78a708f2fad
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.target/aarch64/feature-memtag-backport.c
> > > @@ -0,0 +1,10 @@
> > > +/* { dg-do compile } */
> > > +/* { dg-additional-options "-march=armv8-a+memtag" } */
> > > +
> > > +#include <arm_acle.h>
> > > +
> > > +int *bar (int *src) {
> > > +        return __arm_mte_create_random_tag(src, 2<<16-1);
> > > +}
> > > +
> > > +/* { dg-final { scan-assembler {\tirg\t} } } */
> > > diff --git a/gcc/testsuite/gcc.target/aarch64/feature-sha3-backport.c b/gcc/testsuite/gcc.target/aarch64/feature-sha3-backport.c
> > > new file mode 100644
> > > index 0000000000000000000000000000000000000000..e194f1a10ea12439195c8cf1aae08dae26607c14
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.target/aarch64/feature-sha3-backport.c
> > > @@ -0,0 +1,10 @@
> > > +/* { dg-do compile } */
> > > +/* { dg-additional-options "-march=armv8-a+sha3" } */
> > > +
> > > +#include <arm_neon.h>
> > > +
> > > +uint64x2_t bar (uint64x2_t a, uint64x2_t b, uint64x2_t c) {
> > > +        return vsha512hq_u64(a, b, c);
> > > +}
> > > +
> > > +/* { dg-final { scan-assembler {\tsha512h\t} } } */
> > > diff --git a/gcc/testsuite/gcc.target/aarch64/feature-sm4-backport.c b/gcc/testsuite/gcc.target/aarch64/feature-sm4-backport.c
> > > new file mode 100644
> > > index 0000000000000000000000000000000000000000..604a58bcb68e5753d2eec661a374b4983ad29088
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.target/aarch64/feature-sm4-backport.c
> > > @@ -0,0 +1,10 @@
> > > +/* { dg-do compile } */
> > > +/* { dg-additional-options "-march=armv8-a+sm4" } */
> > > +
> > > +#include <arm_neon.h>
> > > +
> > > +uint32x4_t bar (uint32x4_t a, uint32x4_t b, uint32x4_t c) {
> > > +        return vsm3tt1aq_u32(a, b, c, 2);
> > > +}
> > > +
> > > +/* { dg-final { scan-assembler {\tsm3tt1a\t} } } */

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2023-07-20  8:22 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-06-26 13:55 [PATCH] aarch64: Remove architecture dependencies from intrinsics Andrew Carlotti
2023-06-27  6:23 ` Richard Sandiford
2023-06-29 18:24   ` Andrew Carlotti
2023-07-19 16:44     ` [GCC 13 PATCH] " Andrew Carlotti
2023-07-19 18:35       ` Ramana Radhakrishnan
2023-07-19 23:27         ` Andrew Carlotti
2023-07-20  6:48       ` Richard Sandiford
2023-07-20  7:37         ` Richard Biener
2023-07-20  8:22           ` Andrew Carlotti

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).