* [PATCH 1/5] aarch64: rcpc3: Add +rcpc3 extension
2023-11-09 14:12 [PATCH 0/5] aarch64: Add ACLE intrinsics codegen support for lrcpc3 instructions Victor Do Nascimento
@ 2023-11-09 14:12 ` Victor Do Nascimento
2023-11-24 12:03 ` Richard Sandiford
2023-11-09 14:12 ` [PATCH 2/5] aarch64: rcpc3: Add relevant iterators to handle Neon intrinsics Victor Do Nascimento
` (3 subsequent siblings)
4 siblings, 1 reply; 12+ messages in thread
From: Victor Do Nascimento @ 2023-11-09 14:12 UTC (permalink / raw)
To: gcc-patches
Cc: kyrylo.tkachov, richard.sandiford, Richard.Earnshaw,
Victor Do Nascimento
Given the optional LRCPC3 target support for Armv8.2-a cores onwards,
the +rcpc3 arch feature modifier is added to GCC's command-line options.
gcc/ChangeLog:
* config/aarch64/aarch64-option-extensions.def (rcpc3): New.
* config/aarch64/aarch64.h (AARCH64_ISA_RCPC3): Likewise.
(TARGET_RCPC3): Likewise.
* doc/invoke.texi (rcpc3): Document feature in AArch64 Options.
---
gcc/config/aarch64/aarch64-option-extensions.def | 1 +
gcc/config/aarch64/aarch64.h | 4 ++++
gcc/doc/invoke.texi | 4 ++++
3 files changed, 9 insertions(+)
diff --git a/gcc/config/aarch64/aarch64-option-extensions.def b/gcc/config/aarch64/aarch64-option-extensions.def
index 825f3bf7758..2ab94799d34 100644
--- a/gcc/config/aarch64/aarch64-option-extensions.def
+++ b/gcc/config/aarch64/aarch64-option-extensions.def
@@ -151,4 +151,5 @@ AARCH64_OPT_EXTENSION("mops", MOPS, (), (), (), "")
AARCH64_OPT_EXTENSION("cssc", CSSC, (), (), (), "cssc")
+AARCH64_OPT_EXTENSION("rcpc3", RCPC3, (), (), (), "rcpc3")
#undef AARCH64_OPT_EXTENSION
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 2f0777a37ac..68bbaccef1a 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -213,6 +213,7 @@ enum class aarch64_feature : unsigned char {
#define AARCH64_ISA_F64MM (aarch64_isa_flags & AARCH64_FL_F64MM)
#define AARCH64_ISA_BF16 (aarch64_isa_flags & AARCH64_FL_BF16)
#define AARCH64_ISA_SB (aarch64_isa_flags & AARCH64_FL_SB)
+#define AARCH64_ISA_RCPC3 (aarch64_isa_flags & AARCH64_FL_RCPC3)
#define AARCH64_ISA_V8R (aarch64_isa_flags & AARCH64_FL_V8R)
#define AARCH64_ISA_PAUTH (aarch64_isa_flags & AARCH64_FL_PAUTH)
#define AARCH64_ISA_V9A (aarch64_isa_flags & AARCH64_FL_V9A)
@@ -344,6 +345,9 @@ enum class aarch64_feature : unsigned char {
and sign-extending versions.*/
#define TARGET_RCPC2 (AARCH64_ISA_RCPC8_4)
+/* RCPC3 LDAP1/STL1 loads/stores from Armv8.2-a. */
+#define TARGET_RCPC3 (AARCH64_ISA_RCPC3)
+
/* Apply the workaround for Cortex-A53 erratum 835769. */
#define TARGET_FIX_ERR_A53_835769 \
((aarch64_fix_a53_err835769 == 2) \
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 6e776a0faa1..ba28eb195ce 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -21028,6 +21028,10 @@ Enable the Flag Manipulation instructions Extension.
Enable the Pointer Authentication Extension.
@item cssc
Enable the Common Short Sequence Compression instructions.
+@item rcpc3
+Enable the RCpc3 extension. This enables the use of the LDAP1 and
+STL1 instructions for loads/stores of 64-bit values to and from SIMD
+register lanes, passing these on to the assembler.
@end table
--
2.41.0
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH 1/5] aarch64: rcpc3: Add +rcpc3 extension
2023-11-09 14:12 ` [PATCH 1/5] aarch64: rcpc3: Add +rcpc3 extension Victor Do Nascimento
@ 2023-11-24 12:03 ` Richard Sandiford
0 siblings, 0 replies; 12+ messages in thread
From: Richard Sandiford @ 2023-11-24 12:03 UTC (permalink / raw)
To: Victor Do Nascimento; +Cc: gcc-patches, kyrylo.tkachov, Richard.Earnshaw
Victor Do Nascimento <victor.donascimento@arm.com> writes:
> Given the optional LRCPC3 target support for Armv8.2-a cores onwards,
> the +rcpc3 arch feature modifier is added to GCC's command-line options.
>
> gcc/ChangeLog:
>
> * config/aarch64/aarch64-option-extensions.def (rcpc3): New.
> * config/aarch64/aarch64.h (AARCH64_ISA_RCPC3): Likewise.
> (TARGET_RCPC3): Likewise.
> * doc/invoke.texi (rcpc3): Document feature in AArch64 Options.
> ---
> gcc/config/aarch64/aarch64-option-extensions.def | 1 +
> gcc/config/aarch64/aarch64.h | 4 ++++
> gcc/doc/invoke.texi | 4 ++++
> 3 files changed, 9 insertions(+)
>
> diff --git a/gcc/config/aarch64/aarch64-option-extensions.def b/gcc/config/aarch64/aarch64-option-extensions.def
> index 825f3bf7758..2ab94799d34 100644
> --- a/gcc/config/aarch64/aarch64-option-extensions.def
> +++ b/gcc/config/aarch64/aarch64-option-extensions.def
> @@ -151,4 +151,5 @@ AARCH64_OPT_EXTENSION("mops", MOPS, (), (), (), "")
>
> AARCH64_OPT_EXTENSION("cssc", CSSC, (), (), (), "cssc")
>
> +AARCH64_OPT_EXTENSION("rcpc3", RCPC3, (), (), (), "rcpc3")
> #undef AARCH64_OPT_EXTENSION
> diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
> index 2f0777a37ac..68bbaccef1a 100644
> --- a/gcc/config/aarch64/aarch64.h
> +++ b/gcc/config/aarch64/aarch64.h
> @@ -213,6 +213,7 @@ enum class aarch64_feature : unsigned char {
> #define AARCH64_ISA_F64MM (aarch64_isa_flags & AARCH64_FL_F64MM)
> #define AARCH64_ISA_BF16 (aarch64_isa_flags & AARCH64_FL_BF16)
> #define AARCH64_ISA_SB (aarch64_isa_flags & AARCH64_FL_SB)
> +#define AARCH64_ISA_RCPC3 (aarch64_isa_flags & AARCH64_FL_RCPC3)
> #define AARCH64_ISA_V8R (aarch64_isa_flags & AARCH64_FL_V8R)
> #define AARCH64_ISA_PAUTH (aarch64_isa_flags & AARCH64_FL_PAUTH)
> #define AARCH64_ISA_V9A (aarch64_isa_flags & AARCH64_FL_V9A)
> @@ -344,6 +345,9 @@ enum class aarch64_feature : unsigned char {
> and sign-extending versions.*/
> #define TARGET_RCPC2 (AARCH64_ISA_RCPC8_4)
>
> +/* RCPC3 LDAP1/STL1 loads/stores from Armv8.2-a. */
> +#define TARGET_RCPC3 (AARCH64_ISA_RCPC3)
The extension is more general than that, so maybe just:
/* RCPC3 (Release Consistency) extensions, optional from Armv8.2-A. */
> +
> /* Apply the workaround for Cortex-A53 erratum 835769. */
> #define TARGET_FIX_ERR_A53_835769 \
> ((aarch64_fix_a53_err835769 == 2) \
> diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
> index 6e776a0faa1..ba28eb195ce 100644
> --- a/gcc/doc/invoke.texi
> +++ b/gcc/doc/invoke.texi
> @@ -21028,6 +21028,10 @@ Enable the Flag Manipulation instructions Extension.
> Enable the Pointer Authentication Extension.
> @item cssc
> Enable the Common Short Sequence Compression instructions.
> +@item rcpc3
> +Enable the RCpc3 extension. This enables the use of the LDAP1 and
> +STL1 instructions for loads/stores of 64-bit values to and from SIMD
> +register lanes, passing these on to the assembler.
Similarly here, it's probably enough to say:
Enable the RCpc3 (Release Consistency) extension.
OK with those changes, thanks.
Richard
^ permalink raw reply [flat|nested] 12+ messages in thread
* [PATCH 2/5] aarch64: rcpc3: Add relevant iterators to handle Neon intrinsics
2023-11-09 14:12 [PATCH 0/5] aarch64: Add ACLE intrinsics codegen support for lrcpc3 instructions Victor Do Nascimento
2023-11-09 14:12 ` [PATCH 1/5] aarch64: rcpc3: Add +rcpc3 extension Victor Do Nascimento
@ 2023-11-09 14:12 ` Victor Do Nascimento
2023-11-24 11:31 ` Richard Sandiford
2023-11-09 14:12 ` [PATCH 3/5] aarch64: rcpc3: Add Neon ACLE intrinsics Victor Do Nascimento
` (2 subsequent siblings)
4 siblings, 1 reply; 12+ messages in thread
From: Victor Do Nascimento @ 2023-11-09 14:12 UTC (permalink / raw)
To: gcc-patches
Cc: kyrylo.tkachov, richard.sandiford, Richard.Earnshaw,
Victor Do Nascimento
The LDAP1 and STL1 Neon ACLE intrinsics, operating on 64-bit data
values, operate on single-lane (Vt.1D) or twin-lane (Vt.2D) SIMD
register configurations, either in the DI or DF modes. This leads to
the need for a mode iterator accounting for the V1DI, V1DF, V2DI and
V2DF modes.
This patch therefore introduces the new V12DIF mode iterator with
which to generate functions operating on signed 64-bit integer and
float values and V12DIUP for generating the unsigned and
polynomial-type counterparts. Along with this, we modify the
associated mode attributes accordingly in order to allow for the
implementation of the relevant backend patterns for the intrinsics.
gcc/ChangeLog:
* config/aarch64/iterators.md (V12DIF): New.
(V12DIUP): Likewise.
(VEL): Add support for all V12DIF-associated modes.
(Vetype): Add support for V1DI and V1DF.
(Vel): Likewise.
---
gcc/config/aarch64/iterators.md | 25 +++++++++++++++++--------
1 file changed, 17 insertions(+), 8 deletions(-)
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index f9e2210095e..471438e27be 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -314,6 +314,12 @@
;; All byte modes.
(define_mode_iterator VB [V8QI V16QI])
+;; 1 and 2 lane DI and DF modes.
+(define_mode_iterator V12DIF [V1DI V1DF V2DI V2DF])
+
+;; 1 and 2 lane DI mode for unsigned and poly types.
+(define_mode_iterator V12DIUP [V1DI V2DI])
+
;; 2 and 4 lane SI modes.
(define_mode_iterator VS [V2SI V4SI])
@@ -1195,10 +1201,10 @@
(define_mode_attr Vetype [(V8QI "b") (V16QI "b")
(V4HI "h") (V8HI "h")
(V2SI "s") (V4SI "s")
- (V2DI "d")
+ (V2DI "d") (V1DI "d")
(V4HF "h") (V8HF "h")
(V2SF "s") (V4SF "s")
- (V2DF "d")
+ (V2DF "d") (V1DF "d")
(V2x8QI "b") (V2x4HI "h")
(V2x2SI "s") (V2x1DI "d")
(V2x4HF "h") (V2x2SF "s")
@@ -1358,10 +1364,12 @@
(define_mode_attr VEL [(V8QI "QI") (V16QI "QI")
(V4HI "HI") (V8HI "HI")
(V2SI "SI") (V4SI "SI")
- (DI "DI") (V2DI "DI")
+ (DI "DI") (V1DI "DI")
+ (V2DI "DI")
(V4HF "HF") (V8HF "HF")
(V2SF "SF") (V4SF "SF")
- (DF "DF") (V2DF "DF")
+ (DF "DF") (V1DF "DF")
+ (V2DF "DF")
(SI "SI") (HI "HI")
(QI "QI")
(V4BF "BF") (V8BF "BF")
@@ -1378,12 +1386,13 @@
(define_mode_attr Vel [(V8QI "qi") (V16QI "qi")
(V4HI "hi") (V8HI "hi")
(V2SI "si") (V4SI "si")
- (DI "di") (V2DI "di")
+ (DI "di") (V1DI "si")
+ (V2DI "di")
(V4HF "hf") (V8HF "hf")
(V2SF "sf") (V4SF "sf")
- (V2DF "df") (DF "df")
- (SI "si") (HI "hi")
- (QI "qi")
+ (V1DF "df") (V2DF "df")
+ (DF "df") (SI "si")
+ (HI "hi") (QI "qi")
(V4BF "bf") (V8BF "bf")
(VNx16QI "qi") (VNx8QI "qi") (VNx4QI "qi") (VNx2QI "qi")
(VNx8HI "hi") (VNx4HI "hi") (VNx2HI "hi")
--
2.41.0
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH 2/5] aarch64: rcpc3: Add relevant iterators to handle Neon intrinsics
2023-11-09 14:12 ` [PATCH 2/5] aarch64: rcpc3: Add relevant iterators to handle Neon intrinsics Victor Do Nascimento
@ 2023-11-24 11:31 ` Richard Sandiford
0 siblings, 0 replies; 12+ messages in thread
From: Richard Sandiford @ 2023-11-24 11:31 UTC (permalink / raw)
To: Victor Do Nascimento; +Cc: gcc-patches, kyrylo.tkachov, Richard.Earnshaw
Victor Do Nascimento <victor.donascimento@arm.com> writes:
> The LDAP1 and STL1 Neon ACLE intrinsics, operating on 64-bit data
> values, operate on single-lane (Vt.1D) or twin-lane (Vt.2D) SIMD
> register configurations, either in the DI or DF modes. This leads to
> the need for a mode iterator accounting for the V1DI, V1DF, V2DI and
> V2DF modes.
>
> This patch therefore introduces the new V12DIF mode iterator with
> which to generate functions operating on signed 64-bit integer and
> float values and V12DIUP for generating the unsigned and
> polynomial-type counterparts. Along with this, we modify the
> associated mode attributes accordingly in order to allow for the
> implementation of the relevant backend patterns for the intrinsics.
>
> gcc/ChangeLog:
>
> * config/aarch64/iterators.md (V12DIF): New.
> (V12DIUP): Likewise.
> (VEL): Add support for all V12DIF-associated modes.
> (Vetype): Add support for V1DI and V1DF.
> (Vel): Likewise.
> ---
> gcc/config/aarch64/iterators.md | 25 +++++++++++++++++--------
> 1 file changed, 17 insertions(+), 8 deletions(-)
>
> diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
> index f9e2210095e..471438e27be 100644
> --- a/gcc/config/aarch64/iterators.md
> +++ b/gcc/config/aarch64/iterators.md
> @@ -314,6 +314,12 @@
> ;; All byte modes.
> (define_mode_iterator VB [V8QI V16QI])
>
> +;; 1 and 2 lane DI and DF modes.
> +(define_mode_iterator V12DIF [V1DI V1DF V2DI V2DF])
> +
> +;; 1 and 2 lane DI mode for unsigned and poly types.
> +(define_mode_iterator V12DIUP [V1DI V2DI])
Probably easiest just to call it V12DI, without the UP. The same
iterator could be useful in other situations.
> +
> ;; 2 and 4 lane SI modes.
> (define_mode_iterator VS [V2SI V4SI])
>
> @@ -1195,10 +1201,10 @@
> (define_mode_attr Vetype [(V8QI "b") (V16QI "b")
> (V4HI "h") (V8HI "h")
> (V2SI "s") (V4SI "s")
> - (V2DI "d")
> + (V2DI "d") (V1DI "d")
> (V4HF "h") (V8HF "h")
> (V2SF "s") (V4SF "s")
> - (V2DF "d")
> + (V2DF "d") (V1DF "d")
> (V2x8QI "b") (V2x4HI "h")
> (V2x2SI "s") (V2x1DI "d")
> (V2x4HF "h") (V2x2SF "s")
> @@ -1358,10 +1364,12 @@
> (define_mode_attr VEL [(V8QI "QI") (V16QI "QI")
> (V4HI "HI") (V8HI "HI")
> (V2SI "SI") (V4SI "SI")
> - (DI "DI") (V2DI "DI")
> + (DI "DI") (V1DI "DI")
> + (V2DI "DI")
Very, very minor, but: would be good to have one fewer space before "DI",
so that the quotes line up.
> (V4HF "HF") (V8HF "HF")
> (V2SF "SF") (V4SF "SF")
> - (DF "DF") (V2DF "DF")
> + (DF "DF") (V1DF "DF")
> + (V2DF "DF")
Same here.
OK for trunk with those changes, thanks.
Richard
> (SI "SI") (HI "HI")
> (QI "QI")
> (V4BF "BF") (V8BF "BF")
> @@ -1378,12 +1386,13 @@
> (define_mode_attr Vel [(V8QI "qi") (V16QI "qi")
> (V4HI "hi") (V8HI "hi")
> (V2SI "si") (V4SI "si")
> - (DI "di") (V2DI "di")
> + (DI "di") (V1DI "si")
> + (V2DI "di")
> (V4HF "hf") (V8HF "hf")
> (V2SF "sf") (V4SF "sf")
> - (V2DF "df") (DF "df")
> - (SI "si") (HI "hi")
> - (QI "qi")
> + (V1DF "df") (V2DF "df")
> + (DF "df") (SI "si")
> + (HI "hi") (QI "qi")
> (V4BF "bf") (V8BF "bf")
> (VNx16QI "qi") (VNx8QI "qi") (VNx4QI "qi") (VNx2QI "qi")
> (VNx8HI "hi") (VNx4HI "hi") (VNx2HI "hi")
^ permalink raw reply [flat|nested] 12+ messages in thread
* [PATCH 3/5] aarch64: rcpc3: Add Neon ACLE intrinsics
2023-11-09 14:12 [PATCH 0/5] aarch64: Add ACLE intrinsics codegen support for lrcpc3 instructions Victor Do Nascimento
2023-11-09 14:12 ` [PATCH 1/5] aarch64: rcpc3: Add +rcpc3 extension Victor Do Nascimento
2023-11-09 14:12 ` [PATCH 2/5] aarch64: rcpc3: Add relevant iterators to handle Neon intrinsics Victor Do Nascimento
@ 2023-11-09 14:12 ` Victor Do Nascimento
2023-11-24 11:38 ` Richard Sandiford
2023-11-09 14:12 ` [PATCH 4/5] aarch64: rcpc3: add Neon ACLE wrapper functions to `arm_neon.h' Victor Do Nascimento
2023-11-09 14:12 ` [PATCH 5/5] aarch64: rcpc3: Add intrinsics tests Victor Do Nascimento
4 siblings, 1 reply; 12+ messages in thread
From: Victor Do Nascimento @ 2023-11-09 14:12 UTC (permalink / raw)
To: gcc-patches
Cc: kyrylo.tkachov, richard.sandiford, Richard.Earnshaw,
Victor Do Nascimento
Register the target specific builtins in `aarch64-simd-builtins.def'
and implement their associated backend patterns in `aarch64-simd.md'.
gcc/ChangeLog:
* config/aarch64/aarch64-simd-builtins.def
(vec_ldap1_lane): New.
(vec_stl1_lane): Likewise.
* config/aarch64/aarch64-simd.md
(aarch64_vec_stl1_lanes<mode>_lane<Vel>): New.
(aarch64_vec_stl1_lane<mode>): Likewise.
(aarch64_vec_ldap1_lanes<mode>_lane<Vel>): Likewise.
(aarch64_vec_ldap1_lane<mode>): Likewise.
* config/aarch64/aarch64.md (UNSPEC_LDAP1_LANE): New.
(UNSPEC_STL1_LANE): Likewise.
---
gcc/config/aarch64/aarch64-simd-builtins.def | 7 +++
gcc/config/aarch64/aarch64-simd.md | 65 ++++++++++++++++++++
gcc/config/aarch64/aarch64.md | 2 +
3 files changed, 74 insertions(+)
diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
index e2b94ad8247..0ae6c4ad41a 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -43,6 +43,13 @@
help describe the attributes (for example, pure) for the intrinsic
function. */
+ BUILTIN_V12DIF (LOADSTRUCT_LANE, vec_ldap1_lane, 0, ALL)
+ BUILTIN_V12DIUP (LOADSTRUCT_LANE_U, vec_ldap1_lane, 0, ALL)
+ BUILTIN_V12DIUP (LOADSTRUCT_LANE_P, vec_ldap1_lane, 0, ALL)
+ BUILTIN_V12DIF (STORESTRUCT_LANE, vec_stl1_lane, 0, ALL)
+ BUILTIN_V12DIUP (STORESTRUCT_LANE_U, vec_stl1_lane, 0, ALL)
+ BUILTIN_V12DIUP (STORESTRUCT_LANE_P, vec_stl1_lane, 0, ALL)
+
BUILTIN_VDC (BINOP, combine, 0, AUTO_FP)
BUILTIN_VD_I (BINOPU, combine, 0, NONE)
BUILTIN_VDC_P (BINOPP, combine, 0, NONE)
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 81ff5bad03d..79697336f61 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -7697,6 +7697,71 @@
DONE;
})
+;; Patterns for rcpc3 vector lane loads and stores.
+
+(define_insn "aarch64_vec_stl1_lanes<mode>_lane<Vel>"
+ [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Q")
+ (unspec:BLK [(match_operand:V12DIF 1 "register_operand" "w")
+ (match_operand:SI 2 "immediate_operand" "i")]
+ UNSPEC_STL1_LANE))]
+ "TARGET_RCPC3"
+ {
+ operands[2] = aarch64_endian_lane_rtx (<MODE>mode,
+ INTVAL (operands[2]));
+ return "stl1\\t{%S1.<Vetype>}[%2], %0";
+ }
+ [(set_attr "type" "neon_store2_one_lane")]
+)
+
+(define_expand "aarch64_vec_stl1_lane<mode>"
+ [(match_operand:DI 0 "register_operand")
+ (match_operand:V12DIF 1 "register_operand")
+ (match_operand:SI 2 "immediate_operand")]
+ "TARGET_RCPC3"
+{
+ rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
+ set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)));
+
+ aarch64_simd_lane_bounds (operands[2], 0,
+ GET_MODE_NUNITS (<MODE>mode).to_constant (), NULL);
+ emit_insn (gen_aarch64_vec_stl1_lanes<mode>_lane<Vel> (mem,
+ operands[1], operands[2]));
+ DONE;
+})
+
+(define_insn "aarch64_vec_ldap1_lanes<mode>_lane<Vel>"
+ [(set (match_operand:V12DIF 0 "register_operand" "=w")
+ (unspec:V12DIF [
+ (match_operand:BLK 1 "aarch64_simd_struct_operand" "Q")
+ (match_operand:V12DIF 2 "register_operand" "0")
+ (match_operand:SI 3 "immediate_operand" "i")]
+ UNSPEC_LDAP1_LANE))]
+ "TARGET_RCPC3"
+ {
+ operands[3] = aarch64_endian_lane_rtx (<MODE>mode,
+ INTVAL (operands[3]));
+ return "ldap1\\t{%S0.<Vetype>}[%3], %1";
+ }
+ [(set_attr "type" "neon_load2_one_lane")]
+)
+
+(define_expand "aarch64_vec_ldap1_lane<mode>"
+ [(match_operand:V12DIF 0 "register_operand")
+ (match_operand:DI 1 "register_operand")
+ (match_operand:V12DIF 2 "register_operand")
+ (match_operand:SI 3 "immediate_operand")]
+ "TARGET_RCPC3"
+{
+ rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
+ set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)));
+
+ aarch64_simd_lane_bounds (operands[3], 0,
+ GET_MODE_NUNITS (<MODE>mode).to_constant (), NULL);
+ emit_insn (gen_aarch64_vec_ldap1_lanes<mode>_lane<Vel> (operands[0],
+ mem, operands[2], operands[3]));
+ DONE;
+})
+
(define_insn_and_split "aarch64_rev_reglist<mode>"
[(set (match_operand:VSTRUCT_QD 0 "register_operand" "=&w")
(unspec:VSTRUCT_QD
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 5bb8c772be8..fb6de3b1fbf 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -295,6 +295,8 @@
UNSPEC_LD1RO
UNSPEC_SALT_ADDR
UNSPECV_PATCHABLE_AREA
+ UNSPEC_LDAP1_LANE
+ UNSPEC_STL1_LANE
])
(define_c_enum "unspecv" [
--
2.41.0
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH 3/5] aarch64: rcpc3: Add Neon ACLE intrinsics
2023-11-09 14:12 ` [PATCH 3/5] aarch64: rcpc3: Add Neon ACLE intrinsics Victor Do Nascimento
@ 2023-11-24 11:38 ` Richard Sandiford
0 siblings, 0 replies; 12+ messages in thread
From: Richard Sandiford @ 2023-11-24 11:38 UTC (permalink / raw)
To: Victor Do Nascimento; +Cc: gcc-patches, kyrylo.tkachov, Richard.Earnshaw
Victor Do Nascimento <victor.donascimento@arm.com> writes:
> Register the target specific builtins in `aarch64-simd-builtins.def'
> and implement their associated backend patterns in `aarch64-simd.md'.
>
> gcc/ChangeLog:
>
> * config/aarch64/aarch64-simd-builtins.def
> (vec_ldap1_lane): New.
> (vec_stl1_lane): Likewise.
> * config/aarch64/aarch64-simd.md
> (aarch64_vec_stl1_lanes<mode>_lane<Vel>): New.
> (aarch64_vec_stl1_lane<mode>): Likewise.
> (aarch64_vec_ldap1_lanes<mode>_lane<Vel>): Likewise.
> (aarch64_vec_ldap1_lane<mode>): Likewise.
OK, thanks.
Richard
> ---
> gcc/config/aarch64/aarch64-simd-builtins.def | 7 +++
> gcc/config/aarch64/aarch64-simd.md | 65 ++++++++++++++++++++
> gcc/config/aarch64/aarch64.md | 2 +
> 3 files changed, 74 insertions(+)
>
> diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
> index e2b94ad8247..0ae6c4ad41a 100644
> --- a/gcc/config/aarch64/aarch64-simd-builtins.def
> +++ b/gcc/config/aarch64/aarch64-simd-builtins.def
> @@ -43,6 +43,13 @@
> help describe the attributes (for example, pure) for the intrinsic
> function. */
>
> + BUILTIN_V12DIF (LOADSTRUCT_LANE, vec_ldap1_lane, 0, ALL)
> + BUILTIN_V12DIUP (LOADSTRUCT_LANE_U, vec_ldap1_lane, 0, ALL)
> + BUILTIN_V12DIUP (LOADSTRUCT_LANE_P, vec_ldap1_lane, 0, ALL)
> + BUILTIN_V12DIF (STORESTRUCT_LANE, vec_stl1_lane, 0, ALL)
> + BUILTIN_V12DIUP (STORESTRUCT_LANE_U, vec_stl1_lane, 0, ALL)
> + BUILTIN_V12DIUP (STORESTRUCT_LANE_P, vec_stl1_lane, 0, ALL)
> +
> BUILTIN_VDC (BINOP, combine, 0, AUTO_FP)
> BUILTIN_VD_I (BINOPU, combine, 0, NONE)
> BUILTIN_VDC_P (BINOPP, combine, 0, NONE)
> diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
> index 81ff5bad03d..79697336f61 100644
> --- a/gcc/config/aarch64/aarch64-simd.md
> +++ b/gcc/config/aarch64/aarch64-simd.md
> @@ -7697,6 +7697,71 @@
> DONE;
> })
>
> +;; Patterns for rcpc3 vector lane loads and stores.
> +
> +(define_insn "aarch64_vec_stl1_lanes<mode>_lane<Vel>"
> + [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Q")
> + (unspec:BLK [(match_operand:V12DIF 1 "register_operand" "w")
> + (match_operand:SI 2 "immediate_operand" "i")]
> + UNSPEC_STL1_LANE))]
> + "TARGET_RCPC3"
> + {
> + operands[2] = aarch64_endian_lane_rtx (<MODE>mode,
> + INTVAL (operands[2]));
> + return "stl1\\t{%S1.<Vetype>}[%2], %0";
> + }
> + [(set_attr "type" "neon_store2_one_lane")]
> +)
> +
> +(define_expand "aarch64_vec_stl1_lane<mode>"
> + [(match_operand:DI 0 "register_operand")
> + (match_operand:V12DIF 1 "register_operand")
> + (match_operand:SI 2 "immediate_operand")]
> + "TARGET_RCPC3"
> +{
> + rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
> + set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)));
> +
> + aarch64_simd_lane_bounds (operands[2], 0,
> + GET_MODE_NUNITS (<MODE>mode).to_constant (), NULL);
> + emit_insn (gen_aarch64_vec_stl1_lanes<mode>_lane<Vel> (mem,
> + operands[1], operands[2]));
> + DONE;
> +})
> +
> +(define_insn "aarch64_vec_ldap1_lanes<mode>_lane<Vel>"
> + [(set (match_operand:V12DIF 0 "register_operand" "=w")
> + (unspec:V12DIF [
> + (match_operand:BLK 1 "aarch64_simd_struct_operand" "Q")
> + (match_operand:V12DIF 2 "register_operand" "0")
> + (match_operand:SI 3 "immediate_operand" "i")]
> + UNSPEC_LDAP1_LANE))]
> + "TARGET_RCPC3"
> + {
> + operands[3] = aarch64_endian_lane_rtx (<MODE>mode,
> + INTVAL (operands[3]));
> + return "ldap1\\t{%S0.<Vetype>}[%3], %1";
> + }
> + [(set_attr "type" "neon_load2_one_lane")]
> +)
> +
> +(define_expand "aarch64_vec_ldap1_lane<mode>"
> + [(match_operand:V12DIF 0 "register_operand")
> + (match_operand:DI 1 "register_operand")
> + (match_operand:V12DIF 2 "register_operand")
> + (match_operand:SI 3 "immediate_operand")]
> + "TARGET_RCPC3"
> +{
> + rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
> + set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)));
> +
> + aarch64_simd_lane_bounds (operands[3], 0,
> + GET_MODE_NUNITS (<MODE>mode).to_constant (), NULL);
> + emit_insn (gen_aarch64_vec_ldap1_lanes<mode>_lane<Vel> (operands[0],
> + mem, operands[2], operands[3]));
> + DONE;
> +})
> +
> (define_insn_and_split "aarch64_rev_reglist<mode>"
> [(set (match_operand:VSTRUCT_QD 0 "register_operand" "=&w")
> (unspec:VSTRUCT_QD
> diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
> index 5bb8c772be8..fb6de3b1fbf 100644
> --- a/gcc/config/aarch64/aarch64.md
> +++ b/gcc/config/aarch64/aarch64.md
> @@ -295,6 +295,8 @@
> UNSPEC_LD1RO
> UNSPEC_SALT_ADDR
> UNSPECV_PATCHABLE_AREA
> + UNSPEC_LDAP1_LANE
> + UNSPEC_STL1_LANE
> ])
>
> (define_c_enum "unspecv" [
^ permalink raw reply [flat|nested] 12+ messages in thread
* [PATCH 4/5] aarch64: rcpc3: add Neon ACLE wrapper functions to `arm_neon.h'
2023-11-09 14:12 [PATCH 0/5] aarch64: Add ACLE intrinsics codegen support for lrcpc3 instructions Victor Do Nascimento
` (2 preceding siblings ...)
2023-11-09 14:12 ` [PATCH 3/5] aarch64: rcpc3: Add Neon ACLE intrinsics Victor Do Nascimento
@ 2023-11-09 14:12 ` Victor Do Nascimento
2023-11-24 12:05 ` Richard Sandiford
2023-12-07 17:22 ` Prathamesh Kulkarni
2023-11-09 14:12 ` [PATCH 5/5] aarch64: rcpc3: Add intrinsics tests Victor Do Nascimento
4 siblings, 2 replies; 12+ messages in thread
From: Victor Do Nascimento @ 2023-11-09 14:12 UTC (permalink / raw)
To: gcc-patches
Cc: kyrylo.tkachov, richard.sandiford, Richard.Earnshaw,
Victor Do Nascimento
Create the necessary mappings from the ACLE-defined Neon intrinsics
names[1] to the internal builtin function names.
[1] https://arm-software.github.io/acle/neon_intrinsics/advsimd.html
gcc/ChangeLog:
* config/aarch64/arm_neon.h (vldap1_lane_u64): New.
(vldap1q_lane_u64): Likewise.
(vldap1_lane_s64): Likewise.
(vldap1q_lane_s64): Likewise.
(vldap1_lane_f64): Likewise.
(vldap1q_lane_f64): Likewise.
(vldap1_lane_p64): Likewise.
(vldap1q_lane_p64): Likewise.
(vstl1_lane_u64): Likewise.
(vstl1q_lane_u64): Likewise.
(vstl1_lane_s64): Likewise.
(vstl1q_lane_s64): Likewise.
(vstl1_lane_f64): Likewise.
(vstl1q_lane_f64): Likewise.
(vstl1_lane_p64): Likewise.
(vstl1q_lane_p64): Likewise.
---
gcc/config/aarch64/arm_neon.h | 129 ++++++++++++++++++++++++++++++++++
1 file changed, 129 insertions(+)
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index 349f3167699..ef0d75e07ce 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -13446,6 +13446,135 @@ vld1q_lane_u64 (const uint64_t *__src, uint64x2_t __vec, const int __lane)
return __aarch64_vset_lane_any (*__src, __vec, __lane);
}
+#pragma GCC push_options
+#pragma GCC target ("+nothing+rcpc3+simd")
+
+/* vldap1_lane. */
+
+__extension__ extern __inline uint64x1_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vldap1_lane_u64 (const uint64_t *__src, uint64x1_t __vec, const int __lane)
+{
+ return __builtin_aarch64_vec_ldap1_lanev1di_usus (
+ (__builtin_aarch64_simd_di *) __src, __vec, __lane);
+}
+
+__extension__ extern __inline uint64x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vldap1q_lane_u64 (const uint64_t *__src, uint64x2_t __vec, const int __lane)
+{
+ return __builtin_aarch64_vec_ldap1_lanev2di_usus (
+ (__builtin_aarch64_simd_di *) __src, __vec, __lane);
+}
+
+__extension__ extern __inline int64x1_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vldap1_lane_s64 (const int64_t *__src, int64x1_t __vec, const int __lane)
+{
+ return __builtin_aarch64_vec_ldap1_lanev1di (__src, __vec, __lane);
+}
+
+__extension__ extern __inline int64x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vldap1q_lane_s64 (const int64_t *__src, int64x2_t __vec, const int __lane)
+{
+ return __builtin_aarch64_vec_ldap1_lanev2di (__src, __vec, __lane);
+}
+
+__extension__ extern __inline float64x1_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vldap1_lane_f64 (const float64_t *__src, float64x1_t __vec, const int __lane)
+{
+ return __builtin_aarch64_vec_ldap1_lanev1df (__src, __vec, __lane);
+}
+
+__extension__ extern __inline float64x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vldap1q_lane_f64 (const float64_t *__src, float64x2_t __vec, const int __lane)
+{
+ return __builtin_aarch64_vec_ldap1_lanev2df (__src, __vec, __lane);
+}
+
+__extension__ extern __inline poly64x1_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vldap1_lane_p64 (const poly64_t *__src, poly64x1_t __vec, const int __lane)
+{
+ return __builtin_aarch64_vec_ldap1_lanev1di_psps (
+ (__builtin_aarch64_simd_di *) __src, __vec, __lane);
+}
+
+__extension__ extern __inline poly64x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vldap1q_lane_p64 (const poly64_t *__src, poly64x2_t __vec, const int __lane)
+{
+ return __builtin_aarch64_vec_ldap1_lanev2di_psps (
+ (__builtin_aarch64_simd_di *) __src, __vec, __lane);
+}
+
+/* vstl1_lane. */
+
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vstl1_lane_u64 (const uint64_t *__src, uint64x1_t __vec, const int __lane)
+{
+ __builtin_aarch64_vec_stl1_lanev1di_sus ((__builtin_aarch64_simd_di *) __src,
+ __vec, __lane);
+}
+
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vstl1q_lane_u64 (uint64_t *__src, uint64x2_t __vec, const int __lane)
+{
+ __builtin_aarch64_vec_stl1_lanev2di_sus ((__builtin_aarch64_simd_di *) __src,
+ __vec, __lane);
+}
+
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vstl1_lane_s64 (int64_t *__src, int64x1_t __vec, const int __lane)
+{
+ __builtin_aarch64_vec_stl1_lanev1di (__src, __vec, __lane);
+}
+
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vstl1q_lane_s64 (int64_t *__src, int64x2_t __vec, const int __lane)
+{
+ __builtin_aarch64_vec_stl1_lanev2di (__src, __vec, __lane);
+}
+
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vstl1_lane_f64 (float64_t *__src, float64x1_t __vec, const int __lane)
+{
+ __builtin_aarch64_vec_stl1_lanev1df (__src, __vec, __lane);
+}
+
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vstl1q_lane_f64 (float64_t *__src, float64x2_t __vec, const int __lane)
+{
+ __builtin_aarch64_vec_stl1_lanev2df (__src, __vec, __lane);
+}
+
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vstl1_lane_p64 (poly64_t *__src, poly64x1_t __vec, const int __lane)
+{
+ __builtin_aarch64_vec_stl1_lanev1di_sps ((__builtin_aarch64_simd_di *) __src,
+ __vec, __lane);
+}
+
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vstl1q_lane_p64 (poly64_t *__src, poly64x2_t __vec, const int __lane)
+{
+ __builtin_aarch64_vec_stl1_lanev2di_sps ((__builtin_aarch64_simd_di *) __src,
+ __vec, __lane);
+}
+
+#pragma GCC pop_options
+
/* vldn */
__extension__ extern __inline int64x1x2_t
--
2.41.0
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH 4/5] aarch64: rcpc3: add Neon ACLE wrapper functions to `arm_neon.h'
2023-11-09 14:12 ` [PATCH 4/5] aarch64: rcpc3: add Neon ACLE wrapper functions to `arm_neon.h' Victor Do Nascimento
@ 2023-11-24 12:05 ` Richard Sandiford
2023-12-07 17:22 ` Prathamesh Kulkarni
1 sibling, 0 replies; 12+ messages in thread
From: Richard Sandiford @ 2023-11-24 12:05 UTC (permalink / raw)
To: Victor Do Nascimento; +Cc: gcc-patches, kyrylo.tkachov, Richard.Earnshaw
Victor Do Nascimento <victor.donascimento@arm.com> writes:
> Create the necessary mappings from the ACLE-defined Neon intrinsics
> names[1] to the internal builtin function names.
>
> [1] https://arm-software.github.io/acle/neon_intrinsics/advsimd.html
>
> gcc/ChangeLog:
>
> * gcc/config/aarch64/arm_neon.h (vldap1_lane_u64): New.
> (vldap1q_lane_u64): Likewise.
> (vldap1_lane_s64): Likewise.
> (vldap1q_lane_s64): Likewise.
> (vldap1_lane_f64): Likewise.
> (vldap1q_lane_f64): Likewise.
> (vldap1_lane_p64): Likewise.
> (vldap1q_lane_p64): Likewise.
> (vstl1_lane_u64): Likewise.
> (vstl1q_lane_u64): Likewise.
> (vstl1_lane_s64): Likewise.
> (vstl1q_lane_s64): Likewise.
> (vstl1_lane_f64): Likewise.
> (vstl1q_lane_f64): Likewise.
> (vstl1_lane_p64): Likewise.
> (vstl1q_lane_p64): Likewise.
OK, thanks.
Richard
> ---
> gcc/config/aarch64/arm_neon.h | 129 ++++++++++++++++++++++++++++++++++
> 1 file changed, 129 insertions(+)
>
> diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
> index 349f3167699..ef0d75e07ce 100644
> --- a/gcc/config/aarch64/arm_neon.h
> +++ b/gcc/config/aarch64/arm_neon.h
> @@ -13446,6 +13446,135 @@ vld1q_lane_u64 (const uint64_t *__src, uint64x2_t __vec, const int __lane)
> return __aarch64_vset_lane_any (*__src, __vec, __lane);
> }
>
> +#pragma GCC push_options
> +#pragma GCC target ("+nothing+rcpc3+simd")
> +
> +/* vldap1_lane. */
> +
> +__extension__ extern __inline uint64x1_t
> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> +vldap1_lane_u64 (const uint64_t *__src, uint64x1_t __vec, const int __lane)
> +{
> + return __builtin_aarch64_vec_ldap1_lanev1di_usus (
> + (__builtin_aarch64_simd_di *) __src, __vec, __lane);
> +}
> +
> +__extension__ extern __inline uint64x2_t
> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> +vldap1q_lane_u64 (const uint64_t *__src, uint64x2_t __vec, const int __lane)
> +{
> + return __builtin_aarch64_vec_ldap1_lanev2di_usus (
> + (__builtin_aarch64_simd_di *) __src, __vec, __lane);
> +}
> +
> +__extension__ extern __inline int64x1_t
> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> +vldap1_lane_s64 (const int64_t *__src, int64x1_t __vec, const int __lane)
> +{
> + return __builtin_aarch64_vec_ldap1_lanev1di (__src, __vec, __lane);
> +}
> +
> +__extension__ extern __inline int64x2_t
> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> +vldap1q_lane_s64 (const int64_t *__src, int64x2_t __vec, const int __lane)
> +{
> + return __builtin_aarch64_vec_ldap1_lanev2di (__src, __vec, __lane);
> +}
> +
> +__extension__ extern __inline float64x1_t
> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> +vldap1_lane_f64 (const float64_t *__src, float64x1_t __vec, const int __lane)
> +{
> + return __builtin_aarch64_vec_ldap1_lanev1df (__src, __vec, __lane);
> +}
> +
> +__extension__ extern __inline float64x2_t
> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> +vldap1q_lane_f64 (const float64_t *__src, float64x2_t __vec, const int __lane)
> +{
> + return __builtin_aarch64_vec_ldap1_lanev2df (__src, __vec, __lane);
> +}
> +
> +__extension__ extern __inline poly64x1_t
> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> +vldap1_lane_p64 (const poly64_t *__src, poly64x1_t __vec, const int __lane)
> +{
> + return __builtin_aarch64_vec_ldap1_lanev1di_psps (
> + (__builtin_aarch64_simd_di *) __src, __vec, __lane);
> +}
> +
> +__extension__ extern __inline poly64x2_t
> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> +vldap1q_lane_p64 (const poly64_t *__src, poly64x2_t __vec, const int __lane)
> +{
> + return __builtin_aarch64_vec_ldap1_lanev2di_psps (
> + (__builtin_aarch64_simd_di *) __src, __vec, __lane);
> +}
> +
> +/* vstl1_lane. */
> +
> +__extension__ extern __inline void
> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> +vstl1_lane_u64 (const uint64_t *__src, uint64x1_t __vec, const int __lane)
> +{
> + __builtin_aarch64_vec_stl1_lanev1di_sus ((__builtin_aarch64_simd_di *) __src,
> + __vec, __lane);
> +}
> +
> +__extension__ extern __inline void
> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> +vstl1q_lane_u64 (uint64_t *__src, uint64x2_t __vec, const int __lane)
> +{
> + __builtin_aarch64_vec_stl1_lanev2di_sus ((__builtin_aarch64_simd_di *) __src,
> + __vec, __lane);
> +}
> +
> +__extension__ extern __inline void
> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> +vstl1_lane_s64 (int64_t *__src, int64x1_t __vec, const int __lane)
> +{
> + __builtin_aarch64_vec_stl1_lanev1di (__src, __vec, __lane);
> +}
> +
> +__extension__ extern __inline void
> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> +vstl1q_lane_s64 (int64_t *__src, int64x2_t __vec, const int __lane)
> +{
> + __builtin_aarch64_vec_stl1_lanev2di (__src, __vec, __lane);
> +}
> +
> +__extension__ extern __inline void
> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> +vstl1_lane_f64 (float64_t *__src, float64x1_t __vec, const int __lane)
> +{
> + __builtin_aarch64_vec_stl1_lanev1df (__src, __vec, __lane);
> +}
> +
> +__extension__ extern __inline void
> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> +vstl1q_lane_f64 (float64_t *__src, float64x2_t __vec, const int __lane)
> +{
> + __builtin_aarch64_vec_stl1_lanev2df (__src, __vec, __lane);
> +}
> +
> +__extension__ extern __inline void
> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> +vstl1_lane_p64 (poly64_t *__src, poly64x1_t __vec, const int __lane)
> +{
> + __builtin_aarch64_vec_stl1_lanev1di_sps ((__builtin_aarch64_simd_di *) __src,
> + __vec, __lane);
> +}
> +
> +__extension__ extern __inline void
> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> +vstl1q_lane_p64 (poly64_t *__src, poly64x2_t __vec, const int __lane)
> +{
> + __builtin_aarch64_vec_stl1_lanev2di_sps ((__builtin_aarch64_simd_di *) __src,
> + __vec, __lane);
> +}
> +
> +#pragma GCC pop_options
> +
> /* vldn */
>
> __extension__ extern __inline int64x1x2_t
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH 4/5] aarch64: rcpc3: add Neon ACLE wrapper functions to `arm_neon.h'
2023-11-09 14:12 ` [PATCH 4/5] aarch64: rcpc3: add Neon ACLE wrapper functions to `arm_neon.h' Victor Do Nascimento
2023-11-24 12:05 ` Richard Sandiford
@ 2023-12-07 17:22 ` Prathamesh Kulkarni
1 sibling, 0 replies; 12+ messages in thread
From: Prathamesh Kulkarni @ 2023-12-07 17:22 UTC (permalink / raw)
To: Victor Do Nascimento
Cc: gcc-patches, kyrylo.tkachov, richard.sandiford, Richard.Earnshaw
On Thu, 9 Nov 2023 at 19:44, Victor Do Nascimento
<victor.donascimento@arm.com> wrote:
>
> Create the necessary mappings from the ACLE-defined Neon intrinsics
> names[1] to the internal builtin function names.
>
> [1] https://arm-software.github.io/acle/neon_intrinsics/advsimd.html
Hi Victor,
It seems this patch broke the kernel build after the recent patch to
upgrade -Wincompatible-pointer-types to an error:
00:00:56 /home/tcwg-buildslave/workspace/tcwg_kernel_1/abe/builds/destdir/x86_64-pc-linux-gnu/lib/gcc/aarch64-linux-gnu/14.0.0/include/arm_neon.h: In function ‘vldap1_lane_s64’:
00:00:56 /home/tcwg-buildslave/workspace/tcwg_kernel_1/abe/builds/destdir/x86_64-pc-linux-gnu/lib/gcc/aarch64-linux-gnu/14.0.0/include/arm_neon.h:13474:48: error: passing argument 1 of ‘__builtin_aarch64_vec_ldap1_lanev1di’ from incompatible pointer type [-Wincompatible-pointer-types]
00:00:56 13474 | return __builtin_aarch64_vec_ldap1_lanev1di (__src, __vec, __lane);
00:00:56 | ^~~~~
00:00:56 | |
00:00:56 | const int64_t * {aka const long long int *}
00:00:56 /home/tcwg-buildslave/workspace/tcwg_kernel_1/abe/builds/destdir/x86_64-pc-linux-gnu/lib/gcc/aarch64-linux-gnu/14.0.0/include/arm_neon.h:13474:48: note: expected ‘const long int *’ but argument is of type ‘const int64_t *’ {aka ‘const long long int *’}
Looking cursorily at the code, should __src be cast to
(__builtin_aarch64_simd_di *) before passing it to
__builtin_aarch64_vec_ldap1_lanev1di?
For more details, please see:
https://ci.linaro.org/job/tcwg_kernel--gnu-master-aarch64-next-defconfig-build/91/artifact/artifacts/notify/mail-body.txt/*view*/
Thanks,
Prathamesh
>
> gcc/ChangeLog:
>
> * gcc/config/aarch64/arm_neon.h (vldap1_lane_u64): New.
> (vldap1q_lane_u64): Likewise.
> (vldap1_lane_s64): Likewise.
> (vldap1q_lane_s64): Likewise.
> (vldap1_lane_f64): Likewise.
> (vldap1q_lane_f64): Likewise.
> (vldap1_lane_p64): Likewise.
> (vldap1q_lane_p64): Likewise.
> (vstl1_lane_u64): Likewise.
> (vstl1q_lane_u64): Likewise.
> (vstl1_lane_s64): Likewise.
> (vstl1q_lane_s64): Likewise.
> (vstl1_lane_f64): Likewise.
> (vstl1q_lane_f64): Likewise.
> (vstl1_lane_p64): Likewise.
> (vstl1q_lane_p64): Likewise.
> ---
> gcc/config/aarch64/arm_neon.h | 129 ++++++++++++++++++++++++++++++++++
> 1 file changed, 129 insertions(+)
>
> diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
> index 349f3167699..ef0d75e07ce 100644
> --- a/gcc/config/aarch64/arm_neon.h
> +++ b/gcc/config/aarch64/arm_neon.h
> @@ -13446,6 +13446,135 @@ vld1q_lane_u64 (const uint64_t *__src, uint64x2_t __vec, const int __lane)
> return __aarch64_vset_lane_any (*__src, __vec, __lane);
> }
>
> +#pragma GCC push_options
> +#pragma GCC target ("+nothing+rcpc3+simd")
> +
> +/* vldap1_lane. */
> +
> +__extension__ extern __inline uint64x1_t
> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> +vldap1_lane_u64 (const uint64_t *__src, uint64x1_t __vec, const int __lane)
> +{
> + return __builtin_aarch64_vec_ldap1_lanev1di_usus (
> + (__builtin_aarch64_simd_di *) __src, __vec, __lane);
> +}
> +
> +__extension__ extern __inline uint64x2_t
> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> +vldap1q_lane_u64 (const uint64_t *__src, uint64x2_t __vec, const int __lane)
> +{
> + return __builtin_aarch64_vec_ldap1_lanev2di_usus (
> + (__builtin_aarch64_simd_di *) __src, __vec, __lane);
> +}
> +
> +__extension__ extern __inline int64x1_t
> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> +vldap1_lane_s64 (const int64_t *__src, int64x1_t __vec, const int __lane)
> +{
> + return __builtin_aarch64_vec_ldap1_lanev1di (__src, __vec, __lane);
> +}
> +
> +__extension__ extern __inline int64x2_t
> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> +vldap1q_lane_s64 (const int64_t *__src, int64x2_t __vec, const int __lane)
> +{
> + return __builtin_aarch64_vec_ldap1_lanev2di (__src, __vec, __lane);
> +}
> +
> +__extension__ extern __inline float64x1_t
> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> +vldap1_lane_f64 (const float64_t *__src, float64x1_t __vec, const int __lane)
> +{
> + return __builtin_aarch64_vec_ldap1_lanev1df (__src, __vec, __lane);
> +}
> +
> +__extension__ extern __inline float64x2_t
> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> +vldap1q_lane_f64 (const float64_t *__src, float64x2_t __vec, const int __lane)
> +{
> + return __builtin_aarch64_vec_ldap1_lanev2df (__src, __vec, __lane);
> +}
> +
> +__extension__ extern __inline poly64x1_t
> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> +vldap1_lane_p64 (const poly64_t *__src, poly64x1_t __vec, const int __lane)
> +{
> + return __builtin_aarch64_vec_ldap1_lanev1di_psps (
> + (__builtin_aarch64_simd_di *) __src, __vec, __lane);
> +}
> +
> +__extension__ extern __inline poly64x2_t
> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> +vldap1q_lane_p64 (const poly64_t *__src, poly64x2_t __vec, const int __lane)
> +{
> + return __builtin_aarch64_vec_ldap1_lanev2di_psps (
> + (__builtin_aarch64_simd_di *) __src, __vec, __lane);
> +}
> +
> +/* vstl1_lane. */
> +
> +__extension__ extern __inline void
> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> +vstl1_lane_u64 (const uint64_t *__src, uint64x1_t __vec, const int __lane)
> +{
> + __builtin_aarch64_vec_stl1_lanev1di_sus ((__builtin_aarch64_simd_di *) __src,
> + __vec, __lane);
> +}
> +
> +__extension__ extern __inline void
> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> +vstl1q_lane_u64 (uint64_t *__src, uint64x2_t __vec, const int __lane)
> +{
> + __builtin_aarch64_vec_stl1_lanev2di_sus ((__builtin_aarch64_simd_di *) __src,
> + __vec, __lane);
> +}
> +
> +__extension__ extern __inline void
> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> +vstl1_lane_s64 (int64_t *__src, int64x1_t __vec, const int __lane)
> +{
> + __builtin_aarch64_vec_stl1_lanev1di (__src, __vec, __lane);
> +}
> +
> +__extension__ extern __inline void
> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> +vstl1q_lane_s64 (int64_t *__src, int64x2_t __vec, const int __lane)
> +{
> + __builtin_aarch64_vec_stl1_lanev2di (__src, __vec, __lane);
> +}
> +
> +__extension__ extern __inline void
> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> +vstl1_lane_f64 (float64_t *__src, float64x1_t __vec, const int __lane)
> +{
> + __builtin_aarch64_vec_stl1_lanev1df (__src, __vec, __lane);
> +}
> +
> +__extension__ extern __inline void
> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> +vstl1q_lane_f64 (float64_t *__src, float64x2_t __vec, const int __lane)
> +{
> + __builtin_aarch64_vec_stl1_lanev2df (__src, __vec, __lane);
> +}
> +
> +__extension__ extern __inline void
> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> +vstl1_lane_p64 (poly64_t *__src, poly64x1_t __vec, const int __lane)
> +{
> + __builtin_aarch64_vec_stl1_lanev1di_sps ((__builtin_aarch64_simd_di *) __src,
> + __vec, __lane);
> +}
> +
> +__extension__ extern __inline void
> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> +vstl1q_lane_p64 (poly64_t *__src, poly64x2_t __vec, const int __lane)
> +{
> + __builtin_aarch64_vec_stl1_lanev2di_sps ((__builtin_aarch64_simd_di *) __src,
> + __vec, __lane);
> +}
> +
> +#pragma GCC pop_options
> +
> /* vldn */
>
> __extension__ extern __inline int64x1x2_t
> --
> 2.41.0
>
^ permalink raw reply [flat|nested] 12+ messages in thread
* [PATCH 5/5] aarch64: rcpc3: Add intrinsics tests
2023-11-09 14:12 [PATCH 0/5] aarch64: Add ACLE intrinsics codegen support for lrcpc3 instructions Victor Do Nascimento
` (3 preceding siblings ...)
2023-11-09 14:12 ` [PATCH 4/5] aarch64: rcpc3: add Neon ACLE wrapper functions to `arm_neon.h' Victor Do Nascimento
@ 2023-11-09 14:12 ` Victor Do Nascimento
2023-11-24 12:06 ` Richard Sandiford
4 siblings, 1 reply; 12+ messages in thread
From: Victor Do Nascimento @ 2023-11-09 14:12 UTC (permalink / raw)
To: gcc-patches
Cc: kyrylo.tkachov, richard.sandiford, Richard.Earnshaw,
Victor Do Nascimento
Add unit test to ensure that added intrinsics compile to the correct
`LDAP1 {Vt.D}[lane],[Xn]' and `STL1 {Vt.d}[lane],[Xn]' instructions.
gcc/testsuite/ChangeLog:
* gcc.target/aarch64/acle/rcpc3.c: New.
---
gcc/testsuite/gcc.target/aarch64/acle/rcpc3.c | 47 +++++++++++++++++++
1 file changed, 47 insertions(+)
create mode 100644 gcc/testsuite/gcc.target/aarch64/acle/rcpc3.c
diff --git a/gcc/testsuite/gcc.target/aarch64/acle/rcpc3.c b/gcc/testsuite/gcc.target/aarch64/acle/rcpc3.c
new file mode 100644
index 00000000000..689d047ab91
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/acle/rcpc3.c
@@ -0,0 +1,47 @@
+/* Test the rcpc3 ACLE intrinsics. */
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=armv8.2-a+rcpc3" } */
+#include <stdint.h>
+#include <arm_neon.h>
+
+#define TEST_LDAP(TYPE, T) \
+ TYPE##x##1_t T##1_test (TYPE##_t const * ptr, TYPE##x##1_t src) { \
+ return vldap1_lane_##T##64 (ptr, src, 0); \
+ }
+
+#define TEST_LDAPQ(TYPE, T) \
+ TYPE##x##2_t T##2_test (TYPE##_t const * ptr, TYPE##x##2_t src) { \
+ return vldap1q_lane_##T##64 (ptr, src, 1); \
+ }
+
+#define TEST_STL(TYPE, T) \
+ void T##1s_test (TYPE##_t * ptr, TYPE##x##1_t src) { \
+ vstl1_lane_##T##64 (ptr, src, 0); \
+ }
+
+#define TEST_STLQ(TYPE, T) \
+ void T##2s_test (TYPE##_t * ptr, TYPE##x##2_t src) { \
+ vstl1q_lane_##T##64 (ptr, src, 1); \
+ }
+
+TEST_LDAP (uint64, u);
+TEST_LDAP (int64, s);
+TEST_LDAP (float64, f);
+TEST_LDAP (poly64, p);
+/* { dg-final { scan-assembler-times {ldap1\t\{v\d.d\}\[0\], \[x\d\]} 4 } } */
+TEST_LDAPQ (uint64, u);
+TEST_LDAPQ (int64, s);
+TEST_LDAPQ (float64, f);
+TEST_LDAPQ (poly64, p);
+/* { dg-final { scan-assembler-times {ldap1\t\{v\d.d\}\[1\], \[x\d\]} 4 } } */
+
+TEST_STL (uint64, u);
+TEST_STL (int64, s);
+TEST_STL (float64, f);
+TEST_STL (poly64, p);
+/* { dg-final { scan-assembler-times {stl1\t\{v\d.d\}\[0\], \[x\d\]} 4 } } */
+TEST_STLQ (uint64, u);
+TEST_STLQ (int64, s);
+TEST_STLQ (float64, f);
+TEST_STLQ (poly64, p);
+/* { dg-final { scan-assembler-times {stl1\t\{v\d.d\}\[1\], \[x\d\]} 4 } } */
--
2.41.0
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH 5/5] aarch64: rcpc3: Add intrinsics tests
2023-11-09 14:12 ` [PATCH 5/5] aarch64: rcpc3: Add intrinsics tests Victor Do Nascimento
@ 2023-11-24 12:06 ` Richard Sandiford
0 siblings, 0 replies; 12+ messages in thread
From: Richard Sandiford @ 2023-11-24 12:06 UTC (permalink / raw)
To: Victor Do Nascimento; +Cc: gcc-patches, kyrylo.tkachov, Richard.Earnshaw
Victor Do Nascimento <victor.donascimento@arm.com> writes:
> Add unit test to ensure that added intrinsics compile to the correct
> `LDAP1 {Vt.D}[lane],[Xn]' and `STL1 {Vt.d}[lane],[Xn]' instructions.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/aarch64/acle/rcpc3.c: New.
OK, thanks.
Richard
> ---
> gcc/testsuite/gcc.target/aarch64/acle/rcpc3.c | 47 +++++++++++++++++++
> 1 file changed, 47 insertions(+)
> create mode 100644 gcc/testsuite/gcc.target/aarch64/acle/rcpc3.c
>
> diff --git a/gcc/testsuite/gcc.target/aarch64/acle/rcpc3.c b/gcc/testsuite/gcc.target/aarch64/acle/rcpc3.c
> new file mode 100644
> index 00000000000..689d047ab91
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/acle/rcpc3.c
> @@ -0,0 +1,47 @@
> +/* Test the rcpc3 ACLE intrinsics. */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -march=armv8.2-a+rcpc3" } */
> +#include <stdint.h>
> +#include <arm_neon.h>
> +
> +#define TEST_LDAP(TYPE, T) \
> + TYPE##x##1_t T##1_test (TYPE##_t const * ptr, TYPE##x##1_t src) { \
> + return vldap1_lane_##T##64 (ptr, src, 0); \
> + }
> +
> +#define TEST_LDAPQ(TYPE, T) \
> + TYPE##x##2_t T##2_test (TYPE##_t const * ptr, TYPE##x##2_t src) { \
> + return vldap1q_lane_##T##64 (ptr, src, 1); \
> + }
> +
> +#define TEST_STL(TYPE, T) \
> + void T##1s_test (TYPE##_t * ptr, TYPE##x##1_t src) { \
> + vstl1_lane_##T##64 (ptr, src, 0); \
> + }
> +
> +#define TEST_STLQ(TYPE, T) \
> + void T##2s_test (TYPE##_t * ptr, TYPE##x##2_t src) { \
> + vstl1q_lane_##T##64 (ptr, src, 1); \
> + }
> +
> +TEST_LDAP (uint64, u);
> +TEST_LDAP (int64, s);
> +TEST_LDAP (float64, f);
> +TEST_LDAP (poly64, p);
> +/* { dg-final { scan-assembler-times {ldap1\t\{v\d.d\}\[0\], \[x\d\]} 4 } } */
> +TEST_LDAPQ (uint64, u);
> +TEST_LDAPQ (int64, s);
> +TEST_LDAPQ (float64, f);
> +TEST_LDAPQ (poly64, p);
> +/* { dg-final { scan-assembler-times {ldap1\t\{v\d.d\}\[1\], \[x\d\]} 4 } } */
> +
> +TEST_STL (uint64, u);
> +TEST_STL (int64, s);
> +TEST_STL (float64, f);
> +TEST_STL (poly64, p);
> +/* { dg-final { scan-assembler-times {stl1\t\{v\d.d\}\[0\], \[x\d\]} 4 } } */
> +TEST_STLQ (uint64, u);
> +TEST_STLQ (int64, s);
> +TEST_STLQ (float64, f);
> +TEST_STLQ (poly64, p);
> +/* { dg-final { scan-assembler-times {stl1\t\{v\d.d\}\[1\], \[x\d\]} 4 } } */
^ permalink raw reply [flat|nested] 12+ messages in thread