* [PATCH]AArch64 Rewrite simd move immediate patterns to new syntax
@ 2023-09-27 0:52 Tamar Christina
2023-09-27 1:27 ` Ramana Radhakrishnan
2023-09-27 10:41 ` Richard Sandiford
0 siblings, 2 replies; 4+ messages in thread
From: Tamar Christina @ 2023-09-27 0:52 UTC (permalink / raw)
To: gcc-patches
Cc: nd, Richard.Earnshaw, Marcus.Shawcroft, Kyrylo.Tkachov,
richard.sandiford
Hi All,
This rewrites the simd MOV patterns to use the new compact syntax.
No change in semantics is expected. This will be needed in follow-on patches.
This also merges the splits into the define_insn, which will also be needed soon.
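For reference, here is a minimal sketch of the old and new syntaxes side by
side.  The "*toy_mov" pattern, its operands and its type attributes are made
up for illustration and are not taken from this patch:

  ;; Old style: constraints inline in the match_operands, one template
  ;; per alternative after "@", attributes as parallel comma lists.
  (define_insn "*toy_mov"
    [(set (match_operand:SI 0 "nonimmediate_operand" "=r,m")
          (match_operand:SI 1 "register_operand" "r,r"))]
    ""
    "@
     mov\t%w0, %w1
     str\t%w1, %0"
    [(set_attr "type" "mov_reg,store_4")]
  )

  ;; Compact style: the match_operands carry no constraint strings; each
  ;; row gives the constraints, the per-alternative values of the
  ;; attributes named in the header, and the output template.
  (define_insn "*toy_mov"
    [(set (match_operand:SI 0 "nonimmediate_operand")
          (match_operand:SI 1 "register_operand"))]
    ""
    {@ [cons: =0, 1; attrs: type]
       [r, r; mov_reg] mov\t%w0, %w1
       [m, r; store_4] str\t%w1, %0
    }
  )

A row's template can also start with "<<" to run C code, and "*" gives an
attribute its default value; the rewritten patterns below use both.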
Bootstrapped and regtested on aarch64-none-linux-gnu with no issues.
Ok for master?
Thanks,
Tamar
gcc/ChangeLog:
PR tree-optimization/109154
* config/aarch64/aarch64-simd.md (*aarch64_simd_mov<VDMOV:mode>):
Rewrite to new syntax.
(*aarch64_simd_mov<VQMOV:mode>): Rewrite to new syntax and merge in
splits.
--- inline copy of patch --
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index e955691f1be8830efacc237465119764ce2a4942..7b4d5a37a9795fefda785aaacc246918826ed0a2 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -143,54 +143,57 @@ (define_insn "aarch64_dup_lane_<vswap_width_name><mode>"
)
(define_insn "*aarch64_simd_mov<VDMOV:mode>"
- [(set (match_operand:VDMOV 0 "nonimmediate_operand"
- "=w, r, m, m, m, w, ?r, ?w, ?r, w, w")
- (match_operand:VDMOV 1 "general_operand"
- "m, m, Dz, w, r, w, w, r, r, Dn, Dz"))]
+ [(set (match_operand:VDMOV 0 "nonimmediate_operand")
+ (match_operand:VDMOV 1 "general_operand"))]
"TARGET_FLOAT
&& (register_operand (operands[0], <MODE>mode)
|| aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
- "@
- ldr\t%d0, %1
- ldr\t%x0, %1
- str\txzr, %0
- str\t%d1, %0
- str\t%x1, %0
- * return TARGET_SIMD ? \"mov\t%0.<Vbtype>, %1.<Vbtype>\" : \"fmov\t%d0, %d1\";
- * return TARGET_SIMD ? \"umov\t%0, %1.d[0]\" : \"fmov\t%x0, %d1\";
- fmov\t%d0, %1
- mov\t%0, %1
- * return aarch64_output_simd_mov_immediate (operands[1], 64);
- fmov\t%d0, xzr"
- [(set_attr "type" "neon_load1_1reg<q>, load_8, store_8, neon_store1_1reg<q>,\
- store_8, neon_logic<q>, neon_to_gp<q>, f_mcr,\
- mov_reg, neon_move<q>, f_mcr")
- (set_attr "arch" "*,*,*,*,*,*,*,*,*,simd,*")]
-)
-
-(define_insn "*aarch64_simd_mov<VQMOV:mode>"
- [(set (match_operand:VQMOV 0 "nonimmediate_operand"
- "=w, Umn, m, w, ?r, ?w, ?r, w, w")
- (match_operand:VQMOV 1 "general_operand"
- "m, Dz, w, w, w, r, r, Dn, Dz"))]
+ {@ [cons: =0, 1; attrs: type, arch]
+ [w , m ; neon_load1_1reg<q> , * ] ldr\t%d0, %1
+ [r , m ; load_8 , * ] ldr\t%x0, %1
+ [m , Dz; store_8 , * ] str\txzr, %0
+ [m , w ; neon_store1_1reg<q>, * ] str\t%d1, %0
+ [m , r ; store_8 , * ] str\t%x1, %0
+ [w , w ; neon_logic<q> , simd] mov\t%0.<Vbtype>, %1.<Vbtype>
+ [w , w ; neon_logic<q> , * ] fmov\t%d0, %d1
+ [?r, w ; neon_to_gp<q> , simd] umov\t%0, %1.d[0]
+ [?r, w ; neon_to_gp<q> , * ] fmov\t%x0, %d1
+ [?w, r ; f_mcr , * ] fmov\t%d0, %1
+ [?r, r ; mov_reg , * ] mov\t%0, %1
+ [w , Dn; neon_move<q> , simd] << aarch64_output_simd_mov_immediate (operands[1], 64);
+ [w , Dz; f_mcr , * ] fmov\t%d0, xzr
+ }
+)
+
+(define_insn_and_split "*aarch64_simd_mov<VQMOV:mode>"
+ [(set (match_operand:VQMOV 0 "nonimmediate_operand")
+ (match_operand:VQMOV 1 "general_operand"))]
"TARGET_FLOAT
&& (register_operand (operands[0], <MODE>mode)
|| aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
- "@
- ldr\t%q0, %1
- stp\txzr, xzr, %0
- str\t%q1, %0
- mov\t%0.<Vbtype>, %1.<Vbtype>
- #
- #
- #
- * return aarch64_output_simd_mov_immediate (operands[1], 128);
- fmov\t%d0, xzr"
- [(set_attr "type" "neon_load1_1reg<q>, store_16, neon_store1_1reg<q>,\
- neon_logic<q>, multiple, multiple,\
- multiple, neon_move<q>, fmov")
- (set_attr "length" "4,4,4,4,8,8,8,4,4")
- (set_attr "arch" "*,*,*,simd,*,*,*,simd,*")]
+ {@ [cons: =0, 1; attrs: type, arch, length]
+ [w , m ; neon_load1_1reg<q> , * , 4] ldr\t%q0, %1
+ [Umn, Dz; store_16 , * , 4] stp\txzr, xzr, %0
+ [m , w ; neon_store1_1reg<q>, * , 4] str\t%q1, %0
+ [w , w ; neon_logic<q> , simd, 4] mov\t%0.<Vbtype>, %1.<Vbtype>
+ [?r , w ; multiple , * , 8] #
+ [?w , r ; multiple , * , 8] #
+ [?r , r ; multiple , * , 8] #
+ [w , Dn; neon_move<q> , simd, 4] << aarch64_output_simd_mov_immediate (operands[1], 128);
+ [w , Dz; fmov , * , 4] fmov\t%d0, xzr
+ }
+ "&& reload_completed
+ && !(FP_REGNUM_P (REGNO (operands[0]))
+ && FP_REGNUM_P (REGNO (operands[1])))"
+ [(const_int 0)]
+ {
+ if (GP_REGNUM_P (REGNO (operands[0]))
+ && GP_REGNUM_P (REGNO (operands[1])))
+ aarch64_simd_emit_reg_reg_move (operands, DImode, 2);
+ else
+ aarch64_split_simd_move (operands[0], operands[1]);
+ DONE;
+ }
)
;; When storing lane zero we can use the normal STR and its more permissive
@@ -276,33 +279,6 @@ (define_insn "vec_store_pair<VQ:mode><VQ2:mode>"
[(set_attr "type" "neon_stp_q")]
)
-
-(define_split
- [(set (match_operand:VQMOV 0 "register_operand" "")
- (match_operand:VQMOV 1 "register_operand" ""))]
- "TARGET_FLOAT
- && reload_completed
- && GP_REGNUM_P (REGNO (operands[0]))
- && GP_REGNUM_P (REGNO (operands[1]))"
- [(const_int 0)]
-{
- aarch64_simd_emit_reg_reg_move (operands, DImode, 2);
- DONE;
-})
-
-(define_split
- [(set (match_operand:VQMOV 0 "register_operand" "")
- (match_operand:VQMOV 1 "register_operand" ""))]
- "TARGET_FLOAT
- && reload_completed
- && ((FP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1])))
- || (GP_REGNUM_P (REGNO (operands[0])) && FP_REGNUM_P (REGNO (operands[1]))))"
- [(const_int 0)]
-{
- aarch64_split_simd_move (operands[0], operands[1]);
- DONE;
-})
-
(define_expand "@aarch64_split_simd_mov<mode>"
[(set (match_operand:VQMOV 0)
(match_operand:VQMOV 1))]
--
* Re: [PATCH]AArch64 Rewrite simd move immediate patterns to new syntax
2023-09-27 0:52 [PATCH]AArch64 Rewrite simd move immediate patterns to new syntax Tamar Christina
@ 2023-09-27 1:27 ` Ramana Radhakrishnan
2023-09-27 2:40 ` Tamar Christina
2023-09-27 10:41 ` Richard Sandiford
1 sibling, 1 reply; 4+ messages in thread
From: Ramana Radhakrishnan @ 2023-09-27 1:27 UTC (permalink / raw)
To: Tamar Christina
Cc: gcc-patches, nd, Richard.Earnshaw, Marcus.Shawcroft,
Kyrylo.Tkachov, richard.sandiford
On Wed, Sep 27, 2023 at 1:53 AM Tamar Christina <tamar.christina@arm.com> wrote:
>
> Hi All,
>
> This rewrites the simd MOV patterns to use the new compact syntax.
> No change in semantics is expected. This will be needed in follow on patches.
>
> This also merges the splits into the define_insn which will also be needed soon.
>
> Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
>
> Ok for master?
>
> Thanks,
> Tamar
>
> gcc/ChangeLog:
>
> PR tree-optimization/109154
> * config/aarch64/aarch64-simd.md (*aarch64_simd_mov<VDMOV:mode>):
> Rewrite to new syntax.
> (*aarch64_simd_mov<VQMOV:mode): Rewrite to new syntax and merge in
> splits.
>
> --- inline copy of patch --
> diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
> index e955691f1be8830efacc237465119764ce2a4942..7b4d5a37a9795fefda785aaacc246918826ed0a2 100644
> --- a/gcc/config/aarch64/aarch64-simd.md
> +++ b/gcc/config/aarch64/aarch64-simd.md
> @@ -143,54 +143,57 @@ (define_insn "aarch64_dup_lane_<vswap_width_name><mode>"
> )
>
> (define_insn "*aarch64_simd_mov<VDMOV:mode>"
> - [(set (match_operand:VDMOV 0 "nonimmediate_operand"
> - "=w, r, m, m, m, w, ?r, ?w, ?r, w, w")
> - (match_operand:VDMOV 1 "general_operand"
> - "m, m, Dz, w, r, w, w, r, r, Dn, Dz"))]
> + [(set (match_operand:VDMOV 0 "nonimmediate_operand")
> + (match_operand:VDMOV 1 "general_operand"))]
> "TARGET_FLOAT
> && (register_operand (operands[0], <MODE>mode)
> || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
> - "@
> - ldr\t%d0, %1
> - ldr\t%x0, %1
> - str\txzr, %0
> - str\t%d1, %0
> - str\t%x1, %0
> - * return TARGET_SIMD ? \"mov\t%0.<Vbtype>, %1.<Vbtype>\" : \"fmov\t%d0, %d1\";
> - * return TARGET_SIMD ? \"umov\t%0, %1.d[0]\" : \"fmov\t%x0, %d1\";
> - fmov\t%d0, %1
> - mov\t%0, %1
> - * return aarch64_output_simd_mov_immediate (operands[1], 64);
> - fmov\t%d0, xzr"
> - [(set_attr "type" "neon_load1_1reg<q>, load_8, store_8, neon_store1_1reg<q>,\
> - store_8, neon_logic<q>, neon_to_gp<q>, f_mcr,\
> - mov_reg, neon_move<q>, f_mcr")
> - (set_attr "arch" "*,*,*,*,*,*,*,*,*,simd,*")]
> -)
> -
> -(define_insn "*aarch64_simd_mov<VQMOV:mode>"
> - [(set (match_operand:VQMOV 0 "nonimmediate_operand"
> - "=w, Umn, m, w, ?r, ?w, ?r, w, w")
> - (match_operand:VQMOV 1 "general_operand"
> - "m, Dz, w, w, w, r, r, Dn, Dz"))]
> + {@ [cons: =0, 1; attrs: type, arch]
> + [w , m ; neon_load1_1reg<q> , * ] ldr\t%d0, %1
> + [r , m ; load_8 , * ] ldr\t%x0, %1
> + [m , Dz; store_8 , * ] str\txzr, %0
> + [m , w ; neon_store1_1reg<q>, * ] str\t%d1, %0
> + [m , r ; store_8 , * ] str\t%x1, %0
This hunk starting here.
> + [w , w ; neon_logic<q> , simd] mov\t%0.<Vbtype>, %1.<Vbtype>
> + [w , w ; neon_logic<q> , * ] fmov\t%d0, %d1
Can you explain how this hunk is equivalent to
return TARGET_SIMD ? \"mov\t%0.<Vbtype>, %1.<Vbtype>\" : \"fmov\t%d0, %d1\"; ?
> + [?r, w ; neon_to_gp<q> , simd] umov\t%0, %1.d[0]
> + [?r, w ; neon_to_gp<q> , * ] fmov\t%x0, %d1
> + [?w, r ; f_mcr , * ] fmov\t%d0, %1
> + [?r, r ; mov_reg , * ] mov\t%0, %1
> + [w , Dn; neon_move<q> , simd] << aarch64_output_simd_mov_immediate (operands[1], 64);
> + [w , Dz; f_mcr , * ] fmov\t%d0, xzr
> + }
> +)
> +
> +(define_insn_and_split "*aarch64_simd_mov<VQMOV:mode>"
> + [(set (match_operand:VQMOV 0 "nonimmediate_operand")
> + (match_operand:VQMOV 1 "general_operand"))]
> "TARGET_FLOAT
> && (register_operand (operands[0], <MODE>mode)
> || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
> - "@
> - ldr\t%q0, %1
> - stp\txzr, xzr, %0
> - str\t%q1, %0
> - mov\t%0.<Vbtype>, %1.<Vbtype>
> - #
> - #
> - #
> - * return aarch64_output_simd_mov_immediate (operands[1], 128);
> - fmov\t%d0, xzr"
> - [(set_attr "type" "neon_load1_1reg<q>, store_16, neon_store1_1reg<q>,\
> - neon_logic<q>, multiple, multiple,\
> - multiple, neon_move<q>, fmov")
> - (set_attr "length" "4,4,4,4,8,8,8,4,4")
> - (set_attr "arch" "*,*,*,simd,*,*,*,simd,*")]
> + {@ [cons: =0, 1; attrs: type, arch, length]
> + [w , m ; neon_load1_1reg<q> , * , 4] ldr\t%q0, %1
> + [Umn, Dz; store_16 , * , 4] stp\txzr, xzr, %0
> + [m , w ; neon_store1_1reg<q>, * , 4] str\t%q1, %0
> + [w , w ; neon_logic<q> , simd, 4] mov\t%0.<Vbtype>, %1.<Vbtype>
> + [?r , w ; multiple , * , 8] #
> + [?w , r ; multiple , * , 8] #
> + [?r , r ; multiple , * , 8] #
> + [w , Dn; neon_move<q> , simd, 4] << aarch64_output_simd_mov_immediate (operands[1], 128);
> + [w , Dz; fmov , * , 4] fmov\t%d0, xzr
> + }
> + "&& reload_completed
> + && !(FP_REGNUM_P (REGNO (operands[0]))
> + && FP_REGNUM_P (REGNO (operands[1])))"
> + [(const_int 0)]
> + {
> + if (GP_REGNUM_P (REGNO (operands[0]))
> + && GP_REGNUM_P (REGNO (operands[1])))
> + aarch64_simd_emit_reg_reg_move (operands, DImode, 2);
> + else
> + aarch64_split_simd_move (operands[0], operands[1]);
> + DONE;
> + }
> )
>
Reads correctly at first glance. Perhaps a sanity check with the
aarch64 simd intrinsics suite, vect.exp, or tsvc under a suitable
multilib to give some confidence that there are no code changes?
Reviewed-by: Ramana Radhakrishnan <ramana@gcc.gnu.org>
regards
Ramana
> ;; When storing lane zero we can use the normal STR and its more permissive
> @@ -276,33 +279,6 @@ (define_insn "vec_store_pair<VQ:mode><VQ2:mode>"
> [(set_attr "type" "neon_stp_q")]
> )
>
> -
> -(define_split
> - [(set (match_operand:VQMOV 0 "register_operand" "")
> - (match_operand:VQMOV 1 "register_operand" ""))]
> - "TARGET_FLOAT
> - && reload_completed
> - && GP_REGNUM_P (REGNO (operands[0]))
> - && GP_REGNUM_P (REGNO (operands[1]))"
> - [(const_int 0)]
> -{
> - aarch64_simd_emit_reg_reg_move (operands, DImode, 2);
> - DONE;
> -})
> -
> -(define_split
> - [(set (match_operand:VQMOV 0 "register_operand" "")
> - (match_operand:VQMOV 1 "register_operand" ""))]
> - "TARGET_FLOAT
> - && reload_completed
> - && ((FP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1])))
> - || (GP_REGNUM_P (REGNO (operands[0])) && FP_REGNUM_P (REGNO (operands[1]))))"
> - [(const_int 0)]
> -{
> - aarch64_split_simd_move (operands[0], operands[1]);
> - DONE;
> -})
> -
> (define_expand "@aarch64_split_simd_mov<mode>"
> [(set (match_operand:VQMOV 0)
> (match_operand:VQMOV 1))]
>
>
>
>
> --
* RE: [PATCH]AArch64 Rewrite simd move immediate patterns to new syntax
2023-09-27 1:27 ` Ramana Radhakrishnan
@ 2023-09-27 2:40 ` Tamar Christina
0 siblings, 0 replies; 4+ messages in thread
From: Tamar Christina @ 2023-09-27 2:40 UTC (permalink / raw)
To: Ramana Radhakrishnan
Cc: gcc-patches, nd, Richard Earnshaw, Marcus Shawcroft,
Kyrylo Tkachov, Richard Sandiford
> -----Original Message-----
> From: Ramana Radhakrishnan <ramana.gcc@googlemail.com>
> Sent: Wednesday, September 27, 2023 2:28 AM
> To: Tamar Christina <Tamar.Christina@arm.com>
> Cc: gcc-patches@gcc.gnu.org; nd <nd@arm.com>; Richard Earnshaw
> <Richard.Earnshaw@arm.com>; Marcus Shawcroft
> <Marcus.Shawcroft@arm.com>; Kyrylo Tkachov <Kyrylo.Tkachov@arm.com>;
> Richard Sandiford <Richard.Sandiford@arm.com>
> Subject: Re: [PATCH]AArch64 Rewrite simd move immediate patterns to new syntax
>
> On Wed, Sep 27, 2023 at 1:53 AM Tamar Christina
> <tamar.christina@arm.com> wrote:
> >
> > Hi All,
> >
> > This rewrites the simd MOV patterns to use the new compact syntax.
> > No change in semantics is expected. This will be needed in follow on patches.
> >
> > This also merges the splits into the define_insn which will also be needed soon.
> >
> > Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
> >
> > Ok for master?
> >
> > Thanks,
> > Tamar
> >
> > gcc/ChangeLog:
> >
> > PR tree-optimization/109154
> > * config/aarch64/aarch64-simd.md (*aarch64_simd_mov<VDMOV:mode>):
> > Rewrite to new syntax.
> > (*aarch64_simd_mov<VQMOV:mode): Rewrite to new syntax and merge in
> > splits.
> >
> > --- inline copy of patch --
> > diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
> > index e955691f1be8830efacc237465119764ce2a4942..7b4d5a37a9795fefda785aaacc246918826ed0a2 100644
> > --- a/gcc/config/aarch64/aarch64-simd.md
> > +++ b/gcc/config/aarch64/aarch64-simd.md
> > @@ -143,54 +143,57 @@ (define_insn "aarch64_dup_lane_<vswap_width_name><mode>"
> > )
> >
> > (define_insn "*aarch64_simd_mov<VDMOV:mode>"
> > - [(set (match_operand:VDMOV 0 "nonimmediate_operand"
> > - "=w, r, m, m, m, w, ?r, ?w, ?r, w, w")
> > - (match_operand:VDMOV 1 "general_operand"
> > - "m, m, Dz, w, r, w, w, r, r, Dn, Dz"))]
> > + [(set (match_operand:VDMOV 0 "nonimmediate_operand")
> > + (match_operand:VDMOV 1 "general_operand"))]
> > "TARGET_FLOAT
> > && (register_operand (operands[0], <MODE>mode)
> > || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
> > - "@
> > - ldr\t%d0, %1
> > - ldr\t%x0, %1
> > - str\txzr, %0
> > - str\t%d1, %0
> > - str\t%x1, %0
> > - * return TARGET_SIMD ? \"mov\t%0.<Vbtype>, %1.<Vbtype>\" : \"fmov\t%d0, %d1\";
> > - * return TARGET_SIMD ? \"umov\t%0, %1.d[0]\" : \"fmov\t%x0, %d1\";
> > - fmov\t%d0, %1
> > - mov\t%0, %1
> > - * return aarch64_output_simd_mov_immediate (operands[1], 64);
> > - fmov\t%d0, xzr"
> > - [(set_attr "type" "neon_load1_1reg<q>, load_8, store_8, neon_store1_1reg<q>,\
> > - store_8, neon_logic<q>, neon_to_gp<q>, f_mcr,\
> > - mov_reg, neon_move<q>, f_mcr")
> > - (set_attr "arch" "*,*,*,*,*,*,*,*,*,simd,*")]
> > -)
> > -
> > -(define_insn "*aarch64_simd_mov<VQMOV:mode>"
> > - [(set (match_operand:VQMOV 0 "nonimmediate_operand"
> > - "=w, Umn, m, w, ?r, ?w, ?r, w, w")
> > - (match_operand:VQMOV 1 "general_operand"
> > - "m, Dz, w, w, w, r, r, Dn, Dz"))]
> > + {@ [cons: =0, 1; attrs: type, arch]
> > + [w , m ; neon_load1_1reg<q> , * ] ldr\t%d0, %1
> > + [r , m ; load_8 , * ] ldr\t%x0, %1
> > + [m , Dz; store_8 , * ] str\txzr, %0
> > + [m , w ; neon_store1_1reg<q>, * ] str\t%d1, %0
> > + [m , r ; store_8 , * ] str\t%x1, %0
>
> This hunk starting here.
>
> > + [w , w ; neon_logic<q> , simd] mov\t%0.<Vbtype>, %1.<Vbtype>
> > + [w , w ; neon_logic<q> , * ] fmov\t%d0, %d1
>
> Can you explain how this hunk is equivalent to
>
> return TARGET_SIMD ? \"mov\t%0.<Vbtype>, %1.<Vbtype>\" : \"fmov\t%d0, %d1\"; ?
>
LRA matches constraints from left to right, or with the new syntax from top to bottom.
Since both alternatives are w, w, their costs are always the same, in which case LRA
picks the first matching one. If SIMD is enabled that is the first alternative, which
corresponds to TARGET_SIMD; if it is not enabled, the second alternative is chosen.
In fact, since w, w has no losers, the second alternative is not even inspected
when the first one is enabled. So the equivalence is maintained by the order in which
the alternatives are processed.
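To put the two forms side by side (the comments are mine, for illustration):

  ;; Old: one w,w alternative; the choice happens at output time, when
  ;; the C fragment tests TARGET_SIMD:
  * return TARGET_SIMD ? \"mov\t%0.<Vbtype>, %1.<Vbtype>\" : \"fmov\t%d0, %d1\";

  ;; New: two w,w alternatives; the choice happens at matching time,
  ;; because "arch simd" feeds the "enabled" attribute and disables the
  ;; first row when !TARGET_SIMD:
  [w , w ; neon_logic<q> , simd] mov\t%0.<Vbtype>, %1.<Vbtype>
  [w , w ; neon_logic<q> , *   ] fmov\t%d0, %d1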
> > + [?r, w ; neon_to_gp<q> , simd] umov\t%0, %1.d[0]
> > + [?r, w ; neon_to_gp<q> , * ] fmov\t%x0, %d1
> > + [?w, r ; f_mcr , * ] fmov\t%d0, %1
> > + [?r, r ; mov_reg , * ] mov\t%0, %1
> > + [w , Dn; neon_move<q> , simd] << aarch64_output_simd_mov_immediate (operands[1], 64);
> > + [w , Dz; f_mcr , * ] fmov\t%d0, xzr
> > + }
> > +)
> > +
> > +(define_insn_and_split "*aarch64_simd_mov<VQMOV:mode>"
> > + [(set (match_operand:VQMOV 0 "nonimmediate_operand")
> > + (match_operand:VQMOV 1 "general_operand"))]
> > "TARGET_FLOAT
> > && (register_operand (operands[0], <MODE>mode)
> > || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
> > - "@
> > - ldr\t%q0, %1
> > - stp\txzr, xzr, %0
> > - str\t%q1, %0
> > - mov\t%0.<Vbtype>, %1.<Vbtype>
> > - #
> > - #
> > - #
> > - * return aarch64_output_simd_mov_immediate (operands[1], 128);
> > - fmov\t%d0, xzr"
> > - [(set_attr "type" "neon_load1_1reg<q>, store_16, neon_store1_1reg<q>,\
> > - neon_logic<q>, multiple, multiple,\
> > - multiple, neon_move<q>, fmov")
> > - (set_attr "length" "4,4,4,4,8,8,8,4,4")
> > - (set_attr "arch" "*,*,*,simd,*,*,*,simd,*")]
> > + {@ [cons: =0, 1; attrs: type, arch, length]
> > + [w , m ; neon_load1_1reg<q> , * , 4] ldr\t%q0, %1
> > + [Umn, Dz; store_16 , * , 4] stp\txzr, xzr, %0
> > + [m , w ; neon_store1_1reg<q>, * , 4] str\t%q1, %0
> > + [w , w ; neon_logic<q> , simd, 4] mov\t%0.<Vbtype>, %1.<Vbtype>
> > + [?r , w ; multiple , * , 8] #
> > + [?w , r ; multiple , * , 8] #
> > + [?r , r ; multiple , * , 8] #
> > + [w , Dn; neon_move<q> , simd, 4] << aarch64_output_simd_mov_immediate (operands[1], 128);
> > + [w , Dz; fmov , * , 4] fmov\t%d0, xzr
> > + }
> > + "&& reload_completed
> > + && !(FP_REGNUM_P (REGNO (operands[0]))
> > + && FP_REGNUM_P (REGNO (operands[1])))"
> > + [(const_int 0)]
> > + {
> > + if (GP_REGNUM_P (REGNO (operands[0]))
> > + && GP_REGNUM_P (REGNO (operands[1])))
> > + aarch64_simd_emit_reg_reg_move (operands, DImode, 2);
> > + else
> > + aarch64_split_simd_move (operands[0], operands[1]);
> > + DONE;
> > + }
> > )
> >
>
>
> Reads correctly at first glance. Perhaps a sanity check with the
> aarch64 simd intrinsics suite, vect.exp or tsvc under a suitable multilib to give
> some confidence as to no code changes. ?
Hmm, what multilib did you have in mind? AArch64 only has ILP32 as an
additional multilib.
Regards,
Tamar
>
> Reviewed-by: Ramana Radhakrishnan <ramana@gcc.gnu.org>
>
> regards
> Ramana
>
> Ramana
>
>
>
>
> > ;; When storing lane zero we can use the normal STR and its more permissive
> > @@ -276,33 +279,6 @@ (define_insn "vec_store_pair<VQ:mode><VQ2:mode>"
> > [(set_attr "type" "neon_stp_q")]
> > )
> >
> > -
> > -(define_split
> > - [(set (match_operand:VQMOV 0 "register_operand" "")
> > - (match_operand:VQMOV 1 "register_operand" ""))]
> > - "TARGET_FLOAT
> > - && reload_completed
> > - && GP_REGNUM_P (REGNO (operands[0]))
> > - && GP_REGNUM_P (REGNO (operands[1]))"
> > - [(const_int 0)]
> > -{
> > - aarch64_simd_emit_reg_reg_move (operands, DImode, 2);
> > - DONE;
> > -})
> > -
> > -(define_split
> > - [(set (match_operand:VQMOV 0 "register_operand" "")
> > - (match_operand:VQMOV 1 "register_operand" ""))]
> > - "TARGET_FLOAT
> > - && reload_completed
> > - && ((FP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1])))
> > - || (GP_REGNUM_P (REGNO (operands[0])) && FP_REGNUM_P (REGNO (operands[1]))))"
> > - [(const_int 0)]
> > -{
> > - aarch64_split_simd_move (operands[0], operands[1]);
> > - DONE;
> > -})
> > -
> > (define_expand "@aarch64_split_simd_mov<mode>"
> > [(set (match_operand:VQMOV 0)
> > (match_operand:VQMOV 1))]
> >
> >
> >
> >
> > --
* Re: [PATCH]AArch64 Rewrite simd move immediate patterns to new syntax
2023-09-27 0:52 [PATCH]AArch64 Rewrite simd move immediate patterns to new syntax Tamar Christina
2023-09-27 1:27 ` Ramana Radhakrishnan
@ 2023-09-27 10:41 ` Richard Sandiford
1 sibling, 0 replies; 4+ messages in thread
From: Richard Sandiford @ 2023-09-27 10:41 UTC (permalink / raw)
To: Tamar Christina
Cc: gcc-patches, nd, Richard.Earnshaw, Marcus.Shawcroft, Kyrylo.Tkachov
Tamar Christina <tamar.christina@arm.com> writes:
> Hi All,
>
> This rewrites the simd MOV patterns to use the new compact syntax.
> No change in semantics is expected. This will be needed in follow on patches.
>
> This also merges the splits into the define_insn which will also be needed soon.
>
> Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
>
> Ok for master?
>
> Thanks,
> Tamar
>
> gcc/ChangeLog:
>
> PR tree-optimization/109154
> * config/aarch64/aarch64-simd.md (*aarch64_simd_mov<VDMOV:mode>):
> Rewrite to new syntax.
> (*aarch64_simd_mov<VQMOV:mode): Rewrite to new syntax and merge in
> splits.
>
> --- inline copy of patch --
> diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
> index e955691f1be8830efacc237465119764ce2a4942..7b4d5a37a9795fefda785aaacc246918826ed0a2 100644
> --- a/gcc/config/aarch64/aarch64-simd.md
> +++ b/gcc/config/aarch64/aarch64-simd.md
> @@ -143,54 +143,57 @@ (define_insn "aarch64_dup_lane_<vswap_width_name><mode>"
> )
>
> (define_insn "*aarch64_simd_mov<VDMOV:mode>"
> - [(set (match_operand:VDMOV 0 "nonimmediate_operand"
> - "=w, r, m, m, m, w, ?r, ?w, ?r, w, w")
> - (match_operand:VDMOV 1 "general_operand"
> - "m, m, Dz, w, r, w, w, r, r, Dn, Dz"))]
> + [(set (match_operand:VDMOV 0 "nonimmediate_operand")
> + (match_operand:VDMOV 1 "general_operand"))]
> "TARGET_FLOAT
> && (register_operand (operands[0], <MODE>mode)
> || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
> - "@
> - ldr\t%d0, %1
> - ldr\t%x0, %1
> - str\txzr, %0
> - str\t%d1, %0
> - str\t%x1, %0
> - * return TARGET_SIMD ? \"mov\t%0.<Vbtype>, %1.<Vbtype>\" : \"fmov\t%d0, %d1\";
> - * return TARGET_SIMD ? \"umov\t%0, %1.d[0]\" : \"fmov\t%x0, %d1\";
> - fmov\t%d0, %1
> - mov\t%0, %1
> - * return aarch64_output_simd_mov_immediate (operands[1], 64);
> - fmov\t%d0, xzr"
> - [(set_attr "type" "neon_load1_1reg<q>, load_8, store_8, neon_store1_1reg<q>,\
> - store_8, neon_logic<q>, neon_to_gp<q>, f_mcr,\
> - mov_reg, neon_move<q>, f_mcr")
> - (set_attr "arch" "*,*,*,*,*,*,*,*,*,simd,*")]
> -)
> -
> -(define_insn "*aarch64_simd_mov<VQMOV:mode>"
> - [(set (match_operand:VQMOV 0 "nonimmediate_operand"
> - "=w, Umn, m, w, ?r, ?w, ?r, w, w")
> - (match_operand:VQMOV 1 "general_operand"
> - "m, Dz, w, w, w, r, r, Dn, Dz"))]
> + {@ [cons: =0, 1; attrs: type, arch]
> + [w , m ; neon_load1_1reg<q> , * ] ldr\t%d0, %1
> + [r , m ; load_8 , * ] ldr\t%x0, %1
> + [m , Dz; store_8 , * ] str\txzr, %0
> + [m , w ; neon_store1_1reg<q>, * ] str\t%d1, %0
> + [m , r ; store_8 , * ] str\t%x1, %0
> + [w , w ; neon_logic<q> , simd] mov\t%0.<Vbtype>, %1.<Vbtype>
> + [w , w ; neon_logic<q> , * ] fmov\t%d0, %d1
> + [?r, w ; neon_to_gp<q> , simd] umov\t%0, %1.d[0]
> + [?r, w ; neon_to_gp<q> , * ] fmov\t%x0, %d1
> + [?w, r ; f_mcr , * ] fmov\t%d0, %1
> + [?r, r ; mov_reg , * ] mov\t%0, %1
> + [w , Dn; neon_move<q> , simd] << aarch64_output_simd_mov_immediate (operands[1], 64);
> + [w , Dz; f_mcr , * ] fmov\t%d0, xzr
> + }
> +)
> +
> +(define_insn_and_split "*aarch64_simd_mov<VQMOV:mode>"
> + [(set (match_operand:VQMOV 0 "nonimmediate_operand")
> + (match_operand:VQMOV 1 "general_operand"))]
> "TARGET_FLOAT
> && (register_operand (operands[0], <MODE>mode)
> || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
> - "@
> - ldr\t%q0, %1
> - stp\txzr, xzr, %0
> - str\t%q1, %0
> - mov\t%0.<Vbtype>, %1.<Vbtype>
> - #
> - #
> - #
> - * return aarch64_output_simd_mov_immediate (operands[1], 128);
> - fmov\t%d0, xzr"
> - [(set_attr "type" "neon_load1_1reg<q>, store_16, neon_store1_1reg<q>,\
> - neon_logic<q>, multiple, multiple,\
> - multiple, neon_move<q>, fmov")
> - (set_attr "length" "4,4,4,4,8,8,8,4,4")
> - (set_attr "arch" "*,*,*,simd,*,*,*,simd,*")]
> + {@ [cons: =0, 1; attrs: type, arch, length]
> + [w , m ; neon_load1_1reg<q> , * , 4] ldr\t%q0, %1
> + [Umn, Dz; store_16 , * , 4] stp\txzr, xzr, %0
> + [m , w ; neon_store1_1reg<q>, * , 4] str\t%q1, %0
> + [w , w ; neon_logic<q> , simd, 4] mov\t%0.<Vbtype>, %1.<Vbtype>
> + [?r , w ; multiple , * , 8] #
> + [?w , r ; multiple , * , 8] #
> + [?r , r ; multiple , * , 8] #
> + [w , Dn; neon_move<q> , simd, 4] << aarch64_output_simd_mov_immediate (operands[1], 128);
> + [w , Dz; fmov , * , 4] fmov\t%d0, xzr
> + }
> + "&& reload_completed
> + && !(FP_REGNUM_P (REGNO (operands[0]))
> + && FP_REGNUM_P (REGNO (operands[1])))"
Won't this also trigger for the load, store, and Dn alternatives?
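For those rows operands[0] or operands[1] is a MEM or a CONST_VECTOR after
reload, so the bare REGNO checks don't look safe.  An untested sketch of one
possible tightening (the leading "&&" splices this onto the insn condition,
so the rest of the pattern would stay as-is):

  "&& reload_completed
   && register_operand (operands[0], <MODE>mode)
   && register_operand (operands[1], <MODE>mode)
   && !(FP_REGNUM_P (REGNO (operands[0]))
        && FP_REGNUM_P (REGNO (operands[1])))"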
Looks OK otherwise.
Thanks,
Richard
> + [(const_int 0)]
> + {
> + if (GP_REGNUM_P (REGNO (operands[0]))
> + && GP_REGNUM_P (REGNO (operands[1])))
> + aarch64_simd_emit_reg_reg_move (operands, DImode, 2);
> + else
> + aarch64_split_simd_move (operands[0], operands[1]);
> + DONE;
> + }
> )
>
> ;; When storing lane zero we can use the normal STR and its more permissive
> @@ -276,33 +279,6 @@ (define_insn "vec_store_pair<VQ:mode><VQ2:mode>"
> [(set_attr "type" "neon_stp_q")]
> )
>
> -
> -(define_split
> - [(set (match_operand:VQMOV 0 "register_operand" "")
> - (match_operand:VQMOV 1 "register_operand" ""))]
> - "TARGET_FLOAT
> - && reload_completed
> - && GP_REGNUM_P (REGNO (operands[0]))
> - && GP_REGNUM_P (REGNO (operands[1]))"
> - [(const_int 0)]
> -{
> - aarch64_simd_emit_reg_reg_move (operands, DImode, 2);
> - DONE;
> -})
> -
> -(define_split
> - [(set (match_operand:VQMOV 0 "register_operand" "")
> - (match_operand:VQMOV 1 "register_operand" ""))]
> - "TARGET_FLOAT
> - && reload_completed
> - && ((FP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1])))
> - || (GP_REGNUM_P (REGNO (operands[0])) && FP_REGNUM_P (REGNO (operands[1]))))"
> - [(const_int 0)]
> -{
> - aarch64_split_simd_move (operands[0], operands[1]);
> - DONE;
> -})
> -
> (define_expand "@aarch64_split_simd_mov<mode>"
> [(set (match_operand:VQMOV 0)
> (match_operand:VQMOV 1))]