From: Jonathan Wright <Jonathan.Wright@arm.com>
To: "gcc-patches@gcc.gnu.org" <gcc-patches@gcc.gnu.org>
Subject: [PATCH] aarch64: Model zero-high-half semantics of XTN instruction in RTL
Date: Tue, 15 Jun 2021 09:45:05 +0000 [thread overview]
Message-ID: <DBBPR08MB4758E2696FE96A9D84A8B3DAEB309@DBBPR08MB4758.eurprd08.prod.outlook.com> (raw)
[-- Attachment #1: Type: text/plain, Size: 1071 bytes --]
Hi,
Modeling the zero-high-half semantics of the XTN narrowing
instruction in RTL indicates to the compiler that this is a totally
destructive operation. This enables more RTL simplifications and also
prevents some register allocation issues.
Regression tested and bootstrapped on aarch64-none-linux-gnu - no
issues.
Ok for master?
Thanks,
Jonathan
---
gcc/ChangeLog:
2021-06-11 Jonathan Wright <jonathan.wright@arm.com>
* config/aarch64/aarch64-simd.md (aarch64_xtn<mode>_insn_le):
Define - modeling zero-high-half semantics.
(aarch64_xtn<mode>): Change to an expander that emits the
appropriate instruction depending on endianness.
(aarch64_xtn<mode>_insn_be): Define - modeling zero-high-half
semantics.
(aarch64_xtn2<mode>_le): Rename to...
(aarch64_xtn2<mode>_insn_le): This.
(aarch64_xtn2<mode>_be): Rename to...
(aarch64_xtn2<mode>_insn_be): This.
(vec_pack_trunc_<mode>): Emit truncation instruction instead
of aarch64_xtn.
* config/aarch64/iterators.md (Vnarrowd): New mode
attribute.
[-- Attachment #2: rb14563.patch --]
[-- Type: application/octet-stream, Size: 5773 bytes --]
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index e750faed1dbd940cdfa216d858b98f3bc25bba42..b23556b551cbbef420950007e9714acf190a534d 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -1690,17 +1690,48 @@
;; Narrowing operations.
-;; For doubles.
+(define_insn "aarch64_xtn<mode>_insn_le"
+ [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
+ (vec_concat:<VNARROWQ2>
+ (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w"))
+ (match_operand:<VNARROWQ> 2 "aarch64_simd_or_scalar_imm_zero")))]
+ "TARGET_SIMD && !BYTES_BIG_ENDIAN"
+ "xtn\\t%0.<Vntype>, %1.<Vtype>"
+ [(set_attr "type" "neon_move_narrow_q")]
+)
-(define_insn "aarch64_xtn<mode>"
- [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
- (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
- "TARGET_SIMD"
+(define_insn "aarch64_xtn<mode>_insn_be"
+ [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
+ (vec_concat:<VNARROWQ2>
+ (match_operand:<VNARROWQ> 2 "aarch64_simd_or_scalar_imm_zero")
+ (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w"))))]
+ "TARGET_SIMD && BYTES_BIG_ENDIAN"
"xtn\\t%0.<Vntype>, %1.<Vtype>"
[(set_attr "type" "neon_move_narrow_q")]
)
-(define_insn "aarch64_xtn2<mode>_le"
+(define_expand "aarch64_xtn<mode>"
+ [(set (match_operand:<VNARROWQ> 0 "register_operand")
+ (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand")))]
+ "TARGET_SIMD"
+ {
+ rtx tmp = gen_reg_rtx (<VNARROWQ2>mode);
+ if (BYTES_BIG_ENDIAN)
+ emit_insn (gen_aarch64_xtn<mode>_insn_be (tmp, operands[1],
+ CONST0_RTX (<VNARROWQ>mode)));
+ else
+ emit_insn (gen_aarch64_xtn<mode>_insn_le (tmp, operands[1],
+ CONST0_RTX (<VNARROWQ>mode)));
+
+ /* The intrinsic expects a narrow result, so emit a subreg that will get
+ optimized away as appropriate. */
+ emit_move_insn (operands[0], lowpart_subreg (<VNARROWQ>mode, tmp,
+ <VNARROWQ2>mode));
+ DONE;
+ }
+)
+
+(define_insn "aarch64_xtn2<mode>_insn_le"
[(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
(vec_concat:<VNARROWQ2>
(match_operand:<VNARROWQ> 1 "register_operand" "0")
@@ -1710,7 +1741,7 @@
[(set_attr "type" "neon_move_narrow_q")]
)
-(define_insn "aarch64_xtn2<mode>_be"
+(define_insn "aarch64_xtn2<mode>_insn_be"
[(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
(vec_concat:<VNARROWQ2>
(truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))
@@ -1727,15 +1758,17 @@
"TARGET_SIMD"
{
if (BYTES_BIG_ENDIAN)
- emit_insn (gen_aarch64_xtn2<mode>_be (operands[0], operands[1],
- operands[2]));
+ emit_insn (gen_aarch64_xtn2<mode>_insn_be (operands[0], operands[1],
+ operands[2]));
else
- emit_insn (gen_aarch64_xtn2<mode>_le (operands[0], operands[1],
- operands[2]));
+ emit_insn (gen_aarch64_xtn2<mode>_insn_le (operands[0], operands[1],
+ operands[2]));
DONE;
}
)
+;; Packing doubles.
+
(define_expand "vec_pack_trunc_<mode>"
[(match_operand:<VNARROWD> 0 "register_operand")
(match_operand:VDN 1 "register_operand")
@@ -1748,10 +1781,35 @@
emit_insn (gen_move_lo_quad_<Vdbl> (tempreg, operands[lo]));
emit_insn (gen_move_hi_quad_<Vdbl> (tempreg, operands[hi]));
- emit_insn (gen_aarch64_xtn<Vdbl> (operands[0], tempreg));
+ emit_insn (gen_trunc<Vdbl><Vnarrowd>2 (operands[0], tempreg));
DONE;
})
+;; Packing quads.
+
+(define_expand "vec_pack_trunc_<mode>"
+ [(set (match_operand:<VNARROWQ2> 0 "register_operand")
+ (vec_concat:<VNARROWQ2>
+ (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand"))
+ (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand"))))]
+ "TARGET_SIMD"
+ {
+ rtx tmpreg = gen_reg_rtx (<VNARROWQ>mode);
+ int lo = BYTES_BIG_ENDIAN ? 2 : 1;
+ int hi = BYTES_BIG_ENDIAN ? 1 : 2;
+
+ emit_insn (gen_trunc<mode><Vnarrowq>2 (tmpreg, operands[lo]));
+
+ if (BYTES_BIG_ENDIAN)
+ emit_insn (gen_aarch64_xtn2<mode>_insn_be (operands[0], tmpreg,
+ operands[hi]));
+ else
+ emit_insn (gen_aarch64_xtn2<mode>_insn_le (operands[0], tmpreg,
+ operands[hi]));
+ DONE;
+ }
+)
+
(define_insn "aarch64_shrn<mode>_insn_le"
[(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
(vec_concat:<VNARROWQ2>
@@ -1936,29 +1994,6 @@
}
)
-;; For quads.
-
-(define_expand "vec_pack_trunc_<mode>"
- [(set (match_operand:<VNARROWQ2> 0 "register_operand")
- (vec_concat:<VNARROWQ2>
- (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand"))
- (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand"))))]
- "TARGET_SIMD"
- {
- rtx tmpreg = gen_reg_rtx (<VNARROWQ>mode);
- int lo = BYTES_BIG_ENDIAN ? 2 : 1;
- int hi = BYTES_BIG_ENDIAN ? 1 : 2;
-
- emit_insn (gen_aarch64_xtn<mode> (tmpreg, operands[lo]));
-
- if (BYTES_BIG_ENDIAN)
- emit_insn (gen_aarch64_xtn2<mode>_be (operands[0], tmpreg, operands[hi]));
- else
- emit_insn (gen_aarch64_xtn2<mode>_le (operands[0], tmpreg, operands[hi]));
- DONE;
- }
-)
-
;; Widening operations.
(define_insn "aarch64_simd_vec_unpack<su>_lo_<mode>"
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index e9047d00d979411752d9aeddaadb05ec38e3a145..caa42f8f169fbf2cf46a90cf73dee05619acc300 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -1257,6 +1257,8 @@
;; Narrowed modes for VDN.
(define_mode_attr VNARROWD [(V4HI "V8QI") (V2SI "V4HI")
(DI "V2SI")])
+(define_mode_attr Vnarrowd [(V4HI "v8qi") (V2SI "v4hi")
+ (DI "v2si")])
;; Narrowed double-modes for VQN (Used for XTN).
(define_mode_attr VNARROWQ [(V8HI "V8QI") (V4SI "V4HI")
next reply other threads:[~2021-06-15 9:45 UTC|newest]
Thread overview: 3+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-06-15 9:45 Jonathan Wright [this message]
2021-06-16 9:04 ` [PATCH V2] " Jonathan Wright
2021-06-16 12:28 ` Richard Sandiford
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=DBBPR08MB4758E2696FE96A9D84A8B3DAEB309@DBBPR08MB4758.eurprd08.prod.outlook.com \
--to=jonathan.wright@arm.com \
--cc=gcc-patches@gcc.gnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).