From: Tamar Christina <tamar.christina@arm.com>
To: gcc-patches@gcc.gnu.org
Cc: nd@arm.com, Richard.Earnshaw@arm.com, Marcus.Shawcroft@arm.com,
Kyrylo.Tkachov@arm.com, richard.sandiford@arm.com
Subject: [PATCH 2/2] AArch64: Add implementation for vector cbranch.
Date: Wed, 2 Nov 2022 14:46:04 +0000
Message-ID: <Y2KCrKb019Z1/HgC@arm.com>
In-Reply-To: <patch-16498-tamar@arm.com>
Hi All,
This adds an implementation of the conditional branch (cbranch) optab for AArch64.
For 128-bit vectors we generate:
cmhi v1.4s, v1.4s, v0.4s
umaxp v1.4s, v1.4s, v1.4s
fmov x3, d1
cbnz x3, .L8
and for 64-bit vectors we can omit the compression:
cmhi v1.2s, v1.2s, v0.2s
fmov x2, d1
cbz x2, .L13
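To spell out the reduction trick: umaxp does a pairwise unsigned max of the
mask with itself, which folds all four lanes into the low 64 bits of the
vector, so a single scalar cbnz can test whether any lane matched.  In NEON
intrinsics the 128-bit sequence corresponds roughly to this sketch (my
illustration only, assuming arm_neon.h and inputs va/vb; not part of the
patch):

  uint32x4_t mask = vcgtq_u32 (va, vb);                  /* cmhi  */
  uint32x4_t any = vpmaxq_u32 (mask, mask);              /* umaxp */
  if (vgetq_lane_u64 (vreinterpretq_u64_u32 (any), 0))   /* fmov + cbnz */
    goto early_exit;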
I also wanted to provide a version that mixes SVE and NEON so I could use
the SVE predicated compare instructions (CMPLO below) with a NEON register.
Concretely, for a 128-bit vector you'd get:
ptrue p0.s, vl4
.L3:
...
cmplo p2.s, p0/z, z0.s, z2.s
b.any .L6
...
cmp w2, 200
bne .L3
However, I ran into an issue: cbranch is not the pattern that performs the
comparison, and if I use combine to form the compare instead then the
resulting ptrue would not be floated outside the loop.
Is there currently a way to do this, or does a mid-end pass need to be
changed for this?
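For context, the kind of loop that exercises these sequences looks roughly
like this (an illustrative sketch on my part, not a testcase from the patch;
the scalar exit test corresponds to the cmp w2, 200 above):

  int f (unsigned *restrict a, unsigned *restrict b)
  {
    for (int i = 0; i < 200; i++)
      if (a[i] > b[i])   /* early break: becomes the vector cbranch */
        return 1;
    return 0;
  }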
Bootstrapped and regtested on aarch64-none-linux-gnu with no issues.
Ok for master?
Thanks,
Tamar
gcc/ChangeLog:
* config/aarch64/aarch64-simd.md (cbranch<mode>4): New.
gcc/testsuite/ChangeLog:
* lib/target-supports.exp (check_effective_target_vect_early_break):
Enable AArch64 generically.
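For reference, testcases can then gate on this in the usual way, e.g.
(illustration only, not part of this patch):

  /* { dg-require-effective-target vect_early_break } */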
--- inline copy of patch --
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 5386043739a9b2e328bfb2fc9067da8feeac1a92..e53d339ea20492812a3faa7c20ed945255321b11 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -3795,6 +3795,41 @@ (define_expand "vcond_mask_<mode><v_int_equiv>"
DONE;
})
+;; Patterns comparing two vectors to produce a set of flags.
+
+(define_expand "cbranch<mode>4"
+ [(set (pc)
+ (if_then_else
+ (match_operator 0 "aarch64_equality_operator"
+ [(match_operand:VDQ_BHSI 1 "register_operand")
+ (match_operand:VDQ_BHSI 2 "aarch64_simd_reg_or_zero")])
+ (label_ref (match_operand 3 ""))
+ (pc)))]
+ "TARGET_SIMD"
+{
+ rtx tmp = gen_reg_rtx (<MODE>mode);
+
+ /* For 64-bit vectors we need no reductions. */
+ if (known_eq (128, GET_MODE_BITSIZE (<MODE>mode)))
+ {
+ /* Always reduce using a V4SI. */
+ rtx reduc = simplify_gen_subreg (V4SImode, operands[1], <MODE>mode, 0);
+ rtx res = gen_reg_rtx (V4SImode);
+ emit_insn (gen_aarch64_umaxpv4si (res, reduc, reduc));
+ emit_move_insn (tmp, simplify_gen_subreg (<MODE>mode, res, V4SImode, 0));
+ }
+ else
+ tmp = operands[1];
+
+ rtx val = gen_reg_rtx (DImode);
+ emit_move_insn (val, simplify_gen_subreg (DImode, tmp, <MODE>mode, 0));
+
+ rtx cc_reg = aarch64_gen_compare_reg (NE, val, const0_rtx);
+ rtx cmp_rtx = gen_rtx_fmt_ee (NE, DImode, cc_reg, const0_rtx);
+ emit_jump_insn (gen_condjump (cmp_rtx, cc_reg, operands[3]));
+ DONE;
+})
+
;; Patterns comparing two vectors to produce a mask.
(define_expand "vec_cmp<mode><mode>"
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index 5cbf54bd2a23dfdc5dc7b148b0dc6ed4c63814ae..8964cbd6610a718711546d312e89cee937d210e8 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -3653,8 +3653,7 @@ proc check_effective_target_vect_int { } {
proc check_effective_target_vect_early_break { } {
return [check_cached_effective_target_indexed vect_early_break {
expr {
- ([istarget aarch64*-*-*]
- && [check_effective_target_aarch64_sve])
+ [istarget aarch64*-*-*]
}}]
}
# Return 1 if the target supports hardware vectorization of complex additions of
--