From: Tamar Christina <tamar.christina@arm.com>
To: gcc-patches@gcc.gnu.org
Cc: nd@arm.com, Richard.Earnshaw@arm.com, Marcus.Shawcroft@arm.com,
Kyrylo.Tkachov@arm.com, richard.sandiford@arm.com
Subject: [PATCH 2/2]AArch64 Support new tbranch optab.
Date: Mon, 31 Oct 2022 11:53:50 +0000 [thread overview]
Message-ID: <Y1+3ThtA9vUT43aA@arm.com> (raw)
In-Reply-To: <patch-16485-tamar@arm.com>
Hi All,
This implements the new tbranch optab for AArch64.
Instead of emitting the instruction directly I've chosen to expand the pattern
using a zero extract and generating the existing comparison pattern, for two
reasons:
1. It allows CSE of the actual comparison.
2. It looks like the code in expand marks the label as unused and removes it
if it doesn't see a separate reference to it.
Because of this expansion, though, I disable the pattern at -O0, since combine
does not run at that level and we would end up with worse code. I did try
emitting the pattern directly, but as mentioned in point 2 above, expand would
then kill the label.
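
To illustrate the intent (a rough sketch of the expected code generation, not a
guarantee), a single-bit test like the one below should now expand to a bit
extract followed by the usual compare-and-branch, which combine then folds into
a single test-and-branch instruction; this mirrors the g1 case in the new test,
which expects a lone tbnz on bit 0:

#include <stdbool.h>

void h (void);

/* Tests bit 0 of x; at -O1 and above this is expected to become a single
   tbnz/tbz on bit 0 rather than a separate tst followed by a conditional
   branch.  */
void
f (bool x)
{
  if (__builtin_expect (x, 0))
    h ();
}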
While doing this I noticed that the version that checks the sign bit doesn't
work. The reason for this looks to be an incorrect pattern. The [us]bfx
instructions are defined for index + size == register size. They architecturally
alias to different instructions, and binutils handles this correctly.
In GCC however we tried to prematurely optimize this and added a separate split
pattern. That pattern is also missing alternatives and only handles DImode.
This patch removes the split and relaxes the constraints on the normal bfx pattern.
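
As a minimal reproducer for the sign-bit case (it matches the g3_lt test added
below; the function name is only for illustration), a branch on x < 0 is a test
of bit 31, i.e. lsb + width equal to the register size, which the old range
check rejected:

void h (void);

/* x < 0 tests bit 31 of a 32-bit value, so lsb (31) + width (1) == 32.
   With the relaxed bfx pattern this is expected to become a single tbz/tbnz
   on bit 31 instead of a compare followed by a conditional branch.  */
void
sign_bit_branch (int x)
{
  if (__builtin_expect (x < 0, 1))
    h ();
}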
Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
Ok for master?
Thanks,
Tamar
gcc/ChangeLog:
* config/aarch64/aarch64.md (*tb<optab><mode>1): Rename to...
(*tb<optab><ALLI:mode><GPI:mode>1): ... this.
(tbranch<mode>4): New.
(*<optab><mode>): Rename to...
(*<optab><GPI:mode><ALLI:mode>): ... this.
gcc/testsuite/ChangeLog:
* gcc.target/aarch64/tbz_1.c: New test.
--- inline copy of patch --
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 2bc2684b82c35a44e0a2cea6e3aaf32d939f8cdf..6a4494a9a370139313cc8e57447717aafa14da2d 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -943,12 +943,28 @@ (define_insn "*cb<optab><mode>1"
(const_int 1)))]
)
-(define_insn "*tb<optab><mode>1"
+(define_expand "tbranch<mode>4"
[(set (pc) (if_then_else
- (EQL (zero_extract:DI (match_operand:GPI 0 "register_operand" "r")
- (const_int 1)
- (match_operand 1
- "aarch64_simd_shift_imm_<mode>" "n"))
+ (match_operator 0 "aarch64_comparison_operator"
+ [(match_operand:ALLI 1 "register_operand")
+ (match_operand:ALLI 2 "aarch64_simd_shift_imm_<ALLI:mode>")])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))]
+ "optimize > 0"
+{
+ rtx bitvalue = gen_reg_rtx (DImode);
+ emit_insn (gen_extzv (bitvalue, operands[1], const1_rtx, operands[2]));
+ operands[2] = const0_rtx;
+ operands[1] = aarch64_gen_compare_reg (GET_CODE (operands[0]), bitvalue,
+ operands[2]);
+})
+
+(define_insn "*tb<optab><ALLI:mode><GPI:mode>1"
+ [(set (pc) (if_then_else
+ (EQL (zero_extract:GPI (match_operand:ALLI 0 "register_operand" "r")
+ (const_int 1)
+ (match_operand 1
+ "aarch64_simd_shift_imm_<ALLI:mode>" "n"))
(const_int 0))
(label_ref (match_operand 2 "" ""))
(pc)))
@@ -959,15 +975,15 @@ (define_insn "*tb<optab><mode>1"
{
if (get_attr_far_branch (insn) == 1)
return aarch64_gen_far_branch (operands, 2, "Ltb",
- "<inv_tb>\\t%<w>0, %1, ");
+ "<inv_tb>\\t%<ALLI:w>0, %1, ");
else
{
operands[1] = GEN_INT (HOST_WIDE_INT_1U << UINTVAL (operands[1]));
- return "tst\t%<w>0, %1\;<bcond>\t%l2";
+ return "tst\t%<ALLI:w>0, %1\;<bcond>\t%l2";
}
}
else
- return "<tbz>\t%<w>0, %1, %l2";
+ return "<tbz>\t%<ALLI:w>0, %1, %l2";
}
[(set_attr "type" "branch")
(set (attr "length")
@@ -5752,39 +5768,19 @@ (define_expand "<optab>"
)
-(define_insn "*<optab><mode>"
+(define_insn "*<optab><GPI:mode><ALLI:mode>"
[(set (match_operand:GPI 0 "register_operand" "=r")
- (ANY_EXTRACT:GPI (match_operand:GPI 1 "register_operand" "r")
+ (ANY_EXTRACT:GPI (match_operand:ALLI 1 "register_operand" "r")
(match_operand 2
- "aarch64_simd_shift_imm_offset_<mode>" "n")
+ "aarch64_simd_shift_imm_offset_<ALLI:mode>" "n")
(match_operand 3
- "aarch64_simd_shift_imm_<mode>" "n")))]
+ "aarch64_simd_shift_imm_<ALLI:mode>" "n")))]
"IN_RANGE (INTVAL (operands[2]) + INTVAL (operands[3]),
- 1, GET_MODE_BITSIZE (<MODE>mode) - 1)"
- "<su>bfx\\t%<w>0, %<w>1, %3, %2"
+ 1, GET_MODE_BITSIZE (<ALLI:MODE>mode))"
+ "<su>bfx\\t%<GPI:w>0, %<GPI:w>1, %3, %2"
[(set_attr "type" "bfx")]
)
-;; When the bit position and width add up to 32 we can use a W-reg LSR
-;; instruction taking advantage of the implicit zero-extension of the X-reg.
-(define_split
- [(set (match_operand:DI 0 "register_operand")
- (zero_extract:DI (match_operand:DI 1 "register_operand")
- (match_operand 2
- "aarch64_simd_shift_imm_offset_di")
- (match_operand 3
- "aarch64_simd_shift_imm_di")))]
- "IN_RANGE (INTVAL (operands[2]) + INTVAL (operands[3]), 1,
- GET_MODE_BITSIZE (DImode) - 1)
- && (INTVAL (operands[2]) + INTVAL (operands[3]))
- == GET_MODE_BITSIZE (SImode)"
- [(set (match_dup 0)
- (zero_extend:DI (lshiftrt:SI (match_dup 4) (match_dup 3))))]
- {
- operands[4] = gen_lowpart (SImode, operands[1]);
- }
-)
-
;; Bitfield Insert (insv)
(define_expand "insv<mode>"
[(set (zero_extract:GPI (match_operand:GPI 0 "register_operand")
diff --git a/gcc/testsuite/gcc.target/aarch64/tbz_1.c b/gcc/testsuite/gcc.target/aarch64/tbz_1.c
new file mode 100644
index 0000000000000000000000000000000000000000..86f5d3e23cf7f1ea6f3596549ce1a0cff6774463
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/tbz_1.c
@@ -0,0 +1,95 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O2 -std=c99 -fno-unwind-tables -fno-asynchronous-unwind-tables" } */
+/* { dg-final { check-function-bodies "**" "" "" { target { le } } } } */
+
+#include <stdbool.h>
+
+void h(void);
+
+/*
+** g1:
+** tbnz x[0-9]+, #?0, .L([0-9]+)
+** ret
+** ...
+*/
+void g1(bool x)
+{
+ if (__builtin_expect (x, 0))
+ h ();
+}
+
+/*
+** g2:
+** tbz x[0-9]+, #?0, .L([0-9]+)
+** b h
+** ...
+*/
+void g2(bool x)
+{
+ if (__builtin_expect (x, 1))
+ h ();
+}
+
+/*
+** g3_ge:
+** tbnz w[0-9]+, #?31, .L[0-9]+
+** b h
+** ...
+*/
+void g3_ge(int x)
+{
+ if (__builtin_expect (x >= 0, 1))
+ h ();
+}
+
+/*
+** g3_gt:
+** cmp w[0-9]+, 0
+** ble .L[0-9]+
+** b h
+** ...
+*/
+void g3_gt(int x)
+{
+ if (__builtin_expect (x > 0, 1))
+ h ();
+}
+
+/*
+** g3_lt:
+** tbz w[0-9]+, #?31, .L[0-9]+
+** b h
+** ...
+*/
+void g3_lt(int x)
+{
+ if (__builtin_expect (x < 0, 1))
+ h ();
+}
+
+/*
+** g3_le:
+** cmp w[0-9]+, 0
+** bgt .L[0-9]+
+** b h
+** ...
+*/
+void g3_le(int x)
+{
+ if (__builtin_expect (x <= 0, 1))
+ h ();
+}
+
+/*
+** g5:
+** mov w[0-9]+, 65279
+** tst w[0-9]+, w[0-9]+
+** beq .L[0-9]+
+** b h
+** ...
+*/
+void g5(int x)
+{
+ if (__builtin_expect (x & 0xfeff, 1))
+ h ();
+}
--
Thread overview: 33+ messages
2022-10-31 11:53 [PATCH 1/2]middle-end: Add new tbranch optab to add support for bit-test-and-branch operations Tamar Christina
2022-10-31 11:53 ` Tamar Christina [this message]
2022-11-14 15:58 ` [PATCH 2/2]AArch64 Support new tbranch optab Tamar Christina
2022-11-15 10:36 ` Richard Sandiford
2022-11-15 10:42 ` Tamar Christina
2022-11-15 10:50 ` Richard Sandiford
2022-11-15 11:00 ` Tamar Christina
2022-11-15 11:14 ` Richard Sandiford
2022-11-15 11:23 ` Tamar Christina
2022-11-15 11:33 ` Richard Sandiford
2022-11-15 11:39 ` Tamar Christina
2022-11-22 13:48 ` Tamar Christina
2022-11-22 14:00 ` Richard Sandiford
2022-11-24 12:18 ` Tamar Christina
2022-12-01 16:44 ` Tamar Christina
2022-12-05 14:06 ` Richard Sandiford
2022-10-31 11:54 ` [PATCH]AArch64 Extend umov and sbfx patterns Tamar Christina
2022-10-31 12:26 ` Richard Sandiford
2022-11-11 14:42 ` Tamar Christina
2022-11-15 11:10 ` Richard Sandiford
2022-10-31 21:16 ` [PATCH 1/2]middle-end: Add new tbranch optab to add support for bit-test-and-branch operations Jeff Law
2022-11-01 15:53 ` Tamar Christina
2022-11-01 17:00 ` Jeff Law
2022-11-02 9:55 ` Tamar Christina
2022-11-02 11:08 ` Aldy Hernandez
2022-11-05 14:23 ` Richard Biener
2022-11-14 15:56 ` Tamar Christina
2022-11-14 16:22 ` Jeff Law
2022-11-15 7:33 ` Richard Biener
2022-12-01 16:29 ` Tamar Christina
2022-12-02 7:09 ` Richard Biener
2022-12-05 12:00 ` Richard Sandiford
2022-12-05 13:14 ` Richard Sandiford