* [PATCH][4/4][committed] aarch64: Convert UABAL2 and SABAL2 patterns to standard RTL codes
@ 2023-04-24 8:47 Kyrylo Tkachov
0 siblings, 0 replies; only message in thread
From: Kyrylo Tkachov @ 2023-04-24 8:47 UTC (permalink / raw)
To: gcc-patches
[-- Attachment #1: Type: text/plain, Size: 1338 bytes --]
Hi all,
The final patch in the series tackles the most complex of this family of patterns, UABAL2 and SABAL2.
These extract the high part of the sources, perform an absdiff on them, widen the result and accumulate.
The motivating testcase for this patch (series) is included and the simplification required doesn't actually
trigger with just the RTL pattern change because rtx_costs block it.
So this patch also extends rtx costs to recognise the (minus (smax (x, y) (smin (x, y)))) expression we use
to describe absdiff in the backend and avoid recursing into its arms.
This allows us to generate the single-instruction sequence expected here.
Bootstrapped and tested on aarch64-none-linux-gnu.
Pushing to trunk.
Thanks,
Kyrill
gcc/ChangeLog:
* config/aarch64/aarch64-simd.md (aarch64_<sur>abal2<mode>): Rename to...
(aarch64_<su>abal2<mode>_insn): ... This. Use RTL codes instead of unspec.
(aarch64_<su>abal2<mode>): New define_expand.
* config/aarch64/aarch64.cc (aarch64_abd_rtx_p): New function.
(aarch64_rtx_costs): Handle ABD rtxes.
* config/aarch64/aarch64.md (UNSPEC_SABAL2, UNSPEC_UABAL2): Delete.
* config/aarch64/iterators.md (ABAL2): Delete.
(sur): Remove handling of UNSPEC_UABAL2 and UNSPEC_SABAL2.
gcc/testsuite/ChangeLog:
* gcc.target/aarch64/simd/vabal_combine.c: New test.
[-- Attachment #2: abal2.patch --]
[-- Type: application/octet-stream, Size: 7758 bytes --]
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 379c322fbd435b05024243096705de2b957f326e..015e82b5092911ac690c67db0dee423e3ad06310 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -941,17 +941,46 @@ (define_insn "aarch64_<su>abal<mode>"
[(set_attr "type" "neon_arith_acc<q>")]
)
-(define_insn "aarch64_<sur>abal2<mode>"
- [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
- (unspec:<VWIDE> [(match_operand:VQW 2 "register_operand" "w")
- (match_operand:VQW 3 "register_operand" "w")
- (match_operand:<VWIDE> 1 "register_operand" "0")]
- ABAL2))]
+(define_insn "aarch64_<su>abal2<mode>_insn"
+ [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
+ (plus:<VDBLW>
+ (zero_extend:<VDBLW>
+ (minus:<VHALF>
+ (USMAX:<VHALF>
+ (vec_select:<VHALF>
+ (match_operand:VQW 2 "register_operand" "w")
+ (match_operand:VQW 4 "vect_par_cnst_hi_half" ""))
+ (vec_select:<VHALF>
+ (match_operand:VQW 3 "register_operand" "w")
+ (match_dup 4)))
+ (<max_opp>:<VHALF>
+ (vec_select:<VHALF>
+ (match_dup 2)
+ (match_dup 4))
+ (vec_select:<VHALF>
+ (match_dup 3)
+ (match_dup 4)))))
+ (match_operand:<VDBLW> 1 "register_operand" "0")))]
"TARGET_SIMD"
- "<sur>abal2\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
+ "<su>abal2\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
[(set_attr "type" "neon_arith_acc<q>")]
)
+(define_expand "aarch64_<su>abal2<mode>"
+ [(match_operand:<VDBLW> 0 "register_operand")
+ (match_operand:<VDBLW> 1 "register_operand")
+ (USMAX:VQW
+ (match_operand:VQW 2 "register_operand")
+ (match_operand:VQW 3 "register_operand"))]
+ "TARGET_SIMD"
+ {
+ rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
+ emit_insn (gen_aarch64_<su>abal2<mode>_insn (operands[0], operands[1],
+ operands[2], operands[3], hi));
+ DONE;
+ }
+)
+
(define_insn "aarch64_<sur>adalp<mode>"
[(set (match_operand:<VDBLW> 0 "register_operand" "=w")
(unspec:<VDBLW> [(match_operand:VDQV_L 2 "register_operand" "w")
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 5c40b6ed22a508723bd535a7460762c3a243d441..552de301b2f9c63adcb8f56cbad166ca0b3092f2 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -13786,6 +13786,31 @@ aarch64_masks_and_shift_for_bfi_p (scalar_int_mode mode,
return (t == (t & -t));
}
+/* Return true if X is an RTX representing an operation in the ABD family
+ of instructions. */
+
+static bool
+aarch64_abd_rtx_p (rtx x)
+{
+ if (GET_CODE (x) != MINUS)
+ return false;
+ rtx max_arm = XEXP (x, 0);
+ rtx min_arm = XEXP (x, 1);
+ if (GET_CODE (max_arm) != SMAX && GET_CODE (max_arm) != UMAX)
+ return false;
+ bool signed_p = GET_CODE (max_arm) == SMAX;
+ if (signed_p && GET_CODE (min_arm) != SMIN)
+ return false;
+ else if (!signed_p && GET_CODE (min_arm) != UMIN)
+ return false;
+
+ rtx maxop0 = XEXP (max_arm, 0);
+ rtx maxop1 = XEXP (max_arm, 1);
+ rtx minop0 = XEXP (min_arm, 0);
+ rtx minop1 = XEXP (min_arm, 1);
+ return rtx_equal_p (maxop0, minop0) && rtx_equal_p (maxop1, minop1);
+}
+
/* Calculate the cost of calculating X, storing it in *COST. Result
is true if the total cost of the operation has now been calculated. */
static bool
@@ -14182,11 +14207,20 @@ aarch64_rtx_costs (rtx x, machine_mode mode, int outer ATTRIBUTE_UNUSED,
cost_minus:
if (VECTOR_MODE_P (mode))
{
- /* SUBL2 and SUBW2. */
unsigned int vec_flags = aarch64_classify_vector_mode (mode);
if (TARGET_SIMD && (vec_flags & VEC_ADVSIMD))
{
- /* The select-operand-high-half versions of the sub instruction
+ /* Recognise the SABD and UABD operation here.
+ Recursion from the PLUS case will catch the accumulating
+ forms. */
+ if (aarch64_abd_rtx_p (x))
+ {
+ if (speed)
+ *cost += extra_cost->vect.alu;
+ return true;
+ }
+ /* SUBL2 and SUBW2.
+ The select-operand-high-half versions of the sub instruction
have the same cost as the regular three vector version -
don't add the costs of the select into the costs of the sub.
*/
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 575aaf82eaf0a118a51a2e067f991a87401f5b9f..095acf61c29b34b3aebe6407e0367a66f23a0cc3 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -204,7 +204,6 @@ (define_c_enum "unspec" [
UNSPEC_PRLG_STK
UNSPEC_REV
UNSPEC_RBIT
- UNSPEC_SABAL2
UNSPEC_SADALP
UNSPEC_SCVTF
UNSPEC_SETMEM
@@ -225,7 +224,6 @@ (define_c_enum "unspec" [
UNSPEC_TLSLE24
UNSPEC_TLSLE32
UNSPEC_TLSLE48
- UNSPEC_UABAL2
UNSPEC_UADALP
UNSPEC_UCVTF
UNSPEC_USHL_2S
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index cc471ae90d157d7356a0ad4c1ac591aa6227c555..4b592f34692faaa1e5b524b520a4784fb9173ade 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -2562,9 +2562,6 @@ (define_code_attr inc_dec [(minus "dec") (ss_minus "sqdec") (us_minus "uqdec")
;; Int Iterators.
;; -------------------------------------------------------------------
-;; The unspec codes for the SABAL2, UABAL2 AdvancedSIMD instructions.
-(define_int_iterator ABAL2 [UNSPEC_SABAL2 UNSPEC_UABAL2])
-
;; The unspec codes for the SADALP, UADALP AdvancedSIMD instructions.
(define_int_iterator ADALP [UNSPEC_SADALP UNSPEC_UADALP])
@@ -3346,7 +3343,6 @@ (define_int_attr sur [(UNSPEC_SHADD "s") (UNSPEC_UHADD "u")
(UNSPEC_SRHADD "sr") (UNSPEC_URHADD "ur")
(UNSPEC_SHSUB "s") (UNSPEC_UHSUB "u")
(UNSPEC_ADDHN "") (UNSPEC_RADDHN "r")
- (UNSPEC_SABAL2 "s") (UNSPEC_UABAL2 "u")
(UNSPEC_SADALP "s") (UNSPEC_UADALP "u")
(UNSPEC_SUBHN "") (UNSPEC_RSUBHN "r")
(UNSPEC_USQADD "us") (UNSPEC_SUQADD "su")
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vabal_combine.c b/gcc/testsuite/gcc.target/aarch64/simd/vabal_combine.c
new file mode 100644
index 0000000000000000000000000000000000000000..c51878aa22652a3bcbc9435f84e224c58cbf5d94
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/simd/vabal_combine.c
@@ -0,0 +1,72 @@
+/* { dg-do compile } */
+/* { dg-options "-O" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#include <arm_neon.h>
+
+/*
+** test_vabal_s8:
+** sabal2 v0.8h, v2.16b, v1.16b
+** ret
+*/
+int16x8_t
+test_vabal_s8 (int16x8_t sadv, int8x16_t pv, int8x16_t sv)
+{
+ return vabal_s8 (sadv, vget_high_s8 (pv), vget_high_s8 (sv));
+}
+
+/*
+** test_vabal_u8:
+** uabal2 v0.8h, v2.16b, v1.16b
+** ret
+*/
+uint16x8_t
+test_vabal_u8 (uint16x8_t sadv, uint8x16_t pv, uint8x16_t sv)
+{
+ return vabal_u8 (sadv, vget_high_u8 (pv), vget_high_u8 (sv));
+}
+
+/*
+** test_vabal_s16:
+** sabal2 v0.4s, v2.8h, v1.8h
+** ret
+*/
+int32x4_t
+test_vabal_s16 (int32x4_t sadv, int16x8_t pv, int16x8_t sv)
+{
+ return vabal_s16 (sadv, vget_high_s16 (pv), vget_high_s16 (sv));
+}
+
+/*
+** test_vabal_u16:
+** uabal2 v0.4s, v2.8h, v1.8h
+** ret
+*/
+uint32x4_t
+test_vabal_u16 (uint32x4_t sadv, uint16x8_t pv, uint16x8_t sv)
+{
+ return vabal_u16 (sadv, vget_high_u16 (pv), vget_high_u16 (sv));
+}
+
+/*
+** test_vabal_s32:
+** sabal2 v0.2d, v2.4s, v1.4s
+** ret
+*/
+int64x2_t
+test_vabal_s32 (int64x2_t sadv, int32x4_t pv, int32x4_t sv)
+{
+ return vabal_s32 (sadv, vget_high_s32 (pv), vget_high_s32 (sv));
+}
+
+/*
+** test_vabal_u32:
+** uabal2 v0.2d, v2.4s, v1.4s
+** ret
+*/
+uint64x2_t
+test_vabal_u32 (uint64x2_t sadv, uint32x4_t pv, uint32x4_t sv)
+{
+ return vabal_u32 (sadv, vget_high_u32 (pv), vget_high_u32 (sv));
+}
+
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2023-04-24 8:48 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-04-24 8:47 [PATCH][4/4][committed] aarch64: Convert UABAL2 and SABAL2 patterns to standard RTL codes Kyrylo Tkachov
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).