public inbox for gcc-cvs@sourceware.org help / color / mirror / Atom feed
From: Kyrylo Tkachov <ktkachov@gcc.gnu.org> To: gcc-cvs@gcc.gnu.org Subject: [gcc r14-192] [4/4] aarch64: Convert UABAL2 and SABAL2 patterns to standard RTL codes Date: Mon, 24 Apr 2023 08:48:28 +0000 (GMT) [thread overview] Message-ID: <20230424084828.296033856965@sourceware.org> (raw) https://gcc.gnu.org/g:636e2273aec555faa0a2f0e0b97b5f3355b25e47 commit r14-192-g636e2273aec555faa0a2f0e0b97b5f3355b25e47 Author: Kyrylo Tkachov <kyrylo.tkachov@arm.com> Date: Mon Apr 24 09:44:55 2023 +0100 [4/4] aarch64: Convert UABAL2 and SABAL2 patterns to standard RTL codes The final patch in the series tackles the most complex of this family of patterns, UABAL2 and SABAL2. These extract the high part of the sources, perform an absdiff on them, widen the result and accumulate. The motivating testcase for this patch (series) is included and the simplification required doesn't actually trigger with just the RTL pattern change because rtx_costs block it. So this patch also extends rtx costs to recognise the (minus (smax (x, y)) (smin (x, y))) expression we use to describe absdiff in the backend and avoid recursing into its arms. This allows us to generate the single-instruction sequence expected here. Bootstrapped and tested on aarch64-none-linux-gnu. gcc/ChangeLog: * config/aarch64/aarch64-simd.md (aarch64_<sur>abal2<mode>): Rename to... (aarch64_<su>abal2<mode>_insn): ... This. Use RTL codes instead of unspec. (aarch64_<su>abal2<mode>): New define_expand. * config/aarch64/aarch64.cc (aarch64_abd_rtx_p): New function. (aarch64_rtx_costs): Handle ABD rtxes. * config/aarch64/aarch64.md (UNSPEC_SABAL2, UNSPEC_UABAL2): Delete. * config/aarch64/iterators.md (ABAL2): Delete. (sur): Remove handling of UNSPEC_UABAL2 and UNSPEC_SABAL2. gcc/testsuite/ChangeLog: * gcc.target/aarch64/simd/vabal_combine.c: New test. 
Diff: --- gcc/config/aarch64/aarch64-simd.md | 43 ++++++++++--- gcc/config/aarch64/aarch64.cc | 38 +++++++++++- gcc/config/aarch64/aarch64.md | 2 - gcc/config/aarch64/iterators.md | 4 -- .../gcc.target/aarch64/simd/vabal_combine.c | 72 ++++++++++++++++++++++ 5 files changed, 144 insertions(+), 15 deletions(-) diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index 51bb6cf357c..e420f58633a 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -969,17 +969,46 @@ [(set_attr "type" "neon_arith_acc<q>")] ) -(define_insn "aarch64_<sur>abal2<mode>" - [(set (match_operand:<VWIDE> 0 "register_operand" "=w") - (unspec:<VWIDE> [(match_operand:VQW 2 "register_operand" "w") - (match_operand:VQW 3 "register_operand" "w") - (match_operand:<VWIDE> 1 "register_operand" "0")] - ABAL2))] +(define_insn "aarch64_<su>abal2<mode>_insn" + [(set (match_operand:<VDBLW> 0 "register_operand" "=w") + (plus:<VDBLW> + (zero_extend:<VDBLW> + (minus:<VHALF> + (USMAX:<VHALF> + (vec_select:<VHALF> + (match_operand:VQW 2 "register_operand" "w") + (match_operand:VQW 4 "vect_par_cnst_hi_half" "")) + (vec_select:<VHALF> + (match_operand:VQW 3 "register_operand" "w") + (match_dup 4))) + (<max_opp>:<VHALF> + (vec_select:<VHALF> + (match_dup 2) + (match_dup 4)) + (vec_select:<VHALF> + (match_dup 3) + (match_dup 4))))) + (match_operand:<VDBLW> 1 "register_operand" "0")))] "TARGET_SIMD" - "<sur>abal2\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>" + "<su>abal2\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>" [(set_attr "type" "neon_arith_acc<q>")] ) +(define_expand "aarch64_<su>abal2<mode>" + [(match_operand:<VDBLW> 0 "register_operand") + (match_operand:<VDBLW> 1 "register_operand") + (USMAX:VQW + (match_operand:VQW 2 "register_operand") + (match_operand:VQW 3 "register_operand"))] + "TARGET_SIMD" + { + rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true); + emit_insn (gen_aarch64_<su>abal2<mode>_insn (operands[0], operands[1], + 
operands[2], operands[3], hi)); + DONE; + } +) + (define_insn "aarch64_<sur>adalp<mode>" [(set (match_operand:<VDBLW> 0 "register_operand" "=w") (unspec:<VDBLW> [(match_operand:VDQV_L 2 "register_operand" "w") diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc index d7e895f8d34..2b0de7ca038 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc @@ -13822,6 +13822,31 @@ aarch64_masks_and_shift_for_bfi_p (scalar_int_mode mode, return (t == (t & -t)); } +/* Return true if X is an RTX representing an operation in the ABD family + of instructions. */ + +static bool +aarch64_abd_rtx_p (rtx x) +{ + if (GET_CODE (x) != MINUS) + return false; + rtx max_arm = XEXP (x, 0); + rtx min_arm = XEXP (x, 1); + if (GET_CODE (max_arm) != SMAX && GET_CODE (max_arm) != UMAX) + return false; + bool signed_p = GET_CODE (max_arm) == SMAX; + if (signed_p && GET_CODE (min_arm) != SMIN) + return false; + else if (!signed_p && GET_CODE (min_arm) != UMIN) + return false; + + rtx maxop0 = XEXP (max_arm, 0); + rtx maxop1 = XEXP (max_arm, 1); + rtx minop0 = XEXP (min_arm, 0); + rtx minop1 = XEXP (min_arm, 1); + return rtx_equal_p (maxop0, minop0) && rtx_equal_p (maxop1, minop1); +} + /* Calculate the cost of calculating X, storing it in *COST. Result is true if the total cost of the operation has now been calculated. */ static bool @@ -14218,11 +14243,20 @@ aarch64_rtx_costs (rtx x, machine_mode mode, int outer ATTRIBUTE_UNUSED, cost_minus: if (VECTOR_MODE_P (mode)) { - /* SUBL2 and SUBW2. */ unsigned int vec_flags = aarch64_classify_vector_mode (mode); if (TARGET_SIMD && (vec_flags & VEC_ADVSIMD)) { - /* The select-operand-high-half versions of the sub instruction + /* Recognise the SABD and UABD operation here. + Recursion from the PLUS case will catch the accumulating + forms. */ + if (aarch64_abd_rtx_p (x)) + { + if (speed) + *cost += extra_cost->vect.alu; + return true; + } + /* SUBL2 and SUBW2. 
+ The select-operand-high-half versions of the sub instruction have the same cost as the regular three vector version - don't add the costs of the select into the costs of the sub. */ diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index 426eb85d2db..3e18f0405fa 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -204,7 +204,6 @@ UNSPEC_PRLG_STK UNSPEC_REV UNSPEC_RBIT - UNSPEC_SABAL2 UNSPEC_SADALP UNSPEC_SCVTF UNSPEC_SETMEM @@ -225,7 +224,6 @@ UNSPEC_TLSLE24 UNSPEC_TLSLE32 UNSPEC_TLSLE48 - UNSPEC_UABAL2 UNSPEC_UADALP UNSPEC_UCVTF UNSPEC_USHL_2S diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index 0195cdc545e..13a7e89777d 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -2567,9 +2567,6 @@ ;; Int Iterators. ;; ------------------------------------------------------------------- -;; The unspec codes for the SABAL2, UABAL2 AdvancedSIMD instructions. -(define_int_iterator ABAL2 [UNSPEC_SABAL2 UNSPEC_UABAL2]) - ;; The unspec codes for the SADALP, UADALP AdvancedSIMD instructions. 
(define_int_iterator ADALP [UNSPEC_SADALP UNSPEC_UADALP]) @@ -3351,7 +3348,6 @@ (UNSPEC_SRHADD "sr") (UNSPEC_URHADD "ur") (UNSPEC_SHSUB "s") (UNSPEC_UHSUB "u") (UNSPEC_ADDHN "") (UNSPEC_RADDHN "r") - (UNSPEC_SABAL2 "s") (UNSPEC_UABAL2 "u") (UNSPEC_SADALP "s") (UNSPEC_UADALP "u") (UNSPEC_SUBHN "") (UNSPEC_RSUBHN "r") (UNSPEC_USQADD "us") (UNSPEC_SUQADD "su") diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vabal_combine.c b/gcc/testsuite/gcc.target/aarch64/simd/vabal_combine.c new file mode 100644 index 00000000000..c51878aa226 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/vabal_combine.c @@ -0,0 +1,72 @@ +/* { dg-do compile } */ +/* { dg-options "-O" } */ +/* { dg-final { check-function-bodies "**" "" "" } } */ + +#include <arm_neon.h> + +/* +** test_vabal_s8: +** sabal2 v0.8h, v2.16b, v1.16b +** ret +*/ +int16x8_t +test_vabal_s8 (int16x8_t sadv, int8x16_t pv, int8x16_t sv) +{ + return vabal_s8 (sadv, vget_high_s8 (pv), vget_high_s8 (sv)); +} + +/* +** test_vabal_u8: +** uabal2 v0.8h, v2.16b, v1.16b +** ret +*/ +uint16x8_t +test_vabal_u8 (uint16x8_t sadv, uint8x16_t pv, uint8x16_t sv) +{ + return vabal_u8 (sadv, vget_high_u8 (pv), vget_high_u8 (sv)); +} + +/* +** test_vabal_s16: +** sabal2 v0.4s, v2.8h, v1.8h +** ret +*/ +int32x4_t +test_vabal_s16 (int32x4_t sadv, int16x8_t pv, int16x8_t sv) +{ + return vabal_s16 (sadv, vget_high_s16 (pv), vget_high_s16 (sv)); +} + +/* +** test_vabal_u16: +** uabal2 v0.4s, v2.8h, v1.8h +** ret +*/ +uint32x4_t +test_vabal_u16 (uint32x4_t sadv, uint16x8_t pv, uint16x8_t sv) +{ + return vabal_u16 (sadv, vget_high_u16 (pv), vget_high_u16 (sv)); +} + +/* +** test_vabal_s32: +** sabal2 v0.2d, v2.4s, v1.4s +** ret +*/ +int64x2_t +test_vabal_s32 (int64x2_t sadv, int32x4_t pv, int32x4_t sv) +{ + return vabal_s32 (sadv, vget_high_s32 (pv), vget_high_s32 (sv)); +} + +/* +** test_vabal_u32: +** uabal2 v0.2d, v2.4s, v1.4s +** ret +*/ +uint64x2_t +test_vabal_u32 (uint64x2_t sadv, uint32x4_t pv, uint32x4_t sv) +{ + return vabal_u32 
(sadv, vget_high_u32 (pv), vget_high_u32 (sv)); +} +
reply other threads:[~2023-04-24 8:48 UTC|newest] Thread overview: [no followups] expand[flat|nested] mbox.gz Atom feed
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=20230424084828.296033856965@sourceware.org \ --to=ktkachov@gcc.gnu.org \ --cc=gcc-cvs@gcc.gnu.org \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link. Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).