[gcc r14-192] [4/4] aarch64: Convert UABAL2 and SABAL2 patterns to standard RTL codes
From: Kyrylo Tkachov @ 2023-04-24  8:48 UTC
  To: gcc-cvs

https://gcc.gnu.org/g:636e2273aec555faa0a2f0e0b97b5f3355b25e47

commit r14-192-g636e2273aec555faa0a2f0e0b97b5f3355b25e47
Author: Kyrylo Tkachov <kyrylo.tkachov@arm.com>
Date:   Mon Apr 24 09:44:55 2023 +0100

    [4/4] aarch64: Convert UABAL2 and SABAL2 patterns to standard RTL codes
    
    The final patch in the series tackles the most complex members of this family of patterns, UABAL2 and SABAL2.
    These extract the high halves of the sources, compute the absolute difference between them, widen the result
    and accumulate it into the destination.
    The motivating testcase for this patch series is included, but the required simplification does not actually
    trigger with the RTL pattern change alone, because the rtx costs block it.
    So this patch also extends the rtx costs to recognise the (minus (smax (x, y)) (smin (x, y))) expression that
    the backend uses to describe absdiff, and to avoid recursing into its arms; the shape is sketched below.
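    
    As a rough sketch (an illustration for this note, not part of the patch
    itself), the absdiff shape that the new aarch64_abd_rtx_p helper looks
    for is:
    
        (minus:V16QI
          (smax:V16QI (reg:V16QI x) (reg:V16QI y))
          (smin:V16QI (reg:V16QI x) (reg:V16QI y)))
    
    with umax/umin in the unsigned case; the mode and the operands x and y
    here are placeholders for whatever the matched pattern supplies.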
    
    This allows us to generate the single-instruction sequence expected here.
    Bootstrapped and tested on aarch64-none-linux-gnu.
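    
    For instance, for the test_vabal_s8 function in the new test, the
    expected body is now just:
    
        sabal2  v0.8h, v2.16b, v1.16b
        ret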
    
    gcc/ChangeLog:
    
            * config/aarch64/aarch64-simd.md (aarch64_<sur>abal2<mode>): Rename to...
            (aarch64_<su>abal2<mode>_insn): ... This.  Use RTL codes instead of unspec.
            (aarch64_<su>abal2<mode>): New define_expand.
            * config/aarch64/aarch64.cc (aarch64_abd_rtx_p): New function.
            (aarch64_rtx_costs): Handle ABD rtxes.
            * config/aarch64/aarch64.md (UNSPEC_SABAL2, UNSPEC_UABAL2): Delete.
            * config/aarch64/iterators.md (ABAL2): Delete.
            (sur): Remove handling of UNSPEC_UABAL2 and UNSPEC_SABAL2.
    
    gcc/testsuite/ChangeLog:
    
            * gcc.target/aarch64/simd/vabal_combine.c: New test.
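    
    To make the expected semantics concrete, here is a minimal scalar model
    of the signed byte variant (an illustrative sketch only; the helper name
    and the plain-C-array interface are made up for this note):
    
        #include <stdint.h>
    
        /* Rough scalar model of sabal2 v0.8h, v2.16b, v1.16b.  */
        void
        sabal2_model (int16_t acc[8], const int8_t x[16], const int8_t y[16])
        {
          for (int i = 0; i < 8; i++)
            {
              int8_t a = x[8 + i], b = y[8 + i];  /* High halves.  */
              int8_t mx = a > b ? a : b;          /* smax  */
              int8_t mn = a > b ? b : a;          /* smin  */
              /* Narrow-mode difference, zero-extended into the wider
                 accumulator, matching the new RTL pattern.  */
              acc[i] += (uint8_t) (mx - mn);
            }
        }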

Diff:
---
 gcc/config/aarch64/aarch64-simd.md                 | 43 ++++++++++---
 gcc/config/aarch64/aarch64.cc                      | 38 +++++++++++-
 gcc/config/aarch64/aarch64.md                      |  2 -
 gcc/config/aarch64/iterators.md                    |  4 --
 .../gcc.target/aarch64/simd/vabal_combine.c        | 72 ++++++++++++++++++++++
 5 files changed, 144 insertions(+), 15 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 51bb6cf357c..e420f58633a 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -969,17 +969,46 @@
   [(set_attr "type" "neon_arith_acc<q>")]
 )
 
-(define_insn "aarch64_<sur>abal2<mode>"
-  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
-	(unspec:<VWIDE> [(match_operand:VQW 2 "register_operand" "w")
-			  (match_operand:VQW 3 "register_operand" "w")
-			 (match_operand:<VWIDE> 1 "register_operand" "0")]
-	ABAL2))]
+(define_insn "aarch64_<su>abal2<mode>_insn"
+  [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
+	(plus:<VDBLW>
+	  (zero_extend:<VDBLW>
+	    (minus:<VHALF>
+	      (USMAX:<VHALF>
+		(vec_select:<VHALF>
+		  (match_operand:VQW 2 "register_operand" "w")
+		  (match_operand:VQW 4 "vect_par_cnst_hi_half" ""))
+		(vec_select:<VHALF>
+		  (match_operand:VQW 3 "register_operand" "w")
+		  (match_dup 4)))
+	      (<max_opp>:<VHALF>
+		(vec_select:<VHALF>
+		  (match_dup 2)
+		  (match_dup 4))
+		(vec_select:<VHALF>
+		  (match_dup 3)
+		  (match_dup 4)))))
+	  (match_operand:<VDBLW> 1 "register_operand" "0")))]
   "TARGET_SIMD"
-  "<sur>abal2\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
+  "<su>abal2\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
   [(set_attr "type" "neon_arith_acc<q>")]
 )
 
+(define_expand "aarch64_<su>abal2<mode>"
+  [(match_operand:<VDBLW> 0 "register_operand")
+   (match_operand:<VDBLW> 1 "register_operand")
+   (USMAX:VQW
+     (match_operand:VQW 2 "register_operand")
+     (match_operand:VQW 3 "register_operand"))]
+  "TARGET_SIMD"
+  {
+    rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
+    emit_insn (gen_aarch64_<su>abal2<mode>_insn (operands[0], operands[1],
+						 operands[2], operands[3], hi));
+    DONE;
+  }
+)
+
 (define_insn "aarch64_<sur>adalp<mode>"
   [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
 	(unspec:<VDBLW> [(match_operand:VDQV_L 2 "register_operand" "w")
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index d7e895f8d34..2b0de7ca038 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -13822,6 +13822,31 @@ aarch64_masks_and_shift_for_bfi_p (scalar_int_mode mode,
   return (t == (t & -t));
 }
 
+/* Return true if X is an RTX representing an operation in the ABD family
+   of instructions.  */
+
+static bool
+aarch64_abd_rtx_p (rtx x)
+{
+  if (GET_CODE (x) != MINUS)
+    return false;
+  rtx max_arm = XEXP (x, 0);
+  rtx min_arm = XEXP (x, 1);
+  if (GET_CODE (max_arm) != SMAX && GET_CODE (max_arm) != UMAX)
+    return false;
+  bool signed_p = GET_CODE (max_arm) == SMAX;
+  if (signed_p && GET_CODE (min_arm) != SMIN)
+    return false;
+  else if (!signed_p && GET_CODE (min_arm) != UMIN)
+    return false;
+
+  rtx maxop0 = XEXP (max_arm, 0);
+  rtx maxop1 = XEXP (max_arm, 1);
+  rtx minop0 = XEXP (min_arm, 0);
+  rtx minop1 = XEXP (min_arm, 1);
+  return rtx_equal_p (maxop0, minop0) && rtx_equal_p (maxop1, minop1);
+}
+
 /* Calculate the cost of calculating X, storing it in *COST.  Result
    is true if the total cost of the operation has now been calculated.  */
 static bool
@@ -14218,11 +14243,20 @@ aarch64_rtx_costs (rtx x, machine_mode mode, int outer ATTRIBUTE_UNUSED,
 cost_minus:
 	if (VECTOR_MODE_P (mode))
 	  {
-	    /* SUBL2 and SUBW2.  */
 	    unsigned int vec_flags = aarch64_classify_vector_mode (mode);
 	    if (TARGET_SIMD && (vec_flags & VEC_ADVSIMD))
 	      {
-		/* The select-operand-high-half versions of the sub instruction
+		/* Recognise the SABD and UABD operation here.
+		   Recursion from the PLUS case will catch the accumulating
+		   forms.  */
+		if (aarch64_abd_rtx_p (x))
+		  {
+		    if (speed)
+		      *cost += extra_cost->vect.alu;
+		    return true;
+		  }
+		/* SUBL2 and SUBW2.
+		   The select-operand-high-half versions of the sub instruction
 		   have the same cost as the regular three vector version -
 		   don't add the costs of the select into the costs of the sub.
 		   */
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 426eb85d2db..3e18f0405fa 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -204,7 +204,6 @@
     UNSPEC_PRLG_STK
     UNSPEC_REV
     UNSPEC_RBIT
-    UNSPEC_SABAL2
     UNSPEC_SADALP
     UNSPEC_SCVTF
     UNSPEC_SETMEM
@@ -225,7 +224,6 @@
     UNSPEC_TLSLE24
     UNSPEC_TLSLE32
     UNSPEC_TLSLE48
-    UNSPEC_UABAL2
     UNSPEC_UADALP
     UNSPEC_UCVTF
     UNSPEC_USHL_2S
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 0195cdc545e..13a7e89777d 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -2567,9 +2567,6 @@
 ;; Int Iterators.
 ;; -------------------------------------------------------------------
 
-;; The unspec codes for the SABAL2, UABAL2 AdvancedSIMD instructions.
-(define_int_iterator ABAL2 [UNSPEC_SABAL2 UNSPEC_UABAL2])
-
 ;; The unspec codes for the SADALP, UADALP AdvancedSIMD instructions.
 (define_int_iterator ADALP [UNSPEC_SADALP UNSPEC_UADALP])
 
@@ -3351,7 +3348,6 @@
 		      (UNSPEC_SRHADD "sr") (UNSPEC_URHADD "ur")
 		      (UNSPEC_SHSUB "s") (UNSPEC_UHSUB "u")
 		      (UNSPEC_ADDHN "") (UNSPEC_RADDHN "r")
-		      (UNSPEC_SABAL2 "s") (UNSPEC_UABAL2 "u")
 		      (UNSPEC_SADALP "s") (UNSPEC_UADALP "u")
 		      (UNSPEC_SUBHN "") (UNSPEC_RSUBHN "r")
 		      (UNSPEC_USQADD "us") (UNSPEC_SUQADD "su")
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vabal_combine.c b/gcc/testsuite/gcc.target/aarch64/simd/vabal_combine.c
new file mode 100644
index 00000000000..c51878aa226
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/simd/vabal_combine.c
@@ -0,0 +1,72 @@
+/* { dg-do compile } */
+/* { dg-options "-O" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#include <arm_neon.h>
+
+/*
+** test_vabal_s8:
+**      sabal2	v0.8h, v2.16b, v1.16b
+**      ret
+*/
+int16x8_t
+test_vabal_s8 (int16x8_t sadv, int8x16_t pv, int8x16_t sv)
+{
+  return vabal_s8 (sadv, vget_high_s8 (pv), vget_high_s8 (sv));
+}
+
+/*
+** test_vabal_u8:
+**      uabal2	v0.8h, v2.16b, v1.16b
+**      ret
+*/
+uint16x8_t
+test_vabal_u8 (uint16x8_t sadv, uint8x16_t pv, uint8x16_t sv)
+{
+  return vabal_u8 (sadv, vget_high_u8 (pv), vget_high_u8 (sv));
+}
+
+/*
+** test_vabal_s16:
+**      sabal2	v0.4s, v2.8h, v1.8h
+**      ret
+*/
+int32x4_t
+test_vabal_s16 (int32x4_t sadv, int16x8_t pv, int16x8_t sv)
+{
+  return vabal_s16 (sadv, vget_high_s16 (pv), vget_high_s16 (sv));
+}
+
+/*
+** test_vabal_u16:
+**      uabal2	v0.4s, v2.8h, v1.8h
+**      ret
+*/
+uint32x4_t
+test_vabal_u16 (uint32x4_t sadv, uint16x8_t pv, uint16x8_t sv)
+{
+  return vabal_u16 (sadv, vget_high_u16 (pv), vget_high_u16 (sv));
+}
+
+/*
+** test_vabal_s32:
+**      sabal2	v0.2d, v2.4s, v1.4s
+**      ret
+*/
+int64x2_t
+test_vabal_s32 (int64x2_t sadv, int32x4_t pv, int32x4_t sv)
+{
+  return vabal_s32 (sadv, vget_high_s32 (pv), vget_high_s32 (sv));
+}
+
+/*
+** test_vabal_u32:
+**      uabal2	v0.2d, v2.4s, v1.4s
+**      ret
+*/
+uint64x2_t
+test_vabal_u32 (uint64x2_t sadv, uint32x4_t pv, uint32x4_t sv)
+{
+  return vabal_u32 (sadv, vget_high_u32 (pv), vget_high_u32 (sv));
+}
+
