public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r14-1609] aarch64: Represent SQXTUN with RTL operations
@ 2023-06-07 15:21 Kyrylo Tkachov
  0 siblings, 0 replies; only message in thread
From: Kyrylo Tkachov @ 2023-06-07 15:21 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:b747f54a2a930da55330c2861cd1e344f67a88d9

commit r14-1609-gb747f54a2a930da55330c2861cd1e344f67a88d9
Author: Kyrylo Tkachov <kyrylo.tkachov@arm.com>
Date:   Wed Jun 7 16:20:57 2023 +0100

    aarch64: Represent SQXTUN with RTL operations
    
    This patch removes UNSPEC_SQXTUN and uses organic RTL codes to represent the operation.
    SQXTUN is an odd one. It's described in the architecture as "Signed saturating extract Unsigned Narrow".
    It is neither a straightforward ss_truncate nor a us_truncate.
    It is a truncating signed clamp: the signed wide-mode input is clamped to the range [0, unsigned maximum of the narrow mode] and the result is then truncated to the narrow mode:
    (truncate:N
      (smin:M
        (smax:M (reg:M) (const_int 0))
        (const_int <unsigned-max-for-mode-N>)))
    
    This patch implements these semantics. I've checked that the vqmovun tests in advsimd-intrinsics.exp
    now get constant-folded and still pass validation, so I'm pretty confident in the semantics.
    
    Bootstrapped and tested on aarch64-none-linux-gnu and aarch64_be-none-elf.
    
    gcc/ChangeLog:
    
            * config/aarch64/aarch64-simd.md (aarch64_sqmovun<mode><vczle><vczbe>):
            Rename to...
            (*aarch64_sqmovun<mode>_insn<vczle><vczbe>): ... This.  Reimplement
            with RTL codes.
            (aarch64_sqmovun<mode> [SD_HSDI]): Reimplement with RTL codes.
            (aarch64_sqxtun2<mode>_le): Likewise.
            (aarch64_sqxtun2<mode>_be): Likewise.
            (aarch64_sqxtun2<mode>): Adjust for the above.
            (aarch64_sqmovun<mode>): New define_expand.
            * config/aarch64/iterators.md (UNSPEC_SQXTUN): Delete.
            (half_mask): New mode attribute.
            * config/aarch64/predicates.md (aarch64_simd_umax_half_mode):
            New predicate.

Diff:
---
 gcc/config/aarch64/aarch64-simd.md | 59 +++++++++++++++++++++++++++++---------
 gcc/config/aarch64/iterators.md    |  3 +-
 gcc/config/aarch64/predicates.md   |  8 ++++++
 3 files changed, 56 insertions(+), 14 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index b23067c6754..3cecc10f3e8 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -5438,28 +5438,55 @@
 
 (define_insn "aarch64_sqmovun<mode>"
   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
-	(unspec:<VNARROWQ> [(match_operand:SD_HSDI 1 "register_operand" "w")]
-			   UNSPEC_SQXTUN))]
+	(truncate:<VNARROWQ>
+	  (smin:SD_HSDI
+	    (smax:SD_HSDI
+	      (match_operand:SD_HSDI 1 "register_operand" "w")
+	      (const_int 0))
+	    (const_int <half_mask>))))]
    "TARGET_SIMD"
    "sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
    [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
 )
 
-(define_insn "aarch64_sqmovun<mode><vczle><vczbe>"
+(define_insn "*aarch64_sqmovun<mode>_insn<vczle><vczbe>"
   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
-	(unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")]
-	  UNSPEC_SQXTUN))]
+	(truncate:<VNARROWQ>
+	  (smin:VQN
+	    (smax:VQN (match_operand:VQN 1 "register_operand" "w")
+		      (match_operand:VQN 2 "aarch64_simd_or_scalar_imm_zero"))
+	    (match_operand:VQN 3 "aarch64_simd_umax_half_mode"))))]
   "TARGET_SIMD"
   "sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
   [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
 )
 
+(define_expand "aarch64_sqmovun<mode>"
+  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
+	(truncate:<VNARROWQ>
+	  (smin:VQN
+	    (smax:VQN (match_operand:VQN 1 "register_operand" "w")
+		      (match_dup 2))
+	    (match_dup 3))))]
+  "TARGET_SIMD"
+  {
+    operands[2] = CONST0_RTX (<MODE>mode);
+    operands[3]
+      = aarch64_simd_gen_const_vector_dup (<MODE>mode,
+			GET_MODE_MASK (GET_MODE_INNER (<VNARROWQ>mode)));
+  }
+)
+
 (define_insn "aarch64_sqxtun2<mode>_le"
   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
 	(vec_concat:<VNARROWQ2>
 	  (match_operand:<VNARROWQ> 1 "register_operand" "0")
-	  (unspec:<VNARROWQ>
-	    [(match_operand:VQN 2 "register_operand" "w")] UNSPEC_SQXTUN)))]
+	  (truncate:<VNARROWQ>
+	    (smin:VQN
+	      (smax:VQN
+		(match_operand:VQN 2 "register_operand" "w")
+		(match_operand:VQN 3 "aarch64_simd_or_scalar_imm_zero"))
+	      (match_operand:VQN 4 "aarch64_simd_umax_half_mode")))))]
   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
   "sqxtun2\\t%0.<V2ntype>, %2.<Vtype>"
    [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
@@ -5468,8 +5495,12 @@
 (define_insn "aarch64_sqxtun2<mode>_be"
   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
 	(vec_concat:<VNARROWQ2>
-	  (unspec:<VNARROWQ>
-	    [(match_operand:VQN 2 "register_operand" "w")] UNSPEC_SQXTUN)
+	  (truncate:<VNARROWQ>
+	    (smin:VQN
+	      (smax:VQN
+		(match_operand:VQN 2 "register_operand" "w")
+		(match_operand:VQN 3 "aarch64_simd_or_scalar_imm_zero"))
+	      (match_operand:VQN 4 "aarch64_simd_umax_half_mode")))
 	  (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
   "TARGET_SIMD && BYTES_BIG_ENDIAN"
   "sqxtun2\\t%0.<V2ntype>, %2.<Vtype>"
@@ -5479,16 +5510,18 @@
 (define_expand "aarch64_sqxtun2<mode>"
   [(match_operand:<VNARROWQ2> 0 "register_operand")
    (match_operand:<VNARROWQ> 1 "register_operand")
-   (unspec:<VNARROWQ>
-     [(match_operand:VQN 2 "register_operand")] UNSPEC_SQXTUN)]
+   (match_operand:VQN 2 "register_operand")]
   "TARGET_SIMD"
   {
+    rtx zeros = CONST0_RTX (<MODE>mode);
+    rtx half_umax = aarch64_simd_gen_const_vector_dup (<MODE>mode,
+			GET_MODE_MASK (GET_MODE_INNER (<VNARROWQ>mode)));
     if (BYTES_BIG_ENDIAN)
       emit_insn (gen_aarch64_sqxtun2<mode>_be (operands[0], operands[1],
-					      operands[2]));
+					       operands[2], zeros, half_umax));
     else
       emit_insn (gen_aarch64_sqxtun2<mode>_le (operands[0], operands[1],
-					       operands[2]));
+					       operands[2], zeros, half_umax));
     DONE;
   }
 )
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 9e1e17bc1b9..56ce1251e80 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -638,7 +638,6 @@
     UNSPEC_FMULX	; Used in aarch64-simd.md.
     UNSPEC_USQADD	; Used in aarch64-simd.md.
     UNSPEC_SUQADD	; Used in aarch64-simd.md.
-    UNSPEC_SQXTUN	; Used in aarch64-simd.md.
     UNSPEC_SSRA		; Used in aarch64-simd.md.
     UNSPEC_USRA		; Used in aarch64-simd.md.
     UNSPEC_SRSHR	; Used in aarch64-simd.md.
@@ -1025,6 +1024,8 @@
 
 (define_mode_attr short_mask [(HI "65535") (QI "255")])
 
+(define_mode_attr half_mask [(HI "255") (SI "65535") (DI "4294967295")])
+
 ;; For constraints used in scalar immediate vector moves
 (define_mode_attr hq [(HI "h") (QI "q")])
 
diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
index d93fd86fa27..9391aba40c4 100644
--- a/gcc/config/aarch64/predicates.md
+++ b/gcc/config/aarch64/predicates.md
@@ -595,6 +595,14 @@
 			GET_MODE_UNIT_BITSIZE (GET_MODE (op)) / 2,
 			GET_MODE_UNIT_BITSIZE (GET_MODE (op)) / 2)")))
 
+(define_predicate "aarch64_simd_umax_half_mode"
+  (and (match_code "const_vector")
+       (match_test "aarch64_const_vec_all_same_in_range_p (op,
+				(HOST_WIDE_INT_1U
+				<< (GET_MODE_UNIT_BITSIZE  (mode) / 2)) - 1,
+				(HOST_WIDE_INT_1U
+				<< (GET_MODE_UNIT_BITSIZE  (mode) / 2)) - 1)")))
+
 (define_predicate "aarch64_simd_shift_imm_vec_qi"
   (and (match_code "const_vector")
        (match_test "aarch64_const_vec_all_same_in_range_p (op, 1, 8)")))

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2023-06-07 15:21 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-06-07 15:21 [gcc r14-1609] aarch64: Represent SQXTUN with RTL operations Kyrylo Tkachov

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).