[gcc r14-1886] aarch64: [US]Q(R)SHR(U)N scalar forms refactoring

public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed

From: Kyrylo Tkachov <ktkachov@gcc.gnu.org>
To: gcc-cvs@gcc.gnu.org
Subject: [gcc r14-1886] aarch64: [US]Q(R)SHR(U)N scalar forms refactoring
Date: Fri, 16 Jun 2023 13:07:55 +0000 (GMT)	[thread overview]
Message-ID: <20230616130755.4EA7C385B52A@sourceware.org> (raw)

https://gcc.gnu.org/g:d20b2ad845876eec0ee80a3933ad49f9f6c4ee30

commit r14-1886-gd20b2ad845876eec0ee80a3933ad49f9f6c4ee30
Author: Kyrylo Tkachov <kyrylo.tkachov@arm.com>
Date:   Tue Jun 6 23:35:52 2023 +0100

    aarch64: [US]Q(R)SHR(U)N scalar forms refactoring
    
    Some instructions from the previous patch have scalar forms:
    SQSHRN,SQRSHRN,UQSHRN,UQRSHRN,SQSHRUN,SQRSHRUN.
    This patch converts the patterns for these to use standard RTL codes.
    Their MD patterns deviate slightly from the vector forms mostly due to
    things like operands being scalar rather than vectors.
    One nuance is in the SQSHRUN,SQRSHRUN patterns. These end in a truncate
    to the scalar narrow mode e.g. SI -> QI.  This gets simplified by the
    RTL passes to a subreg rather than keeping it as a truncate.
    So we end up representing these without the truncate and in the expander
    read the narrow subreg in order to comply with the expected width of the
    intrinsic.
    
    Bootstrapped and tested on aarch64-none-linux-gnu and
    aarch64_be-none-elf.
    
    gcc/ChangeLog:
    
            * config/aarch64/aarch64-simd.md (aarch64_<sur>q<r>shr<u>n_n<mode>):
            Rename to...
            (aarch64_<shrn_op>shrn_n<mode>): ... This.  Reimplement with RTL codes.
            (*aarch64_<shrn_op>rshrn_n<mode>_insn): New define_insn.
            (aarch64_sqrshrun_n<mode>_insn): Likewise.
            (aarch64_sqshrun_n<mode>_insn): Likewise.
            (aarch64_<shrn_op>rshrn_n<mode>): New define_expand.
            (aarch64_sqshrun_n<mode>): Likewise.
            (aarch64_sqrshrun_n<mode>): Likewise.
            * config/aarch64/iterators.md (V2XWIDE): Add HI and SI modes.

Diff:
---
 gcc/config/aarch64/aarch64-simd.md | 111 ++++++++++++++++++++++++++++++++++---
 gcc/config/aarch64/iterators.md    |   3 +-
 2 files changed, 106 insertions(+), 8 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 8b92981bebb..bbb54344eb7 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -6654,15 +6654,15 @@
 
 ;; vq(r)shr(u)n_n
 
-(define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>"
+(define_insn "aarch64_<shrn_op>shrn_n<mode>"
   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
-        (unspec:<VNARROWQ> [(match_operand:SD_HSDI 1 "register_operand" "w")
-			    (match_operand:SI 2
-			      "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
-			   VQSHRN_N))]
+	(SAT_TRUNC:<VNARROWQ>
+	  (<TRUNC_SHIFT>:SD_HSDI
+	    (match_operand:SD_HSDI 1 "register_operand" "w")
+	    (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))))]
   "TARGET_SIMD"
-  "<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
-  [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
+  "<shrn_op>shrn\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
+  [(set_attr "type" "neon_shift_imm_narrow_q")]
 )
 
 (define_insn "*aarch64_<shrn_op>shrn_n<mode>_insn<vczle><vczbe>"
@@ -6704,6 +6704,41 @@
   [(set_attr "type" "neon_shift_imm_narrow_q")]
 )
 
+(define_insn "*aarch64_<shrn_op>rshrn_n<mode>_insn"
+  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
+	(SAT_TRUNC:<VNARROWQ>
+	  (<TRUNC_SHIFT>:<DWI>
+	    (plus:<DWI>
+	      (<TRUNCEXTEND>:<DWI>
+	        (match_operand:SD_HSDI 1 "register_operand" "w"))
+	      (match_operand:<DWI> 3 "aarch64_simd_rsra_rnd_imm_vec"))
+	    (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))))]
+  "TARGET_SIMD
+   && aarch64_const_vec_rnd_cst_p (operands[3], operands[2])"
+  "<shrn_op>rshrn\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
+  [(set_attr "type" "neon_shift_imm_narrow_q")]
+)
+
+(define_expand "aarch64_<shrn_op>rshrn_n<mode>"
+  [(set (match_operand:<VNARROWQ> 0 "register_operand")
+	(SAT_TRUNC:<VNARROWQ>
+	  (<TRUNC_SHIFT>:<V2XWIDE>
+	    (plus:<V2XWIDE>
+	      (<TRUNCEXTEND>:<V2XWIDE>
+	        (match_operand:SD_HSDI 1 "register_operand"))
+	      (match_dup 3))
+	    (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))))]
+  "TARGET_SIMD"
+  {
+    /* Use this expander to create the rounding constant vector, which is
+       1 << (shift - 1).  Use wide_int here to ensure that the right TImode
+       RTL is generated when handling the DImode expanders.  */
+    int prec = GET_MODE_UNIT_PRECISION (<V2XWIDE>mode);
+    wide_int rnd_wi = wi::set_bit_in_zero (INTVAL (operands[2]) - 1, prec);
+    operands[3] = immed_wide_int_const (rnd_wi, GET_MODE_INNER (<V2XWIDE>mode));
+  }
+)
+
 (define_expand "aarch64_<shrn_op>rshrn_n<mode>"
   [(set (match_operand:<VNARROWQ> 0 "register_operand")
 	(ALL_TRUNC:<VNARROWQ>
@@ -6748,6 +6783,34 @@
   [(set_attr "type" "neon_shift_imm_narrow_q")]
 )
 
+(define_insn "aarch64_sqshrun_n<mode>_insn"
+  [(set (match_operand:SD_HSDI 0 "register_operand" "=w")
+	(smin:SD_HSDI
+	  (smax:SD_HSDI
+	    (ashiftrt:SD_HSDI
+	      (match_operand:SD_HSDI 1 "register_operand" "w")
+	      (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))
+	    (const_int 0))
+	  (const_int <half_mask>)))]
+  "TARGET_SIMD"
+  "sqshrun\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
+  [(set_attr "type" "neon_shift_imm_narrow_q")]
+)
+
+(define_expand "aarch64_sqshrun_n<mode>"
+  [(match_operand:<VNARROWQ> 0 "register_operand")
+   (match_operand:SD_HSDI 1 "register_operand")
+   (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>")]
+  "TARGET_SIMD"
+  {
+    rtx dst = gen_reg_rtx (<MODE>mode);
+    emit_insn (gen_aarch64_sqshrun_n<mode>_insn (dst, operands[1],
+						 operands[2]));
+    emit_move_insn (operands[0], gen_lowpart (<VNARROWQ>mode, dst));
+    DONE;
+  }
+)
+
 (define_expand "aarch64_sqshrun_n<mode>"
   [(set (match_operand:<VNARROWQ> 0 "register_operand")
 	(truncate:<VNARROWQ>
@@ -6788,6 +6851,40 @@
   [(set_attr "type" "neon_shift_imm_narrow_q")]
 )
 
+(define_insn "aarch64_sqrshrun_n<mode>_insn"
+  [(set (match_operand:<V2XWIDE> 0 "register_operand" "=w")
+	(smin:<V2XWIDE>
+	  (smax:<V2XWIDE>
+	    (ashiftrt:<V2XWIDE>
+	      (plus:<V2XWIDE>
+		(sign_extend:<V2XWIDE>
+		  (match_operand:SD_HSDI 1 "register_operand" "w"))
+		(match_operand:<V2XWIDE> 3 "aarch64_simd_rsra_rnd_imm_vec"))
+	      (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))
+	    (const_int 0))
+	  (const_int <half_mask>)))]
+  "TARGET_SIMD
+   && aarch64_const_vec_rnd_cst_p (operands[3], operands[2])"
+  "sqrshrun\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
+  [(set_attr "type" "neon_shift_imm_narrow_q")]
+)
+
+(define_expand "aarch64_sqrshrun_n<mode>"
+  [(match_operand:<VNARROWQ> 0 "register_operand")
+   (match_operand:SD_HSDI 1 "register_operand")
+   (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>")]
+  "TARGET_SIMD"
+  {
+    int prec = GET_MODE_UNIT_PRECISION (<V2XWIDE>mode);
+    wide_int rnd_wi = wi::set_bit_in_zero (INTVAL (operands[2]) - 1, prec);
+    rtx rnd = immed_wide_int_const (rnd_wi, <V2XWIDE>mode);
+    rtx dst = gen_reg_rtx (<V2XWIDE>mode);
+    emit_insn (gen_aarch64_sqrshrun_n<mode>_insn (dst, operands[1], operands[2], rnd));
+    emit_move_insn (operands[0], gen_lowpart (<VNARROWQ>mode, dst));
+    DONE;
+  }
+)
+
 (define_expand "aarch64_sqrshrun_n<mode>"
   [(set (match_operand:<VNARROWQ> 0 "register_operand")
 	(truncate:<VNARROWQ>
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index e8c62c88b14..acc7a3ec46e 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -1532,7 +1532,8 @@
 (define_mode_attr V2XWIDE [(V8QI "V8HI") (V4HI "V4SI")
 			   (V16QI "V16HI") (V8HI "V8SI")
 			   (V2SI "V2DI") (V4SI "V4DI")
-			   (V2DI "V2TI") (DI "TI")])
+			   (V2DI "V2TI") (DI "TI")
+			   (HI "SI") (SI "DI")])
 
 ;; Predicate mode associated with VWIDE.
 (define_mode_attr VWIDE_PRED [(VNx8HF "VNx4BI") (VNx4SF "VNx2BI")])

                 reply	other threads:[~2023-06-16 13:07 UTC|newest]

Thread overview: [no followups] expand[flat|nested]  mbox.gz  Atom feed

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230616130755.4EA7C385B52A@sourceware.org \
    --to=ktkachov@gcc.gnu.org \
    --cc=gcc-cvs@gcc.gnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).