[gcc r11-8223] SVE: Fix wrong sve predicate split (PR100048)

public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed

From: Tamar Christina <tnfchris@gcc.gnu.org>
To: gcc-cvs@gcc.gnu.org
Subject: [gcc r11-8223] SVE: Fix wrong sve predicate split (PR100048)
Date: Fri, 16 Apr 2021 15:59:41 +0000 (GMT)	[thread overview]
Message-ID: <20210416155941.4763F38930D6@sourceware.org> (raw)

https://gcc.gnu.org/g:8535755af70f819d820553b2e73e72a16a984599

commit r11-8223-g8535755af70f819d820553b2e73e72a16a984599
Author: Tamar Christina <tamar.christina@arm.com>
Date:   Fri Apr 16 16:58:50 2021 +0100

    SVE: Fix wrong sve predicate split (PR100048)
    
    The attached testcase generates the following paradoxical subregs when creating
    the predicates.
    
    (insn 22 21 23 2 (set (reg:VNx8BI 100)
            (subreg:VNx8BI (reg:VNx2BI 103) 0))
         (expr_list:REG_EQUAL (const_vector:VNx8BI [
                    (const_int 1 [0x1])
                    (const_int 0 [0])
                    (const_int 1 [0x1])
            (const_int 0 [0]) repeated x5
                ])
            (nil)))
    
    and
    
    (insn 15 14 16 2 (set (reg:VNx8BI 96)
            (subreg:VNx8BI (reg:VNx2BI 99) 0))
         (expr_list:REG_EQUAL (const_vector:VNx8BI [
                    (const_int 1 [0x1])
                    (const_int 0 [0]) repeated x7
                ])
            (nil)))
    
    This causes CSE to incorrectly think that the two predicates are equal because
    some of the significant bits get ignored due to the subreg.
    
    The attached patch instead makes it so it always looks at all 16-bits of the
    predicate, but in turn means we need to generate a TRN that matches the expected
    result mode.  In effect in RTL we keep the mode as VNx16BI but during codegen
    re-interpret them as the mode the predicate instruction wanted:
    
    (insn 10 9 11 2 (set (reg:VNx8BI 96)
            (subreg:VNx8BI (reg:VNx16BI 99) 0))
         (expr_list:REG_EQUAL (const_vector:VNx8BI [
                    (const_int 1 [0x1])
                    (const_int 0 [0]) repeated x7
                ])
            (nil)))
    
    Which needed correction to the TRN pattern.  A new TRN1_CONV unspec is
    introduced which allows one to keep the arguments as VNx16BI but encode the
    instruction as a type of the last operand.
    
    (insn 9 8 10 2 (set (reg:VNx16BI 99)
            (unspec:VNx16BI [
                    (reg:VNx16BI 97)
                    (reg:VNx16BI 98)
                    (reg:VNx2BI 100)
                ] UNSPEC_TRN1_CONV))
            (nil))
    
    This allows us remove all the paradoxical subregs and end up with
    
    (insn 16 15 17 2 (set (reg:VNx8BI 101)
            (subreg:VNx8BI (reg:VNx16BI 104) 0))
            (expr_list:REG_EQUAL (const_vector:VNx8BI [
                    (const_int 1 [0x1])
                    (const_int 0 [0])
                    (const_int 1 [0x1])
                    (const_int 0 [0]) repeated x5
                ])
            (nil)))
    
    gcc/ChangeLog:
    
            PR target/100048
            * config/aarch64/aarch64-sve.md (@aarch64_sve_trn1_conv<mode>): New.
            * config/aarch64/aarch64.c (aarch64_expand_sve_const_pred_trn): Use new
            TRN optab.
            * config/aarch64/iterators.md (UNSPEC_TRN1_CONV): New.
    
    gcc/testsuite/ChangeLog:
    
            PR target/100048
            * gcc.target/aarch64/sve/pr100048.c: New test.

Diff:
---
 gcc/config/aarch64/aarch64-sve.md               | 14 ++++++++++++++
 gcc/config/aarch64/aarch64.c                    | 10 +++++-----
 gcc/config/aarch64/iterators.md                 |  1 +
 gcc/testsuite/gcc.target/aarch64/sve/pr100048.c | 25 +++++++++++++++++++++++++
 4 files changed, 45 insertions(+), 5 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
index 7db2938bb84..b8b6f55e160 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -8657,6 +8657,20 @@
   "<perm_insn>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
 )
 
+;; Special purpose permute used by the predicate generation instructions.
+;; Unlike the normal permute patterns, these instructions operate on VNx16BI
+;; regardless of the element size, so that all input and output bits are
+;; well-defined.  Operand 3 then indicates the size of the permute.
+(define_insn "@aarch64_sve_trn1_conv<mode>"
+  [(set (match_operand:VNx16BI 0 "register_operand" "=Upa")
+	(unspec:VNx16BI [(match_operand:VNx16BI 1 "register_operand" "Upa")
+			 (match_operand:VNx16BI 2 "register_operand" "Upa")
+			 (match_operand:PRED_ALL 3 "aarch64_simd_imm_zero")]
+			UNSPEC_TRN1_CONV))]
+  "TARGET_SVE"
+  "trn1\t%0.<PRED_ALL:Vetype>, %1.<PRED_ALL:Vetype>, %2.<PRED_ALL:Vetype>"
+)
+
 ;; =========================================================================
 ;; == Conversions
 ;; =========================================================================
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 04b55d9070b..09d79f67a61 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -5535,12 +5535,12 @@ aarch64_expand_sve_const_pred_trn (rtx target, rtx_vector_builder &builder,
 	}
     }
 
-  /* Emit the TRN1 itself.  */
+  /* Emit the TRN1 itself.  We emit a TRN that operates on VNx16BI
+     operands but permutes them as though they had mode MODE.  */
   machine_mode mode = aarch64_sve_pred_mode (permute_size).require ();
-  target = aarch64_target_reg (target, mode);
-  emit_insn (gen_aarch64_sve (UNSPEC_TRN1, mode, target,
-			      gen_lowpart (mode, a),
-			      gen_lowpart (mode, b)));
+  target = aarch64_target_reg (target, GET_MODE (a));
+  rtx type_reg = CONST0_RTX (mode);
+  emit_insn (gen_aarch64_sve_trn1_conv (mode, target, a, b, type_reg));
   return target;
 }
 
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 5f5abd60525..cac33ae812b 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -649,6 +649,7 @@
     UNSPEC_UZP2Q	; Used in aarch64-sve.md.
     UNSPEC_ZIP1Q	; Used in aarch64-sve.md.
     UNSPEC_ZIP2Q	; Used in aarch64-sve.md.
+    UNSPEC_TRN1_CONV	; Used in aarch64-sve.md.
     UNSPEC_COND_CMPEQ_WIDE ; Used in aarch64-sve.md.
     UNSPEC_COND_CMPGE_WIDE ; Used in aarch64-sve.md.
     UNSPEC_COND_CMPGT_WIDE ; Used in aarch64-sve.md.
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr100048.c b/gcc/testsuite/gcc.target/aarch64/sve/pr100048.c
new file mode 100644
index 00000000000..525933863f7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pr100048.c
@@ -0,0 +1,25 @@
+/* { dg-additional-options "-O2 -fno-schedule-insns" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "arm_sve.h"
+
+/*
+** foo:
+**        ptrue   (p[0-7])\.d, all
+**        pfalse  (p[0-7])\.b
+**        ptrue   (p[0-7])\.s, all
+**        trn1    (p[0-7])\.d, \2\.d, \3\.d
+**        trn1    \2\.d, \1\.d, \3\.d
+**        faddv   (h[0-31]), \4\, (z[0-31]).h
+**        faddv   (h[0-31]), \2\, \6\.h
+**        str     \5, [x0]
+**        str     \7, [x0, 2]
+**        ret
+*/
+void foo(svfloat16_t in, float16_t *dst) {
+  const svbool_t pg_q0 = svdupq_n_b16(1, 0, 1, 0, 0, 0, 0, 0);
+  const svbool_t pg_f0 = svdupq_n_b16(1, 0, 0, 0, 0, 0, 0, 0);
+  dst[0] = svaddv_f16(pg_f0, in);
+  dst[1] = svaddv_f16(pg_q0, in);
+}
+

                 reply	other threads:[~2021-04-16 15:59 UTC|newest]

Thread overview: [no followups] expand[flat|nested]  mbox.gz  Atom feed

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210416155941.4763F38930D6@sourceware.org \
    --to=tnfchris@gcc.gnu.org \
    --cc=gcc-cvs@gcc.gnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).