public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
From: Richard Biener <rguenther@suse.de>
To: gcc-patches@gcc.gnu.org
Cc: jakub@redhat.com, ubizjak@gmail.com
Subject: [PATCH] target/98856 - split vpinsrq with new peephole2
Date: Mon, 8 Mar 2021 12:04:22 +0100 (CET)	[thread overview]
Message-ID: <nycvar.YFH.7.76.2103081203060.23959@elmra.sevgm.obk> (raw)

This reduces the latency of a V2DImode construction from two
GPRs by removing the dependence of the vpinsrq instruction on the
preceding GPR->XMM move.  Instead, the two GPR->XMM moves can be
executed and scheduled concurrently, with the insert performed
by vpunpcklqdq.

Bootstrapped and tested on x86_64-unknown-linux-gnu.

OK for trunk, or do we want to defer this to GCC 12 -- unless
we can also solve the spilling in PR98856, which would then
fix the performance regression?

2021-03-05  Richard Biener  <rguenther@suse.de>

	PR target/98856
	* config/i386/sse.md (vpinsrq peephole): New peephole2
	splitting vpinsrq to a vmovq and vpunpcklqdq.

	* gcc.target/i386/pr98856.c: New testcase.
	* gcc.target/i386/avx512dq-concatv2di-1.c: Adjust.
	* gcc.target/i386/avx512vl-concatv2di-1.c: Likewise.
---
 gcc/config/i386/sse.md                        | 15 +++++++++++
 .../gcc.target/i386/avx512dq-concatv2di-1.c   |  4 +--
 .../gcc.target/i386/avx512vl-concatv2di-1.c   |  2 +-
 gcc/testsuite/gcc.target/i386/pr98856.c       | 25 +++++++++++++++++++
 4 files changed, 43 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr98856.c

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index ca4372d4164..7c9be80540b 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -1427,6 +1427,21 @@
   DONE;
 })
 
+;; Further split pinsrq variants of vec_concatv2di to hide the latency
+;; of the GPR->XMM transition(s).
+(define_peephole2
+  [(match_scratch:DI 3 "Yv")
+   (set (match_operand:V2DI 0 "sse_reg_operand")
+	(vec_concat:V2DI (match_operand:DI 1 "sse_reg_operand")
+			 (match_operand:DI 2 "nonimmediate_gr_operand")))]
+  "TARGET_64BIT && TARGET_SSE4_1
+   && !optimize_insn_for_size_p ()"
+  [(set (match_dup 3)
+        (match_dup 2))
+   (set (match_dup 0)
+	(vec_concat:V2DI (match_dup 1)
+			 (match_dup 3)))])
+
 ;; Merge movsd/movhpd to movupd for TARGET_SSE_UNALIGNED_LOAD_OPTIMAL targets.
 (define_peephole2
   [(set (match_operand:V2DF 0 "sse_reg_operand")
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-concatv2di-1.c b/gcc/testsuite/gcc.target/i386/avx512dq-concatv2di-1.c
index 82cb402575b..ac652bb1382 100644
--- a/gcc/testsuite/gcc.target/i386/avx512dq-concatv2di-1.c
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-concatv2di-1.c
@@ -14,7 +14,7 @@ f1 (long long x, long long y)
   asm volatile ("" : "+v" (c));
 }
 
-/* { dg-final { scan-assembler "vpinsrq\[^\n\r]*\\\$1\[^\n\r]*%rsi\[^\n\r]*%xmm16\[^\n\r]*%xmm17" } } */
+/* { dg-final { scan-assembler "vpunpcklqdq\[^\n\r]*%xmm16\[^\n\r]*%xmm17" } } */
 
 void
 f2 (long long x, long long *y)
@@ -27,7 +27,7 @@ f2 (long long x, long long *y)
   asm volatile ("" : "+v" (c));
 }
 
-/* { dg-final { scan-assembler "vpinsrq\[^\n\r]*\\\$1\[^\n\r]*%\[re]si\[^\n\r]*%xmm18\[^\n\r]*%xmm19" } } */
+/* { dg-final { scan-assembler "vpunpcklqdq\[^\n\r]*%xmm18\[^\n\r]*%xmm19" } } */
 
 void
 f3 (long long x)
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-concatv2di-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-concatv2di-1.c
index 8e637071aa2..b8300371a21 100644
--- a/gcc/testsuite/gcc.target/i386/avx512vl-concatv2di-1.c
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-concatv2di-1.c
@@ -28,7 +28,7 @@ f2 (long long x, long long *y)
   asm volatile ("" : "+v" (c));
 }
 
-/* { dg-final { scan-assembler "vmovhps\[^\n\r]*%\[re]si\[^\n\r]*%xmm18\[^\n\r]*%xmm19" } } */
+/* { dg-final { scan-assembler "vpunpcklqdq\[^\n\r]*%xmm18\[^\n\r]*%xmm19" } } */
 
 void
 f3 (long long x)
diff --git a/gcc/testsuite/gcc.target/i386/pr98856.c b/gcc/testsuite/gcc.target/i386/pr98856.c
new file mode 100644
index 00000000000..1ea24d0f1fb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr98856.c
@@ -0,0 +1,25 @@
+/* { dg-do compile { target lp64 } } */
+/* { dg-options "-O3 -march=znver2" } */
+
+typedef __UINT64_TYPE__ uint64_t;
+void poly_double_le2 (unsigned char *out, const unsigned char *in)
+{
+  uint64_t W[2];
+
+  __builtin_memcpy (&W, in, 16);
+  uint64_t carry = (W[1] >> 63) * 135;
+  W[1] = (W[1] << 1) ^ (W[0] >> 63);
+  W[0] = (W[0] << 1) ^ carry;
+  __builtin_memcpy (out, &W[0], 8);
+  __builtin_memcpy (out + 8, &W[1], 8);
+}
+
+/* We should split 
+     vpinsrq $1, %rax, %xmm0, %xmm0
+   to
+     vmovq %rax, %xmm1
+     vpunpcklqdq %xmm1, %xmm0, %xmm0
+   to better hide the latency of the GPR->XMM transitions.  */
+
+/* { dg-final { scan-assembler-not "pinsrq" } } */
+/* { dg-final { scan-assembler-times "punpcklqdq" 1 } } */
-- 
2.26.2

             reply	other threads:[~2021-03-08 11:04 UTC|newest]

Thread overview: 3+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-03-08 11:04 Richard Biener [this message]
2021-03-08 11:16 ` Jakub Jelinek
2021-03-08 12:01   ` Richard Biener

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=nycvar.YFH.7.76.2103081203060.23959@elmra.sevgm.obk \
    --to=rguenther@suse.de \
    --cc=gcc-patches@gcc.gnu.org \
    --cc=jakub@redhat.com \
    --cc=ubizjak@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).