Re: [PATCH] i386: Omit clobbers from vzeroupper until final [PR92190]

public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed

From: Jakub Jelinek <jakub@redhat.com>
To: Uros Bizjak <ubizjak@gmail.com>
Cc: Richard Sandiford <richard.sandiford@arm.com>,
	       "gcc-patches@gcc.gnu.org" <gcc-patches@gcc.gnu.org>
Subject: Re: [PATCH] i386: Omit clobbers from vzeroupper until final [PR92190]
Date: Tue, 04 Feb 2020 13:13:00 -0000	[thread overview]
Message-ID: <20200204131257.GR17695@tucnak> (raw)
In-Reply-To: <CAFULd4YqVix5_0Sbcw3W1nDgztye07P2Yw=nfCLW=ba8yVQrjA@mail.gmail.com>

On Tue, Feb 04, 2020 at 01:38:51PM +0100, Uros Bizjak wrote:
> As Richard advised, let's put this safety stuff back. Usually, in
> i386.md, these kinds of splitters are implemented as two patterns, one
> (define_insn_and_split) having "#", and the other (define_insn) with a
> real insn. My opinion is that this separation avoids confusion as
> much as possible.

Okay.  So is it OK like this, if it passes bootstrap/regtest?

2020-02-04  Jakub Jelinek  <jakub@redhat.com>

	PR target/92190
	* config/i386/i386-features.c (ix86_add_reg_usage_to_vzeroupper): Only
	include sets and not clobbers in the vzeroupper pattern.
	* config/i386/sse.md (*avx_vzeroupper): Require in insn condition that
	the parallel has 17 (64-bit) or 9 (32-bit) elts.
	(*avx_vzeroupper_1): New define_insn_and_split.

	* gcc.target/i386/pr92190.c: New test.

--- gcc/config/i386/i386-features.c.jj	2020-02-04 13:33:32.713885386 +0100
+++ gcc/config/i386/i386-features.c	2020-02-04 13:55:44.358058104 +0100
@@ -1764,29 +1764,32 @@ convert_scalars_to_vector (bool timode_p
 
      (set (reg:V2DF R) (reg:V2DF R))
 
-   which preserves the low 128 bits but clobbers the upper bits.
-   For a dead register we just use:
-
-     (clobber (reg:V2DF R))
-
-   which invalidates any previous contents of R and stops R from becoming
-   live across the vzeroupper in future.  */
+   which preserves the low 128 bits but clobbers the upper bits.  */
 
 static void
 ix86_add_reg_usage_to_vzeroupper (rtx_insn *insn, bitmap live_regs)
 {
   rtx pattern = PATTERN (insn);
   unsigned int nregs = TARGET_64BIT ? 16 : 8;
-  rtvec vec = rtvec_alloc (nregs + 1);
-  RTVEC_ELT (vec, 0) = XVECEXP (pattern, 0, 0);
+  unsigned int npats = nregs;
   for (unsigned int i = 0; i < nregs; ++i)
     {
       unsigned int regno = GET_SSE_REGNO (i);
+      if (!bitmap_bit_p (live_regs, regno))
+	npats--;
+    }
+  if (npats == 0)
+    return;
+  rtvec vec = rtvec_alloc (npats + 1);
+  RTVEC_ELT (vec, 0) = XVECEXP (pattern, 0, 0);
+  for (unsigned int i = 0, j = 0; i < nregs; ++i)
+    {
+      unsigned int regno = GET_SSE_REGNO (i);
+      if (!bitmap_bit_p (live_regs, regno))
+	continue;
       rtx reg = gen_rtx_REG (V2DImode, regno);
-      if (bitmap_bit_p (live_regs, regno))
-	RTVEC_ELT (vec, i + 1) = gen_rtx_SET (reg, reg);
-      else
-	RTVEC_ELT (vec, i + 1) = gen_rtx_CLOBBER (VOIDmode, reg);
+      ++j;
+      RTVEC_ELT (vec, j) = gen_rtx_SET (reg, reg);
     }
   XVEC (pattern, 0) = vec;
   df_insn_rescan (insn);
--- gcc/config/i386/sse.md.jj	2020-02-04 13:33:32.733885088 +0100
+++ gcc/config/i386/sse.md	2020-02-04 13:57:38.995349722 +0100
@@ -19818,11 +19818,49 @@ (define_expand "avx_vzeroupper"
 (define_insn "*avx_vzeroupper"
   [(match_parallel 0 "vzeroupper_pattern"
      [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)])]
-  "TARGET_AVX"
+  "TARGET_AVX && XVECLEN (operands[0], 0) == (TARGET_64BIT ? 16 : 8) + 1"
   "vzeroupper"
   [(set_attr "type" "sse")
    (set_attr "modrm" "0")
    (set_attr "memory" "none")
+   (set_attr "prefix" "vex")
+   (set_attr "btver2_decode" "vector")
+   (set_attr "mode" "OI")])
+
+(define_insn_and_split "*avx_vzeroupper_1"
+  [(match_parallel 0 "vzeroupper_pattern"
+     [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)])]
+  "TARGET_AVX && XVECLEN (operands[0], 0) != (TARGET_64BIT ? 16 : 8) + 1"
+  "#"
+  "&& epilogue_completed"
+  [(match_dup 0)]
+{
+  /* For IPA-RA purposes, make it clear the instruction clobbers
+     even XMM registers not mentioned explicitly in the pattern.  */
+  unsigned int nregs = TARGET_64BIT ? 16 : 8;
+  unsigned int npats = XVECLEN (operands[0], 0);
+  rtvec vec = rtvec_alloc (nregs + 1);
+  RTVEC_ELT (vec, 0) = XVECEXP (operands[0], 0, 0);
+  for (unsigned int i = 0, j = 1; i < nregs; ++i)
+    {
+      unsigned int regno = GET_SSE_REGNO (i);
+      if (j < npats
+	  && REGNO (SET_DEST (XVECEXP (operands[0], 0, j))) == regno)
+	{
+	  RTVEC_ELT (vec, i + 1) = XVECEXP (operands[0], 0, j);
+	  j++;
+	}
+      else
+	{
+	  rtx reg = gen_rtx_REG (V2DImode, regno);
+	  RTVEC_ELT (vec, i + 1) = gen_rtx_CLOBBER (VOIDmode, reg);
+	}
+    }
+  operands[0] = gen_rtx_PARALLEL (VOIDmode, vec);
+}
+  [(set_attr "type" "sse")
+   (set_attr "modrm" "0")
+   (set_attr "memory" "none")
    (set_attr "prefix" "vex")
    (set_attr "btver2_decode" "vector")
    (set_attr "mode" "OI")])
--- gcc/testsuite/gcc.target/i386/pr92190.c.jj	2020-02-04 13:55:44.364058015 +0100
+++ gcc/testsuite/gcc.target/i386/pr92190.c	2020-02-04 13:55:44.364058015 +0100
@@ -0,0 +1,19 @@
+/* PR target/92190 */
+/* { dg-do compile { target { *-*-linux* && lp64 } } } */
+/* { dg-options "-mabi=ms -O2 -mavx512f" } */
+
+typedef char VC __attribute__((vector_size (16)));
+typedef int VI __attribute__((vector_size (16 * sizeof 0)));
+VC a;
+VI b;
+void bar (VI);
+void baz (VC);
+
+void
+foo (void)
+{
+  VC k = a;
+  VI n = b;
+  bar (n);
+  baz (k);
+}


	Jakub

next prev parent reply	other threads:[~2020-02-04 13:13 UTC|newest]

Thread overview: 29+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-02-04  9:39 Jakub Jelinek
2020-02-04 10:16 ` Uros Bizjak
2020-02-04 11:05   ` Jakub Jelinek
2020-02-04 11:13     ` Uros Bizjak
2020-02-04 11:24       ` Uros Bizjak
2020-02-04 12:06         ` Richard Sandiford
2020-02-04 12:23           ` Uros Bizjak
2020-02-08  1:50           ` Segher Boessenkool
2020-02-04 12:31         ` Jakub Jelinek
2020-02-04 12:39           ` Uros Bizjak
2020-02-04 13:13             ` Jakub Jelinek [this message]
2020-02-04 13:15               ` Uros Bizjak
2020-02-05 10:05                 ` Jakub Jelinek
2020-02-05 10:12                   ` Jakub Jelinek
2020-02-05 10:47                   ` Uros Bizjak
2020-02-05 11:03                     ` Jakub Jelinek
2020-02-05 11:11                       ` Uros Bizjak
2020-02-05 11:24                         ` Jakub Jelinek
2020-02-04 11:42   ` [PATCH] i386: Make xmm16-xmm31 call used even in ms ABI Jakub Jelinek
2020-02-06  1:00     ` JonY
2020-02-06  6:07       ` Jakub Jelinek
2020-02-07 10:57         ` JonY
2020-02-07 11:28           ` Jakub Jelinek
2020-02-08  8:24             ` JonY
2020-02-08 10:05               ` Jakub Jelinek
2020-02-08 10:32                 ` Uros Bizjak
2020-02-08 10:34                   ` Jakub Jelinek
2020-02-08 10:52                   ` Jakub Jelinek
2020-02-08 12:52                     ` Uros Bizjak

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200204131257.GR17695@tucnak \
    --to=jakub@redhat.com \
    --cc=gcc-patches@gcc.gnu.org \
    --cc=richard.sandiford@arm.com \
    --cc=ubizjak@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).