public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
From: Jakub Jelinek <jakub@gcc.gnu.org>
To: gcc-cvs@gcc.gnu.org
Subject: [gcc r10-10611] Fix incorrect optimization by cprop_hardreg.
Date: Tue, 10 May 2022 08:17:56 +0000 (GMT)	[thread overview]
Message-ID: <20220510081757.005053836013@sourceware.org> (raw)

https://gcc.gnu.org/g:0372a414e7500dccab1eb423a2a620645c820a52

commit r10-10611-g0372a414e7500dccab1eb423a2a620645c820a52
Author: liuhongt <hongtao.liu@intel.com>
Date:   Mon Jan 18 16:55:32 2021 +0800

    Fix incorrect optimization by cprop_hardreg.
    
    If SRC had been assigned a mode narrower than the copy, we can't
    always link DEST into the chain even if they have the same
    hard_regno_nregs (e.g. HImode/SImode in the i386 backend).
    
    For example:
            kmovw   %k0, %edi
            vmovd   %edi, %xmm2
            vpshuflw        $0, %xmm2, %xmm0
            kmovw   %k0, %r8d
            kmovd   %k0, %r9d
    ...
    -        movl %r9d, %r11d
    +        vmovd %xmm2, %r11d
    
    gcc/ChangeLog:
    
            PR rtl-optimization/98694
            * regcprop.c (copy_value): If SRC had been assigned a mode
            narrower than the copy, we can't link DEST into the chain even
            if they have the same hard_regno_nregs (e.g. HImode/SImode in
            the i386 backend).
    
    gcc/testsuite/ChangeLog:
    
            PR rtl-optimization/98694
            * gcc.target/i386/pr98694.c: New test.
    
    (cherry picked from commit e711b67a9081ae84c66174a50705dc98ba993a43)

Diff:
---
 gcc/regcprop.c                          | 29 +++++++++++++++++++++++
 gcc/testsuite/gcc.target/i386/pr98694.c | 41 +++++++++++++++++++++++++++++++++
 2 files changed, 70 insertions(+)

diff --git a/gcc/regcprop.c b/gcc/regcprop.c
index d2a01130fe1..73d8bcbfec6 100644
--- a/gcc/regcprop.c
+++ b/gcc/regcprop.c
@@ -358,6 +358,35 @@ copy_value (rtx dest, rtx src, struct value_data *vd)
   else if (sn > hard_regno_nregs (sr, vd->e[sr].mode))
     return;
 
+  /* It is not safe to link DEST into the chain if SRC was defined in some
+     narrower mode M and if M is also narrower than the mode of the first
+     register in the chain.  For example:
+     (set (reg:DI r1) (reg:DI r0))
+     (set (reg:HI r2) (reg:HI r1))
+     (set (reg:SI r3) (reg:SI r2)) // Should start a new chain at r3
+     (set (reg:SI r4) (reg:SI r1))
+     (set (reg:SI r5) (reg:SI r4))
+
+     the upper part of r3 is undefined.  If we added it to the chain,
+     it may be used to replace r5, which has defined upper bits.
+     See PR98694 for details.
+
+     [A] partial_subreg_p (vd->e[sr].mode, GET_MODE (src))
+     [B] partial_subreg_p (vd->e[sr].mode, vd->e[vd->e[sr].oldest_regno].mode)
+     Condition B is added to catch optimization opportunities of
+
+     (set (reg:HI R1) (reg:HI R0))
+     (set (reg:SI R2) (reg:SI R1)) // [A]
+     (set (reg:DI R3) (reg:DI R2)) // [A]
+     (set (reg:SI R4) (reg:SI R[0-3]))
+     (set (reg:HI R5) (reg:HI R[0-4]))
+
+     in which all registers have only 16 defined bits.  */
+  else if (partial_subreg_p (vd->e[sr].mode, GET_MODE (src))
+	   && partial_subreg_p (vd->e[sr].mode,
+				vd->e[vd->e[sr].oldest_regno].mode))
+    return;
+
   /* Link DR at the end of the value chain used by SR.  */
 
   vd->e[dr].oldest_regno = vd->e[sr].oldest_regno;
diff --git a/gcc/testsuite/gcc.target/i386/pr98694.c b/gcc/testsuite/gcc.target/i386/pr98694.c
new file mode 100644
index 00000000000..45889d482c1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr98694.c
@@ -0,0 +1,41 @@
+/* PR rtl-optimization/98694 */
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -mavx512bw" } */
+/* { dg-require-effective-target avx512bw } */
+
+#include<immintrin.h>
+typedef short v4hi __attribute__ ((vector_size (8)));
+typedef int v2si __attribute__ ((vector_size (8)));
+v4hi b;
+
+__attribute__ ((noipa))
+v2si
+foo (__m512i src1, __m512i src2)
+{
+  __mmask64 m = _mm512_cmpeq_epu8_mask (src1, src2);
+  short s = (short) m;
+  int i = (int)m;
+  b = __extension__ (v4hi) {s, s, s, s};
+  return __extension__ (v2si) {i, i};
+}
+
+int main ()
+{
+  if (!__builtin_cpu_supports ("avx512bw"))
+    return 0;
+
+  __m512i src1 = _mm512_setzero_si512 ();
+  __m512i src2 = _mm512_set_epi8 (0, 1, 0, 1, 0, 1, 0, 1,
+				  0, 1, 0, 1, 0, 1, 0, 1,
+				  0, 1, 0, 1, 0, 1, 0, 1,
+				  0, 1, 0, 1, 0, 1, 0, 1,
+				  0, 1, 0, 1, 0, 1, 0, 1,
+				  0, 1, 0, 1, 0, 1, 0, 1,
+				  0, 1, 0, 1, 0, 1, 0, 1,
+				  0, 1, 0, 1, 0, 1, 0, 1);
+  __mmask64 m = _mm512_cmpeq_epu8_mask (src1, src2);
+  v2si a = foo (src1, src2);
+  if (a[0] != (int)m)
+    __builtin_abort ();
+  return 0;
+}


                 reply	other threads:[~2022-05-10  8:17 UTC|newest]

Thread overview: [no followups] expand[flat|nested]  mbox.gz  Atom feed

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220510081757.005053836013@sourceware.org \
    --to=jakub@gcc.gnu.org \
    --cc=gcc-cvs@gcc.gnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).