From a52b3c8a90a0bf6cbda8ce86d99c82c6182863a7 Mon Sep 17 00:00:00 2001 From: liuhongt Date: Mon, 18 Jan 2021 16:55:32 +0800 Subject: [PATCH] [PR rtl/optimization/98694] Fix incorrect optimization by cprop_hardreg. If SRC had been assigned a mode narrower than the copy, we can't link DEST into the chain even they have same hard_regno_nregs(i.e. HImode/SImode in i386 backend). i.e kmovw %k0, %edi vmovd %edi, %xmm2 vpshuflw $0, %xmm2, %xmm0 kmovw %k0, %r8d kmovd %k0, %r9d ... - movl %r9d, %r11d + vmovd %xmm2, %r11d gcc/ChangeLog: PR rtl-optimization/98694 * regcprop.c (copy_value): If SRC had been assigned a mode narrower than the copy, we can't link DEST into the chain even they have same hard_regno_nregs(i.e. HImode/SImode in i386 backend). gcc/testsuite/ChangeLog: PR rtl-optimization/98694 * gcc.target/i386/pr98694.c: New test. --- gcc/regcprop.c | 33 +++++++++++++++++++++ gcc/testsuite/gcc.target/i386/pr98694.c | 38 +++++++++++++++++++++++++ 2 files changed, 71 insertions(+) create mode 100644 gcc/testsuite/gcc.target/i386/pr98694.c diff --git a/gcc/regcprop.c b/gcc/regcprop.c index dd62cb36013..908298beaea 100644 --- a/gcc/regcprop.c +++ b/gcc/regcprop.c @@ -358,6 +358,39 @@ copy_value (rtx dest, rtx src, struct value_data *vd) else if (sn > hard_regno_nregs (sr, vd->e[sr].mode)) return; + /* If SRC had been assigned a mode narrower than the copy, Although + they have same hard_regno_nregs, it's not safe to link DEST into the + chain. .i.e. + (set (reg:DI r1) (reg:DI r0)) + (set (reg:HI r2) (reg:HI r1)) + (set (reg:SI r3) (reg:SI r2)) //Should be a new chain start at r3 + (set (reg:SI r4) (reg:SI r1)) + (set (reg:SI r5) (reg:SI r4)) + the upper part of r3 is undefined, if adding it to the chain, it may be + prop to r5 which has defined upper bits, .i.e. pr98694. + + [A] partial_subreg_p (vd->e[sr].mode, GET_MODE (src)) + [B] partial_subreg_p (vd->e[sr].mode, vd->e[vd->e[sr].oldest_regno].mode) + Condition B is added to to catch optimization opportunities of + + (set (reg:HI R1) (reg:HI R0)) + (set (reg:SI R2) (reg:SI R1)) // [A] + (set (reg:DI R3) (reg:DI R2)) // [A] + (set (reg:SI R4) (reg:SI R[0-3])) + (set (reg:HI R5) (reg:HI R[0-4])) + + but problematic for + + (set (reg:SI R1) (reg:SI R0)) + (set (reg:HI R2) (reg:HI R1)) + (set (reg:SI R3) (reg:SI R2)) // [A] && [B] + + to be fixed???? */ + else if (partial_subreg_p (vd->e[sr].mode, GET_MODE (src)) + && partial_subreg_p (vd->e[sr].mode, + vd->e[vd->e[sr].oldest_regno].mode)) + return; + /* Link DR at the end of the value chain used by SR. */ vd->e[dr].oldest_regno = vd->e[sr].oldest_regno; diff --git a/gcc/testsuite/gcc.target/i386/pr98694.c b/gcc/testsuite/gcc.target/i386/pr98694.c new file mode 100644 index 00000000000..611f9e77627 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr98694.c @@ -0,0 +1,38 @@ +/* PR rtl-optimization/98694 */ +/* { dg-do run { target { ! ia32 } } } */ +/* { dg-options "-O2 -mavx512bw" } */ +/* { dg-require-effective-target avx512bw } */ + +#include +typedef short v4hi __attribute__ ((vector_size (8))); +typedef int v2si __attribute__ ((vector_size (8))); +v4hi b; + +__attribute__ ((noipa)) +v2si +foo (__m512i src1, __m512i src2) +{ + __mmask64 m = _mm512_cmpeq_epu8_mask (src1, src2); + short s = (short) m; + int i = (int)m; + b = __extension__ (v4hi) {s, s, s, s}; + return __extension__ (v2si) {i, i}; +} + +int main () +{ + __m512i src1 = _mm512_setzero_si512 (); + __m512i src2 = _mm512_set_epi8 (0, 1, 0, 1, 0, 1, 0, 1, + 0, 1, 0, 1, 0, 1, 0, 1, + 0, 1, 0, 1, 0, 1, 0, 1, + 0, 1, 0, 1, 0, 1, 0, 1, + 0, 1, 0, 1, 0, 1, 0, 1, + 0, 1, 0, 1, 0, 1, 0, 1, + 0, 1, 0, 1, 0, 1, 0, 1, + 0, 1, 0, 1, 0, 1, 0, 1); + __mmask64 m = _mm512_cmpeq_epu8_mask (src1, src2); + v2si a = foo (src1, src2); + if (a[0] != (int)m) + __builtin_abort (); + return 0; +} -- 2.18.1