From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (qmail 95273 invoked by alias); 28 Feb 2019 19:10:19 -0000 Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Received: (qmail 95260 invoked by uid 89); 28 Feb 2019 19:10:18 -0000 Authentication-Results: sourceware.org; auth=none X-Spam-SWARE-Status: No, score=-26.2 required=5.0 tests=BAYES_00,FREEMAIL_FROM,GIT_PATCH_0,GIT_PATCH_1,GIT_PATCH_2,GIT_PATCH_3,SPF_SOFTFAIL autolearn=ham version=3.3.2 spammy=ia32, ymm9,1, evex, ymm6 X-HELO: mga18.intel.com Received: from mga18.intel.com (HELO mga18.intel.com) (134.134.136.126) by sourceware.org (qpsmtpd/0.93/v0.84-503-g423c35a) with ESMTP; Thu, 28 Feb 2019 19:10:15 +0000 Received: from orsmga002.jf.intel.com ([10.7.209.21]) by orsmga106.jf.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384; 28 Feb 2019 11:10:11 -0800 Received: from gnu-cfl-1.sc.intel.com ([172.25.70.237]) by orsmga002.jf.intel.com with ESMTP; 28 Feb 2019 11:10:11 -0800 From: "H.J. Lu" To: gcc-patches@gcc.gnu.org Cc: Uros Bizjak Subject: [PATCH] x32: Add addr32 prefix to UNSPEC_VSIBADDR instructions Date: Thu, 28 Feb 2019 19:22:00 -0000 Message-Id: <20190228191011.22517-1-hjl.tools@gmail.com> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-IsSubscribed: yes X-SW-Source: 2019-02/txt/msg02111.txt.bz2 32-bit indices in VSIB address are sign-extended to 64 bits. In x32, when 32-bit indices are used as addresses, like in vgatherdps %ymm7, 0(,%ymm9,1), %ymm6 32-bit indices, 0xf7fa3010, is sign-extended to 0xfffffffff7fa3010 which is invalid address. Add addr32 prefix to UNSPEC_VSIBADDR instructions for x32 if there is no base register nor symbol. This fixes 175.vpr and 254.gap in SPEC CPU 2000 on x32 with -Ofast -funroll-loops -march=haswell gcc/ PR target/89523 * config/i386/i386.c (ix86_print_operand): Also handle '_' to add addr32 prefix if required. (ix86_print_operand_punct_valid_p): Allow '_'. * config/i386/sse.md (*avx512pf_gatherpfsf_mask): Prepend "%_". (*avx512pf_gatherpfdf_mask): Likewise. (*avx512pf_scatterpfsf_mask): Likewise. (*avx512pf_scatterpfdf_mask): Likewise. (*avx2_gathersi): Likewise. (*avx2_gathersi_2): Likewise. (*avx2_gatherdi): Likewise. (*avx2_gatherdi_2): Likewise. (*avx2_gatherdi_3): Likewise. (*avx2_gatherdi_4): Likewise. (*avx512f_gathersi): Likewise. (*avx512f_gathersi_2): Likewise. (*avx512f_gatherdi): Likewise. (*avx512f_gatherdi_2): Likewise. (*avx512f_scattersi): Likewise. (*avx512f_scatterdi): Likewise. gcc/testsuite/ PR target/89523 * gcc.target/i386/pr89523-1.c: New test. * gcc.target/i386/pr89523-2.c: Likewise. * gcc.target/i386/pr89523-3.c: Likewise. * gcc.target/i386/pr89523-4.c: Likewise. * gcc.target/i386/pr89523-5.c: Likewise. * gcc.target/i386/pr89523-6.c: Likewise. * gcc.target/i386/pr89523-7.c: Likewise. * gcc.target/i386/pr89523-8.c: Likewise. * gcc.target/i386/pr89523-9.c: Likewise. xxx --- gcc/config/i386/i386.c | 39 ++++++++++++++++++- gcc/config/i386/sse.md | 46 +++++++++++------------ gcc/testsuite/gcc.target/i386/pr89523-1.c | 24 ++++++++++++ gcc/testsuite/gcc.target/i386/pr89523-2.c | 17 +++++++++ gcc/testsuite/gcc.target/i386/pr89523-3.c | 17 +++++++++ gcc/testsuite/gcc.target/i386/pr89523-4.c | 16 ++++++++ gcc/testsuite/gcc.target/i386/pr89523-5.c | 18 +++++++++ gcc/testsuite/gcc.target/i386/pr89523-6.c | 17 +++++++++ gcc/testsuite/gcc.target/i386/pr89523-7.c | 19 ++++++++++ gcc/testsuite/gcc.target/i386/pr89523-8.c | 19 ++++++++++ gcc/testsuite/gcc.target/i386/pr89523-9.c | 16 ++++++++ 11 files changed, 224 insertions(+), 24 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-1.c create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-2.c create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-3.c create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-4.c create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-5.c create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-6.c create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-7.c create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-8.c create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-9.c diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index b8357a7db5d..336696136de 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -17805,6 +17805,7 @@ print_reg (rtx x, int code, FILE *file) ~ -- print "i" if TARGET_AVX2, "f" otherwise. ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode ! -- print NOTRACK prefix for jxx/call/ret instructions if required. + _ -- print addr32 prefix if required. */ void @@ -18356,6 +18357,42 @@ ix86_print_operand (FILE *file, rtx x, int code) fputs ("addr32 ", file); return; + case '_': + if (TARGET_X32) + { + subrtx_var_iterator::array_type array; + FOR_EACH_SUBRTX_VAR (iter, array, + PATTERN (current_output_insn), ALL) + { + rtx addr = *iter; + if (!MEM_P (addr)) + continue; + addr = XEXP (addr, 0); + if (GET_CODE (addr) == UNSPEC + && XINT (addr, 1) == UNSPEC_VSIBADDR) + { + /* NB: 32-bit indices in VSIB address are + sign-extended to 64 bits. In x32, if 32-bit + address 0xf7fa3010 is sign-extended to + 0xfffffffff7fa3010 which is invalid address. + Add addr32 prefix if there is no base register + nor symbol. */ + bool ok; + struct ix86_address parts; + ok = ix86_decompose_address (XVECEXP (addr, 0, 0), + &parts); + gcc_assert (ok && parts.index == NULL_RTX); + if (parts.base == NULL_RTX + && (parts.disp == NULL_RTX + || !symbolic_operand (parts.disp, + GET_MODE (parts.disp)))) + fputs ("addr32 ", file); + break; + } + } + } + return; + case '!': if (ix86_notrack_prefixed_insn_p (current_output_insn)) fputs ("notrack ", file); @@ -18507,7 +18544,7 @@ static bool ix86_print_operand_punct_valid_p (unsigned char code) { return (code == '*' || code == '+' || code == '&' || code == ';' - || code == '~' || code == '^' || code == '!'); + || code == '~' || code == '^' || code == '!' || code == '_'); } /* Print a memory operand whose address is ADDR. */ diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index ac299495b2c..13692e47123 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -17401,9 +17401,9 @@ case 3: /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as gas changed what it requires incompatibly. */ - return "vgatherpf0ps\t{%5%{%0%}|%X5%{%0%}}"; + return "%_vgatherpf0ps\t{%5%{%0%}|%X5%{%0%}}"; case 2: - return "vgatherpf1ps\t{%5%{%0%}|%X5%{%0%}}"; + return "%_vgatherpf1ps\t{%5%{%0%}|%X5%{%0%}}"; default: gcc_unreachable (); } @@ -17448,9 +17448,9 @@ case 3: /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as gas changed what it requires incompatibly. */ - return "vgatherpf0pd\t{%5%{%0%}|%X5%{%0%}}"; + return "%_vgatherpf0pd\t{%5%{%0%}|%X5%{%0%}}"; case 2: - return "vgatherpf1pd\t{%5%{%0%}|%X5%{%0%}}"; + return "%_vgatherpf1pd\t{%5%{%0%}|%X5%{%0%}}"; default: gcc_unreachable (); } @@ -17496,10 +17496,10 @@ case 7: /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as gas changed what it requires incompatibly. */ - return "vscatterpf0ps\t{%5%{%0%}|%X5%{%0%}}"; + return "%_vscatterpf0ps\t{%5%{%0%}|%X5%{%0%}}"; case 2: case 6: - return "vscatterpf1ps\t{%5%{%0%}|%X5%{%0%}}"; + return "%_vscatterpf1ps\t{%5%{%0%}|%X5%{%0%}}"; default: gcc_unreachable (); } @@ -17545,10 +17545,10 @@ case 7: /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as gas changed what it requires incompatibly. */ - return "vscatterpf0pd\t{%5%{%0%}|%X5%{%0%}}"; + return "%_vscatterpf0pd\t{%5%{%0%}|%X5%{%0%}}"; case 2: case 6: - return "vscatterpf1pd\t{%5%{%0%}|%X5%{%0%}}"; + return "%_vscatterpf1pd\t{%5%{%0%}|%X5%{%0%}}"; default: gcc_unreachable (); } @@ -20292,7 +20292,7 @@ UNSPEC_GATHER)) (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))] "TARGET_AVX2" - "vgatherd\t{%1, %7, %0|%0, %7, %1}" + "%_vgatherd\t{%1, %7, %0|%0, %7, %1}" [(set_attr "type" "ssemov") (set_attr "prefix" "vex") (set_attr "mode" "")]) @@ -20312,7 +20312,7 @@ UNSPEC_GATHER)) (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))] "TARGET_AVX2" - "vgatherd\t{%1, %6, %0|%0, %6, %1}" + "%_vgatherd\t{%1, %6, %0|%0, %6, %1}" [(set_attr "type" "ssemov") (set_attr "prefix" "vex") (set_attr "mode" "")]) @@ -20353,7 +20353,7 @@ UNSPEC_GATHER)) (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))] "TARGET_AVX2" - "vgatherq\t{%5, %7, %2|%2, %7, %5}" + "%_vgatherq\t{%5, %7, %2|%2, %7, %5}" [(set_attr "type" "ssemov") (set_attr "prefix" "vex") (set_attr "mode" "")]) @@ -20375,8 +20375,8 @@ "TARGET_AVX2" { if (mode != mode) - return "vgatherq\t{%4, %6, %x0|%x0, %6, %4}"; - return "vgatherq\t{%4, %6, %0|%0, %6, %4}"; + return "%_vgatherq\t{%4, %6, %x0|%x0, %6, %4}"; + return "%_vgatherq\t{%4, %6, %0|%0, %6, %4}"; } [(set_attr "type" "ssemov") (set_attr "prefix" "vex") @@ -20400,7 +20400,7 @@ (const_int 2) (const_int 3)]))) (clobber (match_scratch:VI4F_256 1 "=&x"))] "TARGET_AVX2" - "vgatherq\t{%5, %7, %0|%0, %7, %5}" + "%_vgatherq\t{%5, %7, %0|%0, %7, %5}" [(set_attr "type" "ssemov") (set_attr "prefix" "vex") (set_attr "mode" "")]) @@ -20423,7 +20423,7 @@ (const_int 2) (const_int 3)]))) (clobber (match_scratch:VI4F_256 1 "=&x"))] "TARGET_AVX2" - "vgatherq\t{%4, %6, %0|%0, %6, %4}" + "%_vgatherq\t{%4, %6, %0|%0, %6, %4}" [(set_attr "type" "ssemov") (set_attr "prefix" "vex") (set_attr "mode" "")]) @@ -20463,7 +20463,7 @@ "TARGET_AVX512F" ;; %X6 so that we don't emit any *WORD PTR for -masm=intel, as ;; gas changed what it requires incompatibly. - "vgatherd\t{%6, %0%{%2%}|%0%{%2%}, %X6}" + "%_vgatherd\t{%6, %0%{%2%}|%0%{%2%}, %X6}" [(set_attr "type" "ssemov") (set_attr "prefix" "evex") (set_attr "mode" "")]) @@ -20484,7 +20484,7 @@ "TARGET_AVX512F" ;; %X5 so that we don't emit any *WORD PTR for -masm=intel, as ;; gas changed what it requires incompatibly. - "vgatherd\t{%5, %0%{%1%}|%0%{%1%}, %X5}" + "%_vgatherd\t{%5, %0%{%1%}|%0%{%1%}, %X5}" [(set_attr "type" "ssemov") (set_attr "prefix" "evex") (set_attr "mode" "")]) @@ -20525,7 +20525,7 @@ "TARGET_AVX512F" ;; %X6 so that we don't emit any *WORD PTR for -masm=intel, as ;; gas changed what it requires incompatibly. - "vgatherq\t{%6, %1%{%2%}|%1%{%2%}, %X6}" + "%_vgatherq\t{%6, %1%{%2%}|%1%{%2%}, %X6}" [(set_attr "type" "ssemov") (set_attr "prefix" "evex") (set_attr "mode" "")]) @@ -20550,11 +20550,11 @@ if (mode != mode) { if ( != 64) - return "vgatherq\t{%5, %x0%{%1%}|%x0%{%1%}, %X5}"; + return "%_vgatherq\t{%5, %x0%{%1%}|%x0%{%1%}, %X5}"; else - return "vgatherq\t{%5, %t0%{%1%}|%t0%{%1%}, %X5}"; + return "%_vgatherq\t{%5, %t0%{%1%}|%t0%{%1%}, %X5}"; } - return "vgatherq\t{%5, %0%{%1%}|%0%{%1%}, %X5}"; + return "%_vgatherq\t{%5, %0%{%1%}|%0%{%1%}, %X5}"; } [(set_attr "type" "ssemov") (set_attr "prefix" "evex") @@ -20593,7 +20593,7 @@ "TARGET_AVX512F" ;; %X5 so that we don't emit any *WORD PTR for -masm=intel, as ;; gas changed what it requires incompatibly. - "vscatterd\t{%3, %5%{%1%}|%X5%{%1%}, %3}" + "%_vscatterd\t{%3, %5%{%1%}|%X5%{%1%}, %3}" [(set_attr "type" "ssemov") (set_attr "prefix" "evex") (set_attr "mode" "")]) @@ -20631,7 +20631,7 @@ "TARGET_AVX512F" ;; %X5 so that we don't emit any *WORD PTR for -masm=intel, as ;; gas changed what it requires incompatibly. - "vscatterq\t{%3, %5%{%1%}|%X5%{%1%}, %3}" + "%_vscatterq\t{%3, %5%{%1%}|%X5%{%1%}, %3}" [(set_attr "type" "ssemov") (set_attr "prefix" "evex") (set_attr "mode" "")]) diff --git a/gcc/testsuite/gcc.target/i386/pr89523-1.c b/gcc/testsuite/gcc.target/i386/pr89523-1.c new file mode 100644 index 00000000000..f7ed24d1592 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr89523-1.c @@ -0,0 +1,24 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-require-effective-target maybe_x32 } */ +/* { dg-options "-mx32 -Ofast -funroll-loops -march=haswell" } */ +/* { dg-final { scan-assembler-not "\tvgather" } } */ +/* { dg-final { scan-assembler "addr32 vgather" } } */ + +void foo (void); + +extern float *ncost; + +float +bar (int type, int num) +{ + int i; + float cost; + + cost = 0; + for (i = 0; i < num; i++) + if (type) + cost += ncost[i]; + else + foo (); + return (cost); +} diff --git a/gcc/testsuite/gcc.target/i386/pr89523-2.c b/gcc/testsuite/gcc.target/i386/pr89523-2.c new file mode 100644 index 00000000000..7423f579b5e --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr89523-2.c @@ -0,0 +1,17 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-require-effective-target maybe_x32 } */ +/* { dg-options "-mx32 -O2 -march=haswell" } */ +/* { dg-final { scan-assembler "\tvgather" } } */ +/* { dg-final { scan-assembler-not "addr32 vgather" } } */ + +#include + +__m128d x; +double *base; +__m128i idx; + +void extern +avx2_test (void) +{ + x = _mm_i32gather_pd (base, idx, 1); +} diff --git a/gcc/testsuite/gcc.target/i386/pr89523-3.c b/gcc/testsuite/gcc.target/i386/pr89523-3.c new file mode 100644 index 00000000000..606f9aac659 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr89523-3.c @@ -0,0 +1,17 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-require-effective-target maybe_x32 } */ +/* { dg-options "-mx32 -O2 -march=haswell" } */ +/* { dg-final { scan-assembler "\tvgather" } } */ +/* { dg-final { scan-assembler-not "addr32 vgather" } } */ + +#include + +__m128d x; +double *base; +__m128i idx; + +void extern +avx2_test (void) +{ + x = _mm_i64gather_pd (base, idx, 1); +} diff --git a/gcc/testsuite/gcc.target/i386/pr89523-4.c b/gcc/testsuite/gcc.target/i386/pr89523-4.c new file mode 100644 index 00000000000..155b818191f --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr89523-4.c @@ -0,0 +1,16 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-require-effective-target maybe_x32 } */ +/* { dg-options "-mx32 -O2 -march=haswell" } */ +/* { dg-final { scan-assembler-not "\tvgather" } } */ +/* { dg-final { scan-assembler "addr32 vgather" } } */ + +#include + +__m128d x; +__m128i idx; + +void extern +avx2_test (void) +{ + x = _mm_i32gather_pd (NULL, idx, 1); +} diff --git a/gcc/testsuite/gcc.target/i386/pr89523-5.c b/gcc/testsuite/gcc.target/i386/pr89523-5.c new file mode 100644 index 00000000000..11210ff8f78 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr89523-5.c @@ -0,0 +1,18 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-require-effective-target maybe_x32 } */ +/* { dg-options "-mx32 -O2 -mavx512pf" } */ +/* { dg-final { scan-assembler "\tvgather" } } */ +/* { dg-final { scan-assembler-not "addr32 vgather" } } */ + +#include + +volatile __m256i idx; +volatile __mmask8 m8; +void *base; + +void extern +avx512pf_test (void) +{ + _mm512_prefetch_i32gather_pd (idx, base, 8, _MM_HINT_T0); + _mm512_mask_prefetch_i32gather_pd (idx, m8, base, 8, _MM_HINT_T0); +} diff --git a/gcc/testsuite/gcc.target/i386/pr89523-6.c b/gcc/testsuite/gcc.target/i386/pr89523-6.c new file mode 100644 index 00000000000..0254ad435e3 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr89523-6.c @@ -0,0 +1,17 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-require-effective-target maybe_x32 } */ +/* { dg-options "-mx32 -O2 -mavx512pf" } */ +/* { dg-final { scan-assembler-not "\tvgather" } } */ +/* { dg-final { scan-assembler "addr32 vgather" } } */ + +#include + +volatile __m256i idx; +volatile __mmask8 m8; + +void extern +avx512pf_test (void) +{ + _mm512_prefetch_i32gather_pd (idx, NULL, 8, _MM_HINT_T0); + _mm512_mask_prefetch_i32gather_pd (idx, m8, NULL, 8, _MM_HINT_T0); +} diff --git a/gcc/testsuite/gcc.target/i386/pr89523-7.c b/gcc/testsuite/gcc.target/i386/pr89523-7.c new file mode 100644 index 00000000000..1c357bc8505 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr89523-7.c @@ -0,0 +1,19 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-require-effective-target maybe_x32 } */ +/* { dg-options "-mx32 -O2 -mavx512f" } */ +/* { dg-final { scan-assembler "\tvscatter" } } */ +/* { dg-final { scan-assembler-not "addr32 vscatter" } } */ + +#include + +volatile __m512d src; +volatile __m256i idx; +volatile __mmask8 m8; +double *addr; + +void extern +avx512f_test (void) +{ + _mm512_i32scatter_pd (addr, idx, src, 8); + _mm512_mask_i32scatter_pd (addr, m8, idx, src, 8); +} diff --git a/gcc/testsuite/gcc.target/i386/pr89523-8.c b/gcc/testsuite/gcc.target/i386/pr89523-8.c new file mode 100644 index 00000000000..37b0a0bebb7 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr89523-8.c @@ -0,0 +1,19 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-require-effective-target maybe_x32 } */ +/* { dg-options "-mx32 -O2 -mavx512f" } */ +/* { dg-final { scan-assembler "\tvscatter" } } */ +/* { dg-final { scan-assembler-not "addr32 vscatter" } } */ + +#include + +volatile __m512d src; +volatile __m512i idx; +volatile __mmask8 m8; +double *addr; + +void extern +avx512f_test (void) +{ + _mm512_i64scatter_pd (addr, idx, src, 8); + _mm512_mask_i64scatter_pd (addr, m8, idx, src, 8); +} diff --git a/gcc/testsuite/gcc.target/i386/pr89523-9.c b/gcc/testsuite/gcc.target/i386/pr89523-9.c new file mode 100644 index 00000000000..a878f1e9efb --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr89523-9.c @@ -0,0 +1,16 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-require-effective-target maybe_x32 } */ +/* { dg-options "-mx32 -O2 -mavx512f" } */ +/* { dg-final { scan-assembler-not "\tvscatter" } } */ +/* { dg-final { scan-assembler "addr32 vscatter" } } */ + +#include + +volatile __m512d src; +volatile __m256i idx; + +void extern +avx512f_test (void) +{ + _mm512_i32scatter_pd (NULL, idx, src, 8); +} -- 2.20.1