From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 2078) id 098AF385AC1C; Tue, 12 Dec 2023 10:49:42 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 098AF385AC1C DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gcc.gnu.org; s=default; t=1702378182; bh=om0hu8CpIGnpdmoIC9fjVuAzsE4xoOp323SQEOg2UJw=; h=From:To:Subject:Date:From; b=NfMfhwCYhAxFAFrbcH6jAZiw0Tou43WhA2ry9Ev7gbONF4eRbgVGXHljaGe4yg+Qh UGtJqnLH/bcy1YDCLCkICD74h7m7MH3nN58agjCYPiQ9hX06srxX1pBlL06+knAc9K B3C1gGLRWhq43bPzDtc81I8fELNRGWw1qmnU18ls= MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Content-Type: text/plain; charset="utf-8" From: hongtao Liu To: gcc-cvs@gcc.gnu.org Subject: [gcc r11-11135] Don't assume it's AVX_U128_CLEAN after call_insn whose abi.mode_clobber(V4DImode) deosn't contains a X-Act-Checkin: gcc X-Git-Author: liuhongt X-Git-Refname: refs/heads/releases/gcc-11 X-Git-Oldrev: 15c189a60376f03c5e3d6e32d468e6f3cce6d83a X-Git-Newrev: 172b7ad97c46594d44aeb73dce03daff5f575cfb Message-Id: <20231212104942.098AF385AC1C@sourceware.org> Date: Tue, 12 Dec 2023 10:49:42 +0000 (GMT) List-Id: https://gcc.gnu.org/g:172b7ad97c46594d44aeb73dce03daff5f575cfb commit r11-11135-g172b7ad97c46594d44aeb73dce03daff5f575cfb Author: liuhongt Date: Thu Dec 7 09:17:27 2023 +0800 Don't assume it's AVX_U128_CLEAN after a call_insn whose abi.mode_clobbers (V4DImode) doesn't contain all SSE_REGS. If the callee doesn't clobber any SSE registers, or only clobbers their 128-bit part, then vzeroupper isn't issued before the function exits, so the status after the call is not CLEAN but ANY. Also, for a sibling_call it's safe to issue a vzeroupper; without that, a vzeroupper could be missing since there's no mode_exit for sibling_call_p. gcc/ChangeLog: PR target/112891 * config/i386/i386.c (ix86_avx_u128_mode_after): Return AVX_U128_ANY if callee_abi doesn't clobber all_sse_regs to align with ix86_avx_u128_mode_needed. 
(ix86_avx_u128_mode_needed): Return AVX_U128_CLEAN for sibling_call. gcc/testsuite/ChangeLog: * gcc.target/i386/pr112891.c: New test. * gcc.target/i386/pr112891-2.c: New test. (cherry picked from commit fc189a08f5b7ad5889bd4c6b320c1dd99dd5d642) Diff: --- gcc/config/i386/i386.c | 22 +++++++++++++++++++--- gcc/testsuite/gcc.target/i386/pr112891-2.c | 30 ++++++++++++++++++++++++++++++ gcc/testsuite/gcc.target/i386/pr112891.c | 29 +++++++++++++++++++++++++++++ 3 files changed, 78 insertions(+), 3 deletions(-) diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index e9fcb0a16a6..87034a22633 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -14119,8 +14119,12 @@ ix86_avx_u128_mode_needed (rtx_insn *insn) modes wider than 256 bits. It's only safe to issue a vzeroupper if all SSE registers are clobbered. */ const function_abi &abi = insn_callee_abi (insn); - if (!hard_reg_set_subset_p (reg_class_contents[SSE_REGS], - abi.mode_clobbers (V4DImode))) + /* Should be safe to issue an vzeroupper before sibling_call_p. + Also there not mode_exit for sibling_call, so there could be + missing vzeroupper for that. */ + if (!(SIBLING_CALL_P (insn) + || hard_reg_set_subset_p (reg_class_contents[SSE_REGS], + abi.mode_clobbers (V4DImode)))) return AVX_U128_ANY; return AVX_U128_CLEAN; @@ -14244,7 +14248,19 @@ ix86_avx_u128_mode_after (int mode, rtx_insn *insn) bool avx_upper_reg_found = false; note_stores (insn, ix86_check_avx_upper_stores, &avx_upper_reg_found); - return avx_upper_reg_found ? AVX_U128_DIRTY : AVX_U128_CLEAN; + if (avx_upper_reg_found) + return AVX_U128_DIRTY; + + /* If the function desn't clobber any sse registers or only clobber + 128-bit part, Then vzeroupper isn't issued before the function exit. + the status not CLEAN but ANY after the function. 
*/ + const function_abi &abi = insn_callee_abi (insn); + if (!(SIBLING_CALL_P (insn) + || hard_reg_set_subset_p (reg_class_contents[SSE_REGS], + abi.mode_clobbers (V4DImode)))) + return AVX_U128_ANY; + + return AVX_U128_CLEAN; } /* Otherwise, return current mode. Remember that if insn diff --git a/gcc/testsuite/gcc.target/i386/pr112891-2.c b/gcc/testsuite/gcc.target/i386/pr112891-2.c new file mode 100644 index 00000000000..164c3985d50 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr112891-2.c @@ -0,0 +1,30 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx2 -O3" } */ +/* { dg-final { scan-assembler-times "vzeroupper" 1 } } */ + +void +__attribute__((noinline)) +bar (double* a) +{ + a[0] = 1.0; + a[1] = 2.0; +} + +double +__attribute__((noinline)) +foo (double* __restrict a, double* b) +{ + a[0] += b[0]; + a[1] += b[1]; + a[2] += b[2]; + a[3] += b[3]; + bar (b); + return a[5] + b[5]; +} + +double +foo1 (double* __restrict a, double* b) +{ + double c = foo (a, b); + return __builtin_exp (c); +} diff --git a/gcc/testsuite/gcc.target/i386/pr112891.c b/gcc/testsuite/gcc.target/i386/pr112891.c new file mode 100644 index 00000000000..dbf6c67948a --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr112891.c @@ -0,0 +1,29 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx2 -O3" } */ +/* { dg-final { scan-assembler-times "vzeroupper" 1 } } */ + +void +__attribute__((noinline)) +bar (double* a) +{ + a[0] = 1.0; + a[1] = 2.0; +} + +void +__attribute__((noinline)) +foo (double* __restrict a, double* b) +{ + a[0] += b[0]; + a[1] += b[1]; + a[2] += b[2]; + a[3] += b[3]; + bar (b); +} + +double +foo1 (double* __restrict a, double* b) +{ + foo (a, b); + return __builtin_exp (b[1]); +}