public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r11-11135] Don't assume it's AVX_U128_CLEAN after call_insn whose abi.mode_clobber(V4DImode) deosn't contains a
@ 2023-12-12 10:49 hongtao Liu
  0 siblings, 0 replies; only message in thread
From: hongtao Liu @ 2023-12-12 10:49 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:172b7ad97c46594d44aeb73dce03daff5f575cfb

commit r11-11135-g172b7ad97c46594d44aeb73dce03daff5f575cfb
Author: liuhongt <hongtao.liu@intel.com>
Date:   Thu Dec 7 09:17:27 2023 +0800

    Don't assume it's AVX_U128_CLEAN after call_insn whose abi.mode_clobbers(V4DImode) doesn't contain all SSE_REGS.
    
    If the function doesn't clobber any SSE registers, or only clobbers
    their 128-bit part, then vzeroupper isn't issued before the function
    exits, so the status after the call is not CLEAN but ANY.
    
    Also, for a sibling call it's safe to issue a vzeroupper. Without
    this, a vzeroupper could be missing, since there's no mode_exit for
    sibling_call_p.
    
    gcc/ChangeLog:
    
            PR target/112891
            * config/i386/i386.c (ix86_avx_u128_mode_after): Return
            AVX_U128_ANY if callee_abi doesn't clobber all_sse_regs to
            align with ix86_avx_u128_mode_needed.
            (ix86_avx_u128_mode_needed): Return AVX_U128_CLEAN for
            sibling_call.
    
    gcc/testsuite/ChangeLog:
    
            * gcc.target/i386/pr112891.c: New test.
            * gcc.target/i386/pr112891-2.c: New test.
    
    (cherry picked from commit fc189a08f5b7ad5889bd4c6b320c1dd99dd5d642)

Diff:
---
 gcc/config/i386/i386.c                     | 22 +++++++++++++++++++---
 gcc/testsuite/gcc.target/i386/pr112891-2.c | 30 ++++++++++++++++++++++++++++++
 gcc/testsuite/gcc.target/i386/pr112891.c   | 29 +++++++++++++++++++++++++++++
 3 files changed, 78 insertions(+), 3 deletions(-)

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index e9fcb0a16a6..87034a22633 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -14119,8 +14119,12 @@ ix86_avx_u128_mode_needed (rtx_insn *insn)
 	 modes wider than 256 bits.  It's only safe to issue a
 	 vzeroupper if all SSE registers are clobbered.  */
       const function_abi &abi = insn_callee_abi (insn);
-      if (!hard_reg_set_subset_p (reg_class_contents[SSE_REGS],
-				  abi.mode_clobbers (V4DImode)))
+      /* It should be safe to issue a vzeroupper before a sibling call.
+	 Also, there is no mode_exit for a sibling call, so a vzeroupper
+	 could otherwise be missing.  */
+      if (!(SIBLING_CALL_P (insn)
+	    || hard_reg_set_subset_p (reg_class_contents[SSE_REGS],
+				      abi.mode_clobbers (V4DImode))))
 	return AVX_U128_ANY;
 
       return AVX_U128_CLEAN;
@@ -14244,7 +14248,19 @@ ix86_avx_u128_mode_after (int mode, rtx_insn *insn)
       bool avx_upper_reg_found = false;
       note_stores (insn, ix86_check_avx_upper_stores, &avx_upper_reg_found);
 
-      return avx_upper_reg_found ? AVX_U128_DIRTY : AVX_U128_CLEAN;
+      if (avx_upper_reg_found)
+	return AVX_U128_DIRTY;
+
+      /* If the function doesn't clobber any SSE registers, or only clobbers
+	 their 128-bit part, then vzeroupper isn't issued before the function
+	 exits, so the status after the call is not CLEAN but ANY.  */
+      const function_abi &abi = insn_callee_abi (insn);
+      if (!(SIBLING_CALL_P (insn)
+	    || hard_reg_set_subset_p (reg_class_contents[SSE_REGS],
+				      abi.mode_clobbers (V4DImode))))
+	return AVX_U128_ANY;
+
+      return AVX_U128_CLEAN;
     }
 
   /* Otherwise, return current mode.  Remember that if insn
diff --git a/gcc/testsuite/gcc.target/i386/pr112891-2.c b/gcc/testsuite/gcc.target/i386/pr112891-2.c
new file mode 100644
index 00000000000..164c3985d50
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr112891-2.c
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx2 -O3" } */
+/* { dg-final { scan-assembler-times "vzeroupper" 1 } } */
+
+void
+__attribute__((noinline))
+bar (double* a)
+{
+  a[0] = 1.0;
+  a[1] = 2.0;
+}
+
+double
+__attribute__((noinline))
+foo (double* __restrict a, double* b)
+{
+  a[0] += b[0];
+  a[1] += b[1];
+  a[2] += b[2];
+  a[3] += b[3];
+  bar (b);
+  return a[5] + b[5];
+}
+
+double
+foo1 (double* __restrict a, double* b)
+{
+  double c = foo (a, b);
+  return __builtin_exp (c);
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr112891.c b/gcc/testsuite/gcc.target/i386/pr112891.c
new file mode 100644
index 00000000000..dbf6c67948a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr112891.c
@@ -0,0 +1,29 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx2 -O3" } */
+/* { dg-final { scan-assembler-times "vzeroupper" 1 } } */
+
+void
+__attribute__((noinline))
+bar (double* a)
+{
+  a[0] = 1.0;
+  a[1] = 2.0;
+}
+
+void
+__attribute__((noinline))
+foo (double* __restrict a, double* b)
+{
+  a[0] += b[0];
+  a[1] += b[1];
+  a[2] += b[2];
+  a[3] += b[3];
+  bar (b);
+}
+
+double
+foo1 (double* __restrict a, double* b)
+{
+  foo (a, b);
+  return __builtin_exp (b[1]);
+}

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2023-12-12 10:49 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-12-12 10:49 [gcc r11-11135] Don't assume it's AVX_U128_CLEAN after call_insn whose abi.mode_clobber(V4DImode) deosn't contains a hongtao Liu

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).