public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r12-2571] x86: Don't set AVX_U128_DIRTY when zeroing YMM/ZMM register
@ 2021-07-28 14:29 H.J. Lu
0 siblings, 0 replies; only message in thread
From: H.J. Lu @ 2021-07-28 14:29 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:9775e465c1fbfc32656de77c618c61acf5bd905d
commit r12-2571-g9775e465c1fbfc32656de77c618c61acf5bd905d
Author: H.J. Lu <hjl.tools@gmail.com>
Date: Tue Jul 27 07:46:04 2021 -0700
x86: Don't set AVX_U128_DIRTY when zeroing YMM/ZMM register
There is no SSE <-> AVX transition penalty if the upper bits of YMM/ZMM
registers are unchanged, and a YMM/ZMM store doesn't change the upper
bits of YMM/ZMM registers.
1. Since zeroing a YMM/ZMM register is implemented by zeroing the XMM
register, don't set AVX_U128_DIRTY when zeroing a YMM/ZMM register.
2. Since a store doesn't change the INIT state of the upper bits of a
YMM/ZMM register, don't set AVX_U128_DIRTY on a store if the source
of the store was never non-zero.
Here are the vzeroupper count differences on SPEC CPU 2017 with
-Ofast -march=skylake-avx512:
Benchmark Before After Diff
500.perlbench_r 226 225 -0.44%
502.gcc_r 1263 1103 -12.67%
503.bwaves_r 14 14 0.00%
505.mcf_r 29 28 -3.45%
507.cactuBSSN_r 4651 4628 -0.49%
508.namd_r 433 432 -0.23%
510.parest_r 20380 19347 -5.07%
511.povray_r 495 452 -8.69%
519.lbm_r 2 2 0.00%
520.omnetpp_r 5954 5677 -4.65%
521.wrf_r 12353 12339 -0.11%
523.xalancbmk_r 13137 13001 -1.04%
525.x264_r 192 191 -0.52%
526.blender_r 2515 2366 -5.92%
527.cam4_r 4601 4583 -0.39%
531.deepsjeng_r 20 19 -5.00%
538.imagick_r 898 805 -10.36%
541.leela_r 427 399 -6.56%
544.nab_r 74 74 0.00%
548.exchange2_r 72 72 0.00%
549.fotonik3d_r 318 318 0.00%
554.roms_r 558 554 -0.72%
557.xz_r 79 52 -34.18%
and performance differences are within noise range.
gcc/
PR target/101456
* config/i386/i386.c (ix86_avx_u128_mode_needed): Don't set
AVX_U128_DIRTY when all bits are zero.
gcc/testsuite/
PR target/101456
* gcc.target/i386/pr101456-1.c: New test.
* gcc.target/i386/pr101456-2.c: Likewise.
Diff:
---
gcc/config/i386/i386.c | 88 ++++++++++++++++++++++++++++++
gcc/testsuite/gcc.target/i386/pr101456-1.c | 33 +++++++++++
gcc/testsuite/gcc.target/i386/pr101456-2.c | 33 +++++++++++
3 files changed, 154 insertions(+)
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index ac59ebf8666..12ae37e7103 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -14149,6 +14149,94 @@ ix86_avx_u128_mode_needed (rtx_insn *insn)
return AVX_U128_CLEAN;
}
+ rtx set = single_set (insn);
+ if (set)
+ {
+ rtx dest = SET_DEST (set);
+ rtx src = SET_SRC (set);
+ if (ix86_check_avx_upper_register (dest))
+ {
+ /* This is an YMM/ZMM load. Return AVX_U128_DIRTY if the
+ source isn't zero. */
+ if (standard_sse_constant_p (src, GET_MODE (dest)) != 1)
+ return AVX_U128_DIRTY;
+ else
+ return AVX_U128_ANY;
+ }
+ else if (ix86_check_avx_upper_register (src))
+ {
+ /* This is an YMM/ZMM store. Check for the source operand
+ of SRC DEFs in the same basic block before INSN. */
+ basic_block bb = BLOCK_FOR_INSN (insn);
+ rtx_insn *end = BB_END (bb);
+
+ /* Return AVX_U128_DIRTY if there is no DEF in the same basic
+ block. */
+ int status = AVX_U128_DIRTY;
+
+ for (df_ref def = DF_REG_DEF_CHAIN (REGNO (src));
+ def; def = DF_REF_NEXT_REG (def))
+ if (DF_REF_BB (def) == bb)
+ {
+ /* Ignore DEF from different basic blocks. */
+ rtx_insn *def_insn = DF_REF_INSN (def);
+
+ /* Check if DEF_INSN is before INSN. */
+ rtx_insn *next;
+ for (next = NEXT_INSN (def_insn);
+ next != nullptr && next != end && next != insn;
+ next = NEXT_INSN (next))
+ ;
+
+ /* Skip if DEF_INSN isn't before INSN. */
+ if (next != insn)
+ continue;
+
+ /* Return AVX_U128_DIRTY if the source operand of
+ DEF_INSN isn't constant zero. */
+
+ if (CALL_P (def_insn))
+ {
+ bool avx_upper_reg_found = false;
+ note_stores (def_insn, ix86_check_avx_upper_stores,
+ &avx_upper_reg_found);
+
+ /* Return AVX_U128_DIRTY if call returns AVX. */
+ if (avx_upper_reg_found)
+ return AVX_U128_DIRTY;
+
+ continue;
+ }
+
+ set = single_set (def_insn);
+ if (!set)
+ return AVX_U128_DIRTY;
+
+ dest = SET_DEST (set);
+
+ /* Skip if DEF_INSN is not an AVX load. */
+ if (ix86_check_avx_upper_register (dest))
+ {
+ src = SET_SRC (set);
+ /* Return AVX_U128_DIRTY if the source operand isn't
+ constant zero. */
+ if (standard_sse_constant_p (src, GET_MODE (dest))
+ != 1)
+ return AVX_U128_DIRTY;
+ }
+
+ /* We get here only if all AVX loads are from constant
+ zero. */
+ status = AVX_U128_ANY;
+ }
+
+ return status;
+ }
+
+ /* This isn't YMM/ZMM load/store. */
+ return AVX_U128_ANY;
+ }
+
/* Require DIRTY mode if a 256bit or 512bit AVX register is referenced.
Hardware changes state only when a 256bit register is written to,
but we need to prevent the compiler from moving optimal insertion
diff --git a/gcc/testsuite/gcc.target/i386/pr101456-1.c b/gcc/testsuite/gcc.target/i386/pr101456-1.c
new file mode 100644
index 00000000000..803fc6e0207
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr101456-1.c
@@ -0,0 +1,33 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=skylake" } */
+
+#include <x86intrin.h>
+
+extern __m256 x1;
+extern __m256d x2;
+extern __m256i x3;
+
+extern void bar (void);
+
+void
+foo1 (void)
+{
+ x1 = _mm256_setzero_ps ();
+ bar ();
+}
+
+void
+foo2 (void)
+{
+ x2 = _mm256_setzero_pd ();
+ bar ();
+}
+
+void
+foo3 (void)
+{
+ x3 = _mm256_setzero_si256 ();
+ bar ();
+}
+
+/* { dg-final { scan-assembler-not "vzeroupper" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr101456-2.c b/gcc/testsuite/gcc.target/i386/pr101456-2.c
new file mode 100644
index 00000000000..554a0f1702c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr101456-2.c
@@ -0,0 +1,33 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=skylake" } */
+
+#include <x86intrin.h>
+
+extern __m256 x1;
+extern __m256d x2;
+extern __m256i x3;
+
+extern __m256 bar (void);
+
+void
+foo1 (void)
+{
+ bar ();
+ x1 = _mm256_setzero_ps ();
+}
+
+void
+foo2 (void)
+{
+ bar ();
+ x2 = _mm256_setzero_pd ();
+}
+
+void
+foo3 (void)
+{
+ bar ();
+ x3 = _mm256_setzero_si256 ();
+}
+
+/* { dg-final { scan-assembler-times "vzeroupper" 3 } } */
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2021-07-28 14:29 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-07-28 14:29 [gcc r12-2571] x86: Don't set AVX_U128_DIRTY when zeroing YMM/ZMM register H.J. Lu
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).