From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 2078) id 257D53858D32; Wed, 28 Jun 2023 01:54:25 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 257D53858D32 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gcc.gnu.org; s=default; t=1687917265; bh=5H+b1w5O8D8TiZkzWtYDDrHoSX6uNHs3Nekj35OIxk0=; h=From:To:Subject:Date:From; b=hpNXVfbQ+V7QWVjopB+/wv8+qV8A/fppXCtQs4O4P14ciTrXtA084ar0WJvOzf0Np vG3c9B9+j/od0JWgRspsU7V1FOxz0YDxs4WWnQAi5eT2b7bp3qIgkuObWQ2g4ppv19 iJoQ1xR3x0iqxLp0Pzxoaxr8NBV6htwI3c8M5qXY= MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Content-Type: text/plain; charset="utf-8" From: hongtao Liu To: gcc-cvs@gcc.gnu.org Subject: [gcc r12-9734] Make option mvzeroupper independent of optimization level. X-Act-Checkin: gcc X-Git-Author: liuhongt X-Git-Refname: refs/heads/releases/gcc-12 X-Git-Oldrev: 5b9b636513cd17447ea708f2dba21978cb96909b X-Git-Newrev: 41b037966ce4ad9b6381def1e4933111d1974da1 Message-Id: <20230628015425.257D53858D32@sourceware.org> Date: Wed, 28 Jun 2023 01:54:25 +0000 (GMT) List-Id: https://gcc.gnu.org/g:41b037966ce4ad9b6381def1e4933111d1974da1 commit r12-9734-g41b037966ce4ad9b6381def1e4933111d1974da1 Author: liuhongt Date: Mon Jun 26 09:50:25 2023 +0800 Make option mvzeroupper independent of optimization level. pass_insert_vzeroupper is under condition TARGET_AVX && TARGET_VZEROUPPER && flag_expensive_optimizations && !optimize_size But the documentation of -mvzeroupper doesn't mention that the insertion requires -O2 or above, which may confuse users when they explicitly use -Os -mvzeroupper. ------------ mvzeroupper Target Mask(VZEROUPPER) Save Generate vzeroupper instruction before a transfer of control flow out of the function. ------------ The patch moves flag_expensive_optimizations && !optimize_size to ix86_option_override_internal. It makes -mvzeroupper independent of optimization level, but still keeps the behavior of architecture tuning(emit_vzeroupper) unchanged. 
gcc/ChangeLog: * config/i386/i386-features.cc (pass_insert_vzeroupper:gate): Move flag_expensive_optimizations && !optimize_size to .. * config/i386/i386-options.cc (ix86_option_override_internal): .. this, it makes -mvzeroupper independent of optimization level, but still keeps the behavior of architecture tuning(emit_vzeroupper) unchanged. gcc/testsuite/ChangeLog: * gcc.target/i386/avx-vzeroupper-29.c: New testcase. * gcc.target/i386/avx-vzeroupper-12.c: Adjust testcase. * gcc.target/i386/avx-vzeroupper-7.c: Ditto. * gcc.target/i386/avx-vzeroupper-9.c: Ditto. Diff: --- gcc/config/i386/i386-features.cc | 3 +-- gcc/config/i386/i386-options.cc | 4 +++- gcc/testsuite/gcc.target/i386/avx-vzeroupper-12.c | 3 ++- gcc/testsuite/gcc.target/i386/avx-vzeroupper-29.c | 14 ++++++++++++++ gcc/testsuite/gcc.target/i386/avx-vzeroupper-7.c | 3 ++- gcc/testsuite/gcc.target/i386/avx-vzeroupper-9.c | 3 ++- 6 files changed, 24 insertions(+), 6 deletions(-) diff --git a/gcc/config/i386/i386-features.cc b/gcc/config/i386/i386-features.cc index 6fe41c3c24f..6a2444eb6b6 100644 --- a/gcc/config/i386/i386-features.cc +++ b/gcc/config/i386/i386-features.cc @@ -1875,8 +1875,7 @@ public: /* opt_pass methods: */ virtual bool gate (function *) { - return TARGET_AVX && TARGET_VZEROUPPER - && flag_expensive_optimizations && !optimize_size; + return TARGET_AVX && TARGET_VZEROUPPER; } virtual unsigned int execute (function *) diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc index 099cec4b610..70ddcd284c4 100644 --- a/gcc/config/i386/i386-options.cc +++ b/gcc/config/i386/i386-options.cc @@ -2676,7 +2676,9 @@ ix86_option_override_internal (bool main_args_p, sorry ("%<-mcall-ms2sysv-xlogues%> isn%'t currently supported with SEH"); if (!(opts_set->x_target_flags & MASK_VZEROUPPER) - && TARGET_EMIT_VZEROUPPER) + && TARGET_EMIT_VZEROUPPER + && flag_expensive_optimizations + && !optimize_size) opts->x_target_flags |= MASK_VZEROUPPER; if (!(opts_set->x_target_flags & 
MASK_STV)) opts->x_target_flags |= MASK_STV; diff --git a/gcc/testsuite/gcc.target/i386/avx-vzeroupper-12.c b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-12.c index e694d4048bd..5a40e87832c 100644 --- a/gcc/testsuite/gcc.target/i386/avx-vzeroupper-12.c +++ b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-12.c @@ -16,5 +16,6 @@ foo () _mm256_zeroupper (); } -/* { dg-final { scan-assembler-times "avx_vzeroupper" 4 } } */ +/* { dg-final { scan-assembler-times "avx_vzeroupper" 4 { target ia32 } } } */ +/* { dg-final { scan-assembler-times "avx_vzeroupper" 5 { target { ! ia32 } } } } */ /* { dg-final { scan-assembler-times "\\*avx_vzeroall" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/avx-vzeroupper-29.c b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-29.c new file mode 100644 index 00000000000..4af637757f7 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-29.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-O0 -mavx -mtune=generic -mvzeroupper -dp" } */ + +#include <immintrin.h> + +extern __m256 x, y; + +void +foo () +{ + x = y; +} + +/* { dg-final { scan-assembler-times "avx_vzeroupper" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/avx-vzeroupper-7.c b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-7.c index ab6d68779b3..75fe5889783 100644 --- a/gcc/testsuite/gcc.target/i386/avx-vzeroupper-7.c +++ b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-7.c @@ -12,4 +12,5 @@ foo () _mm256_zeroupper (); } -/* { dg-final { scan-assembler-times "avx_vzeroupper" 1 } } */ +/* { dg-final { scan-assembler-times "avx_vzeroupper" 1 { target ia32 } } } */ +/* { dg-final { scan-assembler-times "avx_vzeroupper" 2 { target { ! 
ia32 } } } } */ diff --git a/gcc/testsuite/gcc.target/i386/avx-vzeroupper-9.c b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-9.c index 974e1626a6d..fa0a6dfcaac 100644 --- a/gcc/testsuite/gcc.target/i386/avx-vzeroupper-9.c +++ b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-9.c @@ -15,4 +15,5 @@ foo () _mm256_zeroupper (); } -/* { dg-final { scan-assembler-times "avx_vzeroupper" 4 } } */ +/* { dg-final { scan-assembler-times "avx_vzeroupper" 4 { target ia32 } } } */ +/* { dg-final { scan-assembler-times "avx_vzeroupper" 5 { target { ! ia32 } } } } */