From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 2078) id 78A343835425; Mon, 29 Nov 2021 09:46:23 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 78A343835425 MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Content-Type: text/plain; charset="utf-8" From: hongtao Liu To: gcc-cvs@gcc.gnu.org Subject: [gcc r12-5574] Optimize _Float16 usage for non AVX512FP16. X-Act-Checkin: gcc X-Git-Author: liuhongt X-Git-Refname: refs/heads/master X-Git-Oldrev: 9519b694afbf9a35c36cf9f14d35d1c0e9e8cacc X-Git-Newrev: 11d0a2af33910c6d243e7265fb7ea04d2bc89b25 Message-Id: <20211129094623.78A343835425@sourceware.org> Date: Mon, 29 Nov 2021 09:46:23 +0000 (GMT) X-BeenThere: gcc-cvs@gcc.gnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Gcc-cvs mailing list List-Unsubscribe: , List-Archive: List-Help: List-Subscribe: , X-List-Received-Date: Mon, 29 Nov 2021 09:46:23 -0000 https://gcc.gnu.org/g:11d0a2af33910c6d243e7265fb7ea04d2bc89b25 commit r12-5574-g11d0a2af33910c6d243e7265fb7ea04d2bc89b25 Author: liuhongt Date: Mon Nov 29 10:01:42 2021 +0800 Optimize _Float16 usage for non AVX512FP16. 1. No memory is needed to move HI/HFmode between GPR and SSE registers under TARGET_SSE2 and above; pinsrw/pextrw are used for them w/o AVX512FP16. 2. Use gen_sse2_pinsrph/gen_vec_setv4sf_0 to replace ix86_expand_vector_set in extendhfsf2/truncsfhf2 so that redundant initialization could be eliminated. gcc/ChangeLog: PR target/102811 * config/i386/i386.c (inline_secondary_memory_needed): HImode move between GPR and SSE registers is supported under TARGET_SSE2 and above. * config/i386/i386.md (extendhfsf2): Optimize expander. (truncsfhf2): Ditto. * config/i386/sse.md (sse2p4_1): Adjust attr for V8HFmode to align with V8HImode. gcc/testsuite/ChangeLog: * gcc.target/i386/pr102811-2.c: New test. * gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c: Add new scan-assembler-times. 
Diff: --- gcc/config/i386/i386.c | 5 +++-- gcc/config/i386/i386.md | 18 ++++++++++++++---- gcc/config/i386/sse.md | 2 +- .../gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c | 2 +- gcc/testsuite/gcc.target/i386/pr102811-2.c | 22 ++++++++++++++++++++++ 5 files changed, 41 insertions(+), 8 deletions(-) diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 7cf599f57f7..2657e7817ae 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -19437,8 +19437,9 @@ inline_secondary_memory_needed (machine_mode mode, reg_class_t class1, if (msize > UNITS_PER_WORD) return true; - /* In addition to SImode moves, AVX512FP16 also enables HImode moves. */ - int minsize = GET_MODE_SIZE (TARGET_AVX512FP16 ? HImode : SImode); + /* In addition to SImode moves, HImode moves are supported for SSE2 and above, + Use vmovw with AVX512FP16, or pinsrw/pextrw without AVX512FP16. */ + int minsize = GET_MODE_SIZE (TARGET_SSE2 ? HImode : SImode); if (msize < minsize) return true; diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 12ea5135cd7..a384dae23e2 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -4608,9 +4608,18 @@ if (!TARGET_AVX512FP16) { rtx res = gen_reg_rtx (V4SFmode); - rtx tmp = force_reg (V8HFmode, CONST0_RTX (V8HFmode)); + rtx tmp = gen_reg_rtx (V8HFmode); + rtx zero = force_reg (V8HFmode, CONST0_RTX (V8HFmode)); - ix86_expand_vector_set (false, tmp, operands[1], 0); + if (TARGET_AVX2) + { + rtx dup = gen_reg_rtx (V8HFmode); + emit_move_insn (dup, gen_rtx_VEC_DUPLICATE (V8HFmode, operands[1])); + emit_move_insn (tmp, gen_rtx_VEC_MERGE (V8HFmode, dup, + zero, const1_rtx)); + } + else + emit_insn (gen_sse2_pinsrph (tmp, zero, operands[1], const1_rtx)); emit_insn (gen_vcvtph2ps (res, gen_lowpart (V8HImode, tmp))); emit_move_insn (operands[0], gen_lowpart (SFmode, res)); DONE; @@ -4824,9 +4833,10 @@ if (!TARGET_AVX512FP16) { rtx res = gen_reg_rtx (V8HFmode); - rtx tmp = force_reg (V4SFmode, CONST0_RTX (V4SFmode)); + rtx tmp = 
gen_reg_rtx (V4SFmode); + rtx zero = force_reg (V4SFmode, CONST0_RTX (V4SFmode)); - ix86_expand_vector_set (false, tmp, operands[1], 0); + emit_insn (gen_vec_setv4sf_0 (tmp, zero, operands[1])); emit_insn (gen_vcvtps2ph (gen_lowpart (V8HImode, res), tmp, GEN_INT (4))); emit_move_insn (operands[0], gen_lowpart (HFmode, res)); DONE; diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 5229b23af98..b371b140eb1 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -17272,7 +17272,7 @@ (V2DI "TARGET_SSE4_1 && TARGET_64BIT")]) (define_mode_attr sse2p4_1 - [(V16QI "sse4_1") (V8HI "sse2") (V8HF "sse4_1") + [(V16QI "sse4_1") (V8HI "sse2") (V8HF "sse2") (V4SI "sse4_1") (V2DI "sse4_1")]) (define_mode_attr pinsr_evex_isa diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c index dfbfb167953..9a6c432c866 100644 --- a/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c +++ b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c @@ -1,6 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-O2 -mf16c -mno-avx512fp16" } */ -/* { dg-final { scan-assembler-times "vpxor\[ \\t\]" 2 } } */ +/* { dg-final { scan-assembler-times "vpxor\[ \\t\]" 1 } } */ /* { dg-final { scan-assembler-times "vcvtph2ps\[ \\t\]" 2 } } */ /* { dg-final { scan-assembler-times "vcvtps2ph\[ \\t\]" 1 } } */ /* { dg-final { scan-assembler-not "__truncsfhf2\[ \\t\]"} } */ diff --git a/gcc/testsuite/gcc.target/i386/pr102811-2.c b/gcc/testsuite/gcc.target/i386/pr102811-2.c new file mode 100644 index 00000000000..e511c665ae8 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr102811-2.c @@ -0,0 +1,22 @@ +/* { dg-do compile { target { ! 
ia32 } } } */ +/* { dg-options "-O2 -mf16c -mno-avx512fp16" } */ +/* { dg-final { scan-assembler-times "pextrw" 1 } } */ +/* { dg-final { scan-assembler-times "pinsrw" 1 } } */ +/* { dg-final { scan-assembler-not "\\\(%rsp\\\)"} } */ +short test (_Float16 a) +{ + union{ + short b; + _Float16 a;}u; + u.a = a; + return u.b; +} + +_Float16 test1 (short a) +{ + union{ + _Float16 b; + short a;}u; + u.a = a; + return u.b; +}