From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 2078) id 235CF385841A; Fri, 26 Nov 2021 01:30:05 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 235CF385841A MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Content-Type: text/plain; charset="utf-8" From: hongtao Liu To: gcc-cvs@gcc.gnu.org Subject: [gcc r12-5536] i386: vcvtph2ps and vcvtps2ph should be used to convert _Float16 to SFmode with -mf16c [PR 102811] X-Act-Checkin: gcc X-Git-Author: konglin1 X-Git-Refname: refs/heads/master X-Git-Oldrev: 379be00f45f65e0e8de72a50553dd9d2bab6cc08 X-Git-Newrev: 90cb088ece8d8cc1019d25629d1585e5b0234179 Message-Id: <20211126013005.235CF385841A@sourceware.org> Date: Fri, 26 Nov 2021 01:30:05 +0000 (GMT) X-BeenThere: gcc-cvs@gcc.gnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Gcc-cvs mailing list List-Unsubscribe: , List-Archive: List-Help: List-Subscribe: , X-List-Received-Date: Fri, 26 Nov 2021 01:30:05 -0000 https://gcc.gnu.org/g:90cb088ece8d8cc1019d25629d1585e5b0234179 commit r12-5536-g90cb088ece8d8cc1019d25629d1585e5b0234179 Author: konglin1 Date: Wed Nov 10 09:37:32 2021 +0800 i386: vcvtph2ps and vcvtps2ph should be used to convert _Float16 to SFmode with -mf16c [PR 102811] Add define_insn extendhfsf2 and truncsfhf2 for target_f16c. gcc/ChangeLog: PR target/102811 * config/i386/i386.c (ix86_can_change_mode_class): Allow 16 bit data in XMM register for TARGET_SSE2. * config/i386/i386.md (extendhfsf2): Add extendhfsf2 for TARGET_F16C. (extendhfdf2): Restrict extendhfdf2 for TARGET_AVX512FP16 only. (*extendhf<mode>2): Rename from extendhf<mode>2. (truncsfhf2): Likewise. (truncdfhf2): Likewise. (*trunc<mode>hf2): Likewise. gcc/testsuite/ChangeLog: PR target/102811 * gcc.target/i386/pr90773-21.c: Allow pextrw instead of movw. * gcc.target/i386/pr90773-23.c: Ditto. * gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c: New test. 
Diff: --- gcc/config/i386/i386.c | 5 +- gcc/config/i386/i386.md | 74 ++++++++++++++++++++-- .../gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c | 11 ++++ gcc/testsuite/gcc.target/i386/pr90773-21.c | 2 +- gcc/testsuite/gcc.target/i386/pr90773-23.c | 2 +- 5 files changed, 83 insertions(+), 11 deletions(-) diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 10bfa0e7459..3dedf522c42 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -19522,9 +19522,8 @@ ix86_can_change_mode_class (machine_mode from, machine_mode to, disallow a change to these modes, reload will assume it's ok to drop the subreg from (subreg:SI (reg:HI 100) 0). This affects the vec_dupv4hi pattern. - NB: AVX512FP16 supports vmovw which can load 16bit data to sse - register. */ - int mov_size = MAYBE_SSE_CLASS_P (regclass) && TARGET_AVX512FP16 ? 2 : 4; + NB: SSE2 can load 16bit data to sse register via pinsrw. */ + int mov_size = MAYBE_SSE_CLASS_P (regclass) && TARGET_SSE2 ? 2 : 4; if (GET_MODE_SIZE (from) < mov_size) return false; } diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 03d401efff8..68606e57e60 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -2525,6 +2525,16 @@ case TYPE_SSEMOV: return ix86_output_ssemov (insn, operands); + case TYPE_SSELOG: + if (SSE_REG_P (operands[0])) + return MEM_P (operands[1]) + ? "pinsrw\t{$0, %1, %0|%0, %1, 0}" + : "pinsrw\t{$0, %k1, %0|%0, %k1, 0}"; + else + return MEM_P (operands[1]) + ? 
"pextrw\t{$0, %1, %0|%0, %1, 0}" + : "pextrw\t{$0, %1, %k0|%k0, %k1, 0}"; + case TYPE_MSKLOG: if (operands[1] == const0_rtx) return "kxorw\t%0, %0, %0"; @@ -2540,13 +2550,17 @@ } } [(set (attr "isa") - (cond [(eq_attr "alternative" "9,10,11,12,13") - (const_string "avx512fp16") + (cond [(eq_attr "alternative" "9,10,11,12") + (const_string "sse2") + (eq_attr "alternative" "13") + (const_string "sse4") ] (const_string "*"))) (set (attr "type") (cond [(eq_attr "alternative" "9,10,11,12,13") - (const_string "ssemov") + (if_then_else (match_test "TARGET_AVX512FP16") + (const_string "ssemov") + (const_string "sselog")) (eq_attr "alternative" "4,5,6,7") (const_string "mskmov") (eq_attr "alternative" "8") @@ -4574,8 +4588,32 @@ emit_move_insn (operands[0], CONST0_RTX (V2DFmode)); }) -(define_insn "extendhf<mode>2" - [(set (match_operand:MODEF 0 "nonimm_ssenomem_operand" "=v") +(define_expand "extendhfsf2" + [(set (match_operand:SF 0 "register_operand") + (float_extend:SF + (match_operand:HF 1 "nonimmediate_operand")))] + "TARGET_AVX512FP16 || TARGET_F16C || TARGET_AVX512VL" +{ + if (!TARGET_AVX512FP16) + { + rtx res = gen_reg_rtx (V4SFmode); + rtx tmp = force_reg (V8HFmode, CONST0_RTX (V8HFmode)); + + ix86_expand_vector_set (false, tmp, operands[1], 0); + emit_insn (gen_vcvtph2ps (res, gen_lowpart (V8HImode, tmp))); + emit_move_insn (operands[0], gen_lowpart (SFmode, res)); + DONE; + } +}) + +(define_expand "extendhfdf2" + [(set (match_operand:DF 0 "register_operand") + (float_extend:DF + (match_operand:HF 1 "nonimmediate_operand")))] + "TARGET_AVX512FP16") + +(define_insn "*extendhf<mode>2" + [(set (match_operand:MODEF 0 "register_operand" "=v") (float_extend:MODEF (match_operand:HF 1 "nonimmediate_operand" "vm")))] "TARGET_AVX512FP16" @@ -4766,7 +4804,31 @@ ;; Conversion from {SF,DF}mode to HFmode. 
-(define_insn "trunc<mode>hf2" +(define_expand "truncsfhf2" + [(set (match_operand:HF 0 "register_operand") + (float_truncate:HF + (match_operand:SF 1 "nonimmediate_operand")))] + "TARGET_AVX512FP16 || TARGET_F16C || TARGET_AVX512VL" + { + if (!TARGET_AVX512FP16) + { + rtx res = gen_reg_rtx (V8HFmode); + rtx tmp = force_reg (V4SFmode, CONST0_RTX (V4SFmode)); + + ix86_expand_vector_set (false, tmp, operands[1], 0); + emit_insn (gen_vcvtps2ph (gen_lowpart (V8HImode, res), tmp, GEN_INT (4))); + emit_move_insn (operands[0], gen_lowpart (HFmode, res)); + DONE; + } + }) + +(define_expand "truncdfhf2" + [(set (match_operand:HF 0 "register_operand") + (float_truncate:HF + (match_operand:DF 1 "nonimmediate_operand")))] + "TARGET_AVX512FP16") + +(define_insn "*trunc<mode>hf2" [(set (match_operand:HF 0 "register_operand" "=v") (float_truncate:HF (match_operand:MODEF 1 "nonimmediate_operand" "vm")))] diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c new file mode 100644 index 00000000000..dfbfb167953 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c @@ -0,0 +1,11 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mf16c -mno-avx512fp16" } */ +/* { dg-final { scan-assembler-times "vpxor\[ \\t\]" 2 } } */ +/* { dg-final { scan-assembler-times "vcvtph2ps\[ \\t\]" 2 } } */ +/* { dg-final { scan-assembler-times "vcvtps2ph\[ \\t\]" 1 } } */ +/* { dg-final { scan-assembler-not "__truncsfhf2\[ \\t\]"} } */ +/* { dg-final { scan-assembler-not "__extendhfsf2\[ \\t\]"} } */ +_Float16 test (_Float16 a, _Float16 b) +{ + return a + b; +} diff --git a/gcc/testsuite/gcc.target/i386/pr90773-21.c b/gcc/testsuite/gcc.target/i386/pr90773-21.c index 5bbb387a3ea..0d620fff83c 100644 --- a/gcc/testsuite/gcc.target/i386/pr90773-21.c +++ b/gcc/testsuite/gcc.target/i386/pr90773-21.c @@ -10,4 +10,4 @@ foo (int c) } /* { dg-final { scan-assembler-times "vmovdqu\[\\t \]%ymm\[0-9\]+, \\(%\[\^,\]+\\)" 1 } } 
*/ -/* { dg-final { scan-assembler-times "movw\[\\t \]%.*, 32\\(%\[\^,\]+\\)" 1 } } */ +/* { dg-final { scan-assembler-times "(?:movw|pextrw)\[\\t \].*, 32\\(%\[\^,\]+\\)" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr90773-23.c b/gcc/testsuite/gcc.target/i386/pr90773-23.c index ca4a86f30b8..b7369e802e1 100644 --- a/gcc/testsuite/gcc.target/i386/pr90773-23.c +++ b/gcc/testsuite/gcc.target/i386/pr90773-23.c @@ -10,4 +10,4 @@ foo (void) } /* { dg-final { scan-assembler-times "vmovdqu\[\\t \]%ymm\[0-9\]+, \\(%\[\^,\]+\\)" 1 } } */ -/* { dg-final { scan-assembler-times "movw\[\\t \]+.+, 32\\(%\[\^,\]+\\)" 1 } } */ +/* { dg-final { scan-assembler-times "(?:movw|pextrw)\[\\t \]+.+, 32\\(%\[\^,\]+\\)" 1 } } */