From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 1363) id 8D2FF385840C; Tue, 14 Dec 2021 17:28:30 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 8D2FF385840C MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Content-Type: text/plain; charset="utf-8" From: Uros Bizjak To: gcc-cvs@gcc.gnu.org Subject: [gcc r12-5966] i386: Implement VxHF vector set/insert/extract with lower ABI levels X-Act-Checkin: gcc X-Git-Author: Uros Bizjak X-Git-Refname: refs/heads/master X-Git-Oldrev: 2cf62ef5aa80e3659a8150a48d93a1d333f1d292 X-Git-Newrev: 7a54d3deecf967029f18aa5ed1fcbdb752e213b9 Message-Id: <20211214172830.8D2FF385840C@sourceware.org> Date: Tue, 14 Dec 2021 17:28:30 +0000 (GMT) X-BeenThere: gcc-cvs@gcc.gnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Gcc-cvs mailing list List-Unsubscribe: , List-Archive: List-Help: List-Subscribe: , X-List-Received-Date: Tue, 14 Dec 2021 17:28:30 -0000 https://gcc.gnu.org/g:7a54d3deecf967029f18aa5ed1fcbdb752e213b9 commit r12-5966-g7a54d3deecf967029f18aa5ed1fcbdb752e213b9 Author: Uros Bizjak Date: Tue Dec 14 18:27:22 2021 +0100 i386: Implement VxHF vector set/insert/extract with lower ABI levels This is a preparation patch that moves VxHF vector set/insert/extract expansions from AVX512FP16 ABI to lower ABIs. There are no functional changes for -mavx512fp16 and a follow-up patch is needed to actually enable VxHF vector modes for lower ABIs. 2021-12-14 Uroš Bizjak gcc/ChangeLog: PR target/103571 * config/i386/i386-expand.c (ix86_expand_vector_init_duplicate) <case E_V8HFmode>: Implement for TARGET_SSE2. <case E_V16HFmode>: Implement for TARGET_AVX. <case E_V32HFmode>: Implement for TARGET_AVX512F. (ix86_expand_vector_set_var): Handle V32HFmode without TARGET_AVX512BW. (ix86_expand_vector_extract) <case E_V8HFmode>: Implement for TARGET_SSE2. <case E_V16HFmode>: Implement for TARGET_AVX. <case E_V32HFmode>: Implement for TARGET_AVX512BW. (expand_vec_perm_broadcast_1) <case E_V8HFmode>: New. * config/i386/sse.md (VI12HF_AVX512VL): Remove TARGET_AVX512FP16 condition. (V): Ditto. 
(V_256_512): Ditto. (avx_vbroadcastf128_): Use V_256H mode iterator. Diff: --- gcc/config/i386/i386-expand.c | 118 +++++++++++++++++++++++++++++------------- gcc/config/i386/sse.md | 19 +++---- 2 files changed, 91 insertions(+), 46 deletions(-) diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c index 2bbb28e5317..7013c20a97a 100644 --- a/gcc/config/i386/i386-expand.c +++ b/gcc/config/i386/i386-expand.c @@ -14855,6 +14855,7 @@ ix86_expand_vector_init_duplicate (bool mmx_ok, machine_mode mode, goto widen; case E_V8HImode: + case E_V8HFmode: if (TARGET_AVX2) return ix86_vector_duplicate_value (mode, target, val); @@ -14871,15 +14872,22 @@ ix86_expand_vector_init_duplicate (bool mmx_ok, machine_mode mode, dperm.op0 = dperm.op1 = gen_reg_rtx (mode); dperm.one_operand_p = true; - /* Extend to SImode using a paradoxical SUBREG. */ - tmp1 = gen_reg_rtx (SImode); - emit_move_insn (tmp1, gen_lowpart (SImode, val)); - - /* Insert the SImode value as low element of a V4SImode vector. */ - tmp2 = gen_reg_rtx (V4SImode); - emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1)); - emit_move_insn (dperm.op0, gen_lowpart (mode, tmp2)); + if (mode == V8HFmode) + tmp1 = lowpart_subreg (V8HFmode, force_reg (HFmode, val), HFmode); + else + { + /* Extend to SImode using a paradoxical SUBREG. */ + tmp1 = gen_reg_rtx (SImode); + emit_move_insn (tmp1, gen_lowpart (SImode, val)); + + /* Insert the SImode value as + low element of a V4SImode vector. 
*/ + tmp2 = gen_reg_rtx (V4SImode); + emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1)); + tmp1 = gen_lowpart (mode, tmp2); + } + emit_move_insn (dperm.op0, tmp1); ok = (expand_vec_perm_1 (&dperm) || expand_vec_perm_broadcast_1 (&dperm)); gcc_assert (ok); @@ -14926,12 +14934,15 @@ ix86_expand_vector_init_duplicate (bool mmx_ok, machine_mode mode, } case E_V16HImode: + case E_V16HFmode: case E_V32QImode: if (TARGET_AVX2) return ix86_vector_duplicate_value (mode, target, val); else { - machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode); + machine_mode hvmode = (mode == V16HImode ? V8HImode + : mode == V16HFmode ? V8HFmode + : V16QImode); rtx x = gen_reg_rtx (hvmode); ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val); @@ -14942,13 +14953,16 @@ ix86_expand_vector_init_duplicate (bool mmx_ok, machine_mode mode, } return true; - case E_V64QImode: case E_V32HImode: + case E_V32HFmode: + case E_V64QImode: if (TARGET_AVX512BW) return ix86_vector_duplicate_value (mode, target, val); else { - machine_mode hvmode = (mode == V32HImode ? V16HImode : V32QImode); + machine_mode hvmode = (mode == V32HImode ? V16HImode + : mode == V32HFmode ? V16HFmode + : V32QImode); rtx x = gen_reg_rtx (hvmode); ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val); @@ -14959,11 +14973,6 @@ ix86_expand_vector_init_duplicate (bool mmx_ok, machine_mode mode, } return true; - case E_V8HFmode: - case E_V16HFmode: - case E_V32HFmode: - return ix86_vector_duplicate_value (mode, target, val); - default: return false; } @@ -15912,7 +15921,8 @@ ix86_expand_vector_set_var (rtx target, rtx val, rtx idx) /* 512-bits vector byte/word broadcast and comparison only available under TARGET_AVX512BW, break 512-bits vector into two 256-bits vector when without TARGET_AVX512BW. 
*/ - if ((mode == V32HImode || mode == V64QImode) && !TARGET_AVX512BW) + if ((mode == V32HImode || mode == V32HFmode || mode == V64QImode) + && !TARGET_AVX512BW) { gcc_assert (TARGET_AVX512F); rtx vhi, vlo, idx_hi; @@ -15926,6 +15936,12 @@ ix86_expand_vector_set_var (rtx target, rtx val, rtx idx) extract_hi = gen_vec_extract_hi_v32hi; extract_lo = gen_vec_extract_lo_v32hi; } + else if (mode == V32HFmode) + { + half_mode = V16HFmode; + extract_hi = gen_vec_extract_hi_v32hf; + extract_lo = gen_vec_extract_lo_v32hf; + } else { half_mode = V32QImode; @@ -15973,7 +15989,6 @@ ix86_expand_vector_set_var (rtx target, rtx val, rtx idx) case E_V16SFmode: cmp_mode = V16SImode; break; - /* TARGET_AVX512FP16 implies TARGET_AVX512BW. */ case E_V8HFmode: cmp_mode = V8HImode; break; @@ -16538,6 +16553,7 @@ ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt) break; case E_V8HImode: + case E_V8HFmode: case E_V2HImode: use_vec_extr = TARGET_SSE2; break; @@ -16704,25 +16720,29 @@ ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt) return; case E_V32HFmode: - tmp = gen_reg_rtx (V16HFmode); - if (elt < 16) - emit_insn (gen_vec_extract_lo_v32hf (tmp, vec)); - else - emit_insn (gen_vec_extract_hi_v32hf (tmp, vec)); - ix86_expand_vector_extract (false, target, tmp, elt & 15); - return; + if (TARGET_AVX512BW) + { + tmp = gen_reg_rtx (V16HFmode); + if (elt < 16) + emit_insn (gen_vec_extract_lo_v32hf (tmp, vec)); + else + emit_insn (gen_vec_extract_hi_v32hf (tmp, vec)); + ix86_expand_vector_extract (false, target, tmp, elt & 15); + return; + } + break; case E_V16HFmode: - tmp = gen_reg_rtx (V8HFmode); - if (elt < 8) - emit_insn (gen_vec_extract_lo_v16hf (tmp, vec)); - else - emit_insn (gen_vec_extract_hi_v16hf (tmp, vec)); - ix86_expand_vector_extract (false, target, tmp, elt & 7); - return; - - case E_V8HFmode: - use_vec_extr = true; + if (TARGET_AVX) + { + tmp = gen_reg_rtx (V8HFmode); + if (elt < 8) + emit_insn (gen_vec_extract_lo_v16hf (tmp, vec)); + 
else + emit_insn (gen_vec_extract_hi_v16hf (tmp, vec)); + ix86_expand_vector_extract (false, target, tmp, elt & 7); + return; + } break; case E_V8QImode: @@ -21443,6 +21463,34 @@ expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d) emit_move_insn (d->target, gen_lowpart (d->vmode, dest)); return true; + case E_V8HFmode: + /* This can be implemented via interleave and pshufd. */ + if (d->testing_p) + return true; + + if (elt >= nelt2) + { + gen = gen_vec_interleave_highv8hf; + elt -= nelt2; + } + else + gen = gen_vec_interleave_lowv8hf; + nelt2 /= 2; + + dest = gen_reg_rtx (vmode); + emit_insn (gen (dest, op0, op0)); + + vmode = V4SImode; + op0 = gen_lowpart (vmode, dest); + + memset (perm2, elt, 4); + dest = gen_reg_rtx (vmode); + ok = expand_vselect (dest, op0, perm2, 4, d->testing_p); + gcc_assert (ok); + + emit_move_insn (d->target, gen_lowpart (d->vmode, dest)); + return true; + case E_V32QImode: case E_V16HImode: case E_V8SImode: diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 5421fb51684..929eef54055 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -266,9 +266,7 @@ (define_mode_iterator VI12HF_AVX512VL [V64QI (V16QI "TARGET_AVX512VL") (V32QI "TARGET_AVX512VL") V32HI (V16HI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL") - (V32HF "TARGET_AVX512FP16") - (V16HF "TARGET_AVX512FP16 && TARGET_AVX512VL") - (V8HF "TARGET_AVX512FP16 && TARGET_AVX512VL")]) + V32HF (V16HF "TARGET_AVX512VL") (V8HF "TARGET_AVX512VL")]) ;; Same iterator, but without supposed TARGET_AVX512BW (define_mode_iterator VI12_AVX512VLBW @@ -285,8 +283,7 @@ (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI - (V32HF "TARGET_AVX512FP16") (V16HF "TARGET_AVX512FP16") - (V8HF "TARGET_AVX512FP16") + (V32HF "TARGET_AVX512F") (V16HF "TARGET_AVX") V8HF (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")]) @@ 
-311,10 +308,10 @@ ;; All 256bit and 512bit vector modes (define_mode_iterator V_256_512 - [V32QI V16HI V8SI V4DI V8SF V4DF - (V64QI "TARGET_AVX512F") (V32HI "TARGET_AVX512F") (V16SI "TARGET_AVX512F") - (V8DI "TARGET_AVX512F") (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F") - (V16HF "TARGET_AVX512FP16") (V32HF "TARGET_AVX512FP16")]) + [V32QI V16HI V16HF V8SI V4DI V8SF V4DF + (V64QI "TARGET_AVX512F") (V32HI "TARGET_AVX512F") (V32HF "TARGET_AVX512F") + (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F") (V16SF "TARGET_AVX512F") + (V8DF "TARGET_AVX512F")]) ;; All vector float modes (define_mode_iterator VF @@ -24892,8 +24889,8 @@ "operands[2] = gen_lowpart (<ssehalfvecmode>mode, operands[0]);") (define_insn "avx_vbroadcastf128_<mode>" - [(set (match_operand:V_256 0 "register_operand" "=x,x,x,v,v,v,v") - (vec_concat:V_256 + [(set (match_operand:V_256H 0 "register_operand" "=x,x,x,v,v,v,v") + (vec_concat:V_256H (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "m,0,?x,m,0,m,0") (match_dup 1)))] "TARGET_AVX"