From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 1363) id B9547385741E; Wed, 16 Jun 2021 14:08:03 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org B9547385741E MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Content-Type: text/plain; charset="utf-8" From: Uros Bizjak To: gcc-cvs@gcc.gnu.org Subject: [gcc r12-1537] ii386: Add missing two element 64bit vector permutations [PR89021] X-Act-Checkin: gcc X-Git-Author: Uros Bizjak X-Git-Refname: refs/heads/master X-Git-Oldrev: c25e3bf87975280a603ff18fba387c6707ce4a95 X-Git-Newrev: dd835ec24be9b1a89c6b0c78673de88c81a23966 Message-Id: <20210616140803.B9547385741E@sourceware.org> Date: Wed, 16 Jun 2021 14:08:03 +0000 (GMT) X-BeenThere: gcc-cvs@gcc.gnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Gcc-cvs mailing list List-Unsubscribe: , List-Archive: List-Help: List-Subscribe: , X-List-Received-Date: Wed, 16 Jun 2021 14:08:03 -0000 https://gcc.gnu.org/g:dd835ec24be9b1a89c6b0c78673de88c81a23966 commit r12-1537-gdd835ec24be9b1a89c6b0c78673de88c81a23966 Author: Uros Bizjak Date: Wed Jun 16 16:07:01 2021 +0200 ii386: Add missing two element 64bit vector permutations [PR89021] In addition to V8QI permutations, several other missing permutations are added for 64bit vector modes for TARGET_SSSE3 and TARGET_SSE4_1 targets. 2021-06-16 Uroš Bizjak gcc/ PR target/89021 * config/i386/i386-expand.c (expand_vec_perm_2perm_pblendv): Handle 64bit modes for TARGET_SSE4_1. (expand_vec_perm_pshufb2): Handle 64bit modes for TARGET_SSSE3. (expand_vec_perm_even_odd_pack): Handle V4HI mode. (expand_vec_perm_even_odd_1) <case E_V4HImode>: Expand via expand_vec_perm_pshufb2 for TARGET_SSSE3 and via expand_vec_perm_even_odd_pack for TARGET_SSE4_1. * config/i386/mmx.md (mmx_packusdw): New insn pattern. 
Diff: --- gcc/config/i386/i386-expand.c | 91 +++++++++++++++++++++++++++++++------------ gcc/config/i386/mmx.md | 16 ++++++++ 2 files changed, 82 insertions(+), 25 deletions(-) diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c index dee3df2e3a0..eb6f9b0684e 100644 --- a/gcc/config/i386/i386-expand.c +++ b/gcc/config/i386/i386-expand.c @@ -17633,8 +17633,10 @@ expand_vec_perm_pshufb (struct expand_vec_perm_d *d) if (vmode == V8QImode) { + rtx m128 = GEN_INT (-128); + for (i = nelt; i < 16; ++i) - rperm[i] = constm1_rtx; + rperm[i] = m128; vpmode = V16QImode; } @@ -18972,7 +18974,8 @@ expand_vec_perm_2perm_pblendv (struct expand_vec_perm_d *d, bool two_insn) ; else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode)) ; - else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16) + else if (TARGET_SSE4_1 && (GET_MODE_SIZE (vmode) == 16 + || GET_MODE_SIZE (vmode) == 8)) ; else return false; @@ -19229,14 +19232,31 @@ expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d) { rtx rperm[2][16], vperm, l, h, op, m128; unsigned int i, nelt, eltsz; + machine_mode mode; + rtx (*gen) (rtx, rtx, rtx); - if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16) + if (!TARGET_SSSE3 || (GET_MODE_SIZE (d->vmode) != 16 + && GET_MODE_SIZE (d->vmode) != 8)) return false; gcc_assert (!d->one_operand_p); if (d->testing_p) return true; + switch (GET_MODE_SIZE (d->vmode)) + { + case 8: + mode = V8QImode; + gen = gen_mmx_pshufbv8qi3; + break; + case 16: + mode = V16QImode; + gen = gen_ssse3_pshufbv16qi3; + break; + default: + gcc_unreachable (); + } + nelt = d->nelt; eltsz = GET_MODE_UNIT_SIZE (d->vmode); @@ -19247,7 +19267,7 @@ expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d) m128 = GEN_INT (-128); for (i = 0; i < nelt; ++i) { - unsigned j, e = d->perm[i]; + unsigned j, k, e = d->perm[i]; unsigned which = (e >= nelt); if (e >= nelt) e -= nelt; @@ -19257,26 +19277,29 @@ expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d) rperm[which][i*eltsz + j] = GEN_INT 
(e*eltsz + j); rperm[1-which][i*eltsz + j] = m128; } + + for (k = i*eltsz + j; k < 16; ++k) + rperm[0][k] = rperm[1][k] = m128; } vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0])); vperm = force_reg (V16QImode, vperm); - l = gen_reg_rtx (V16QImode); - op = gen_lowpart (V16QImode, d->op0); - emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm)); + l = gen_reg_rtx (mode); + op = gen_lowpart (mode, d->op0); + emit_insn (gen (l, op, vperm)); vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1])); vperm = force_reg (V16QImode, vperm); - h = gen_reg_rtx (V16QImode); - op = gen_lowpart (V16QImode, d->op1); - emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm)); + h = gen_reg_rtx (mode); + op = gen_lowpart (mode, d->op1); + emit_insn (gen (h, op, vperm)); op = d->target; - if (d->vmode != V16QImode) - op = gen_reg_rtx (V16QImode); - emit_insn (gen_iorv16qi3 (op, l, h)); + if (d->vmode != mode) + op = gen_reg_rtx (mode); + emit_insn (gen_rtx_SET (op, gen_rtx_IOR (mode, l, h))); if (op != d->target) emit_move_insn (d->target, gen_lowpart (d->vmode, op)); @@ -19455,6 +19478,17 @@ expand_vec_perm_even_odd_pack (struct expand_vec_perm_d *d) switch (d->vmode) { + case E_V4HImode: + /* Required for "pack". */ + if (!TARGET_SSE4_1) + return false; + c = 0xffff; + s = 16; + half_mode = V2SImode; + gen_and = gen_andv2si3; + gen_pack = gen_mmx_packusdw; + gen_shift = gen_lshrv2si3; + break; case E_V8HImode: /* Required for "pack". */ if (!TARGET_SSE4_1) @@ -19507,7 +19541,7 @@ expand_vec_perm_even_odd_pack (struct expand_vec_perm_d *d) end_perm = true; break; default: - /* Only V8QI, V8HI, V16QI, V16HI and V32QI modes + /* Only V4HI, V8QI, V8HI, V16QI, V16HI and V32QI modes are more profitable than general shuffles. */ return false; } @@ -19698,18 +19732,25 @@ expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd) break; case E_V4HImode: - if (d->testing_p) - break; - /* We need 2*log2(N)-1 operations to achieve odd/even - with interleave. 
*/ - t1 = gen_reg_rtx (V4HImode); - emit_insn (gen_mmx_punpckhwd (t1, d->op0, d->op1)); - emit_insn (gen_mmx_punpcklwd (d->target, d->op0, d->op1)); - if (odd) - t2 = gen_mmx_punpckhwd (d->target, d->target, t1); + if (TARGET_SSE4_1) + return expand_vec_perm_even_odd_pack (d); + else if (TARGET_SSSE3 && !TARGET_SLOW_PSHUFB) + return expand_vec_perm_pshufb2 (d); else - t2 = gen_mmx_punpcklwd (d->target, d->target, t1); - emit_insn (t2); + { + if (d->testing_p) + break; + /* We need 2*log2(N)-1 operations to achieve odd/even + with interleave. */ + t1 = gen_reg_rtx (V4HImode); + emit_insn (gen_mmx_punpckhwd (t1, d->op0, d->op1)); + emit_insn (gen_mmx_punpcklwd (d->target, d->op0, d->op1)); + if (odd) + t2 = gen_mmx_punpckhwd (d->target, d->target, t1); + else + t2 = gen_mmx_punpcklwd (d->target, d->target, t1); + emit_insn (t2); + } break; case E_V8HImode: diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md index 1a9e7b024dd..59a16f4cd50 100644 --- a/gcc/config/i386/mmx.md +++ b/gcc/config/i386/mmx.md @@ -2477,6 +2477,22 @@ (set_attr "type" "mmxshft,sselog,sselog") (set_attr "mode" "DI,TI,TI")]) +(define_insn_and_split "mmx_packusdw" + [(set (match_operand:V4HI 0 "register_operand" "=Yr,*x,Yw") + (vec_concat:V4HI + (us_truncate:V2HI + (match_operand:V2SI 1 "register_operand" "0,0,Yw")) + (us_truncate:V2HI + (match_operand:V2SI 2 "register_operand" "Yr,*x,Yw"))))] + "TARGET_SSE4_1 && TARGET_MMX_WITH_SSE" + "#" + "&& reload_completed" + [(const_int 0)] + "ix86_split_mmx_pack (operands, US_TRUNCATE); DONE;" + [(set_attr "isa" "noavx,noavx,avx") + (set_attr "type" "sselog") + (set_attr "mode" "TI")]) + (define_insn_and_split "mmx_punpckhbw" [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yw") (vec_select:V8QI