From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 1363) id EAACB3858039; Thu, 8 Jul 2021 10:21:07 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org EAACB3858039 MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Content-Type: text/plain; charset="utf-8" From: Uros Bizjak To: gcc-cvs@gcc.gnu.org Subject: [gcc r12-2136] i386: Add pack/unpack patterns for 32bit vectors [PR100637] X-Act-Checkin: gcc X-Git-Author: Uros Bizjak X-Git-Refname: refs/heads/master X-Git-Oldrev: 852b11da11a181df517c0348df044354ff0656d6 X-Git-Newrev: 663a014e77709bfbd4145c605b178169eaf334fc Message-Id: <20210708102107.EAACB3858039@sourceware.org> Date: Thu, 8 Jul 2021 10:21:07 +0000 (GMT) X-BeenThere: gcc-cvs@gcc.gnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Gcc-cvs mailing list List-Unsubscribe: , List-Archive: List-Help: List-Subscribe: , X-List-Received-Date: Thu, 08 Jul 2021 10:21:08 -0000 https://gcc.gnu.org/g:663a014e77709bfbd4145c605b178169eaf334fc commit r12-2136-g663a014e77709bfbd4145c605b178169eaf334fc Author: Uros Bizjak Date: Thu Jul 8 12:19:54 2021 +0200 i386: Add pack/unpack patterns for 32bit vectors [PR100637] V1SI mode shift is needed to shift 32bit operands and consequently we need to implement V1SI moves and pushes. 2021-07-08 Uroš Bizjak gcc/ PR target/100637 * config/i386/i386-expand.c (ix86_expand_sse_unpack): Handle V4QI mode. * config/i386/mmx.md (V_32): New mode iterator. (mov<mode>): Use V_32 mode iterator. (*mov<mode>_internal): Ditto. (*push<mode>2_rex64): Ditto. (*push<mode>2): Ditto. (movmisalign<mode>): Ditto. (mmx_<insn>v1si3): New insn pattern. (sse4_1_<code>v2qiv2hi2): Ditto. (vec_unpacks_lo_v4qi): New expander. (vec_unpacks_hi_v4qi): Ditto. (vec_unpacku_lo_v4qi): Ditto. (vec_unpacku_hi_v4qi): Ditto. * config/i386/i386.h (VALID_SSE2_REG_MODE): Add V1SImode. (VALID_INT_MODE_P): Ditto. 
Diff: --- gcc/config/i386/i386-expand.c | 18 ++++++++ gcc/config/i386/i386.h | 4 +- gcc/config/i386/mmx.md | 100 +++++++++++++++++++++++++++++++++++------- 3 files changed, 105 insertions(+), 17 deletions(-) diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c index 58c208e166b..65764ad88c5 100644 --- a/gcc/config/i386/i386-expand.c +++ b/gcc/config/i386/i386-expand.c @@ -5355,6 +5355,12 @@ ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p) else unpack = gen_sse4_1_sign_extendv2hiv2si2; break; + case E_V4QImode: + if (unsigned_p) + unpack = gen_sse4_1_zero_extendv2qiv2hi2; + else + unpack = gen_sse4_1_sign_extendv2qiv2hi2; + break; default: gcc_unreachable (); } @@ -5380,6 +5386,12 @@ ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p) emit_insn (gen_mmx_lshrv1di3 (tmp, gen_lowpart (V1DImode, src), GEN_INT (32))); break; + case 4: + /* Shift higher 2 bytes to lower 2 bytes. */ + tmp = gen_reg_rtx (V1SImode); + emit_insn (gen_mmx_lshrv1si3 (tmp, gen_lowpart (V1SImode, src), + GEN_INT (16))); + break; default: gcc_unreachable (); } @@ -5427,6 +5439,12 @@ ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p) else unpack = gen_mmx_punpcklwd; break; + case E_V4QImode: + if (high_p) + unpack = gen_mmx_punpckhbw_low; + else + unpack = gen_mmx_punpcklbw_low; + break; default: gcc_unreachable (); } diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 03d176143fe..8c3eace56da 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -1016,7 +1016,7 @@ extern const char *host_detect_local_cpu (int argc, const char **argv); #define VALID_SSE2_REG_MODE(MODE) \ ((MODE) == V16QImode || (MODE) == V8HImode || (MODE) == V2DFmode \ - || (MODE) == V4QImode || (MODE) == V2HImode \ + || (MODE) == V4QImode || (MODE) == V2HImode || (MODE) == V1SImode \ || (MODE) == V2DImode || (MODE) == DFmode) #define VALID_SSE_REG_MODE(MODE) \ @@ -1048,7 +1048,7 @@ extern const char 
*host_detect_local_cpu (int argc, const char **argv); || (MODE) == SImode || (MODE) == DImode \ || (MODE) == CQImode || (MODE) == CHImode \ || (MODE) == CSImode || (MODE) == CDImode \ - || (MODE) == V4QImode || (MODE) == V2HImode \ + || (MODE) == V4QImode || (MODE) == V2HImode || (MODE) == V1SImode \ || (TARGET_64BIT \ && ((MODE) == TImode || (MODE) == CTImode \ || (MODE) == TFmode || (MODE) == TCmode \ diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md index 7e83b64ab59..986b758396a 100644 --- a/gcc/config/i386/mmx.md +++ b/gcc/config/i386/mmx.md @@ -57,10 +57,13 @@ (define_mode_iterator MMXMODE24 [V4HI V2SI]) (define_mode_iterator MMXMODE248 [V4HI V2SI V1DI]) -;; All 32bit integer vector modes +;; All 4-byte integer vector modes +(define_mode_iterator V_32 [V4QI V2HI V1SI]) + +;; 4-byte integer vector modes (define_mode_iterator VI_32 [V4QI V2HI]) -;; All V2S* modes +;; V2S* modes (define_mode_iterator V2FI [V2SF V2SI]) ;; Mapping from integer vector mode to mnemonic suffix @@ -238,8 +241,8 @@ }) (define_expand "mov" - [(set (match_operand:VI_32 0 "nonimmediate_operand") - (match_operand:VI_32 1 "nonimmediate_operand"))] + [(set (match_operand:V_32 0 "nonimmediate_operand") + (match_operand:V_32 1 "nonimmediate_operand"))] "TARGET_SSE2" { ix86_expand_vector_move (mode, operands); @@ -247,9 +250,9 @@ }) (define_insn "*mov_internal" - [(set (match_operand:VI_32 0 "nonimmediate_operand" + [(set (match_operand:V_32 0 "nonimmediate_operand" "=r ,m ,v,v,v,m,r,v") - (match_operand:VI_32 1 "general_operand" + (match_operand:V_32 1 "general_operand" "rmC,rC,C,v,m,v,v,r"))] "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))" @@ -304,8 +307,8 @@ ;; For TARGET_64BIT we always round up to 8 bytes. 
(define_insn "*push2_rex64" - [(set (match_operand:VI_32 0 "push_operand" "=X,X") - (match_operand:VI_32 1 "nonmemory_no_elim_operand" "rC,*v"))] + [(set (match_operand:V_32 0 "push_operand" "=X,X") + (match_operand:V_32 1 "nonmemory_no_elim_operand" "rC,*v"))] "TARGET_SSE2 && TARGET_64BIT" "@ push{q}\t%q1 @@ -314,8 +317,8 @@ (set_attr "mode" "DI")]) (define_insn "*push2" - [(set (match_operand:VI_32 0 "push_operand" "=<,<") - (match_operand:VI_32 1 "general_no_elim_operand" "rC*m,*v"))] + [(set (match_operand:V_32 0 "push_operand" "=<,<") + (match_operand:V_32 1 "general_no_elim_operand" "rC*m,*v"))] "TARGET_SSE2 && !TARGET_64BIT" "@ push{l}\t%1 @@ -324,20 +327,20 @@ (set_attr "mode" "SI")]) (define_split - [(set (match_operand:VI_32 0 "push_operand") - (match_operand:VI_32 1 "sse_reg_operand"))] + [(set (match_operand:V_32 0 "push_operand") + (match_operand:V_32 1 "sse_reg_operand"))] "TARGET_SSE2 && reload_completed" [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (match_dup 2))) (set (match_dup 0) (match_dup 1))] { - operands[2] = GEN_INT (-PUSH_ROUNDING (GET_MODE_SIZE (mode))); + operands[2] = GEN_INT (-PUSH_ROUNDING (GET_MODE_SIZE (mode))); /* Preserve memory attributes. 
*/ operands[0] = replace_equiv_address (operands[0], stack_pointer_rtx); }) (define_expand "movmisalign" - [(set (match_operand:VI_32 0 "nonimmediate_operand") - (match_operand:VI_32 1 "nonimmediate_operand"))] + [(set (match_operand:V_32 0 "nonimmediate_operand") + (match_operand:V_32 1 "nonimmediate_operand"))] "TARGET_SSE2" { ix86_expand_vector_move (mode, operands); @@ -2006,6 +2009,23 @@ (match_operand:DI 2 "nonmemory_operand")))] "TARGET_MMX_WITH_SSE") +(define_insn "mmx_v1si3" + [(set (match_operand:V1SI 0 "register_operand" "=x,Yw") + (any_lshift:V1SI + (match_operand:V1SI 1 "register_operand" "0,Yw") + (match_operand:DI 2 "nonmemory_operand" "xN,YwN")))] + "TARGET_SSE2" + "@ + pd\t{%2, %0|%0, %2} + vpd\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "noavx,avx") + (set_attr "type" "sseishft") + (set (attr "length_immediate") + (if_then_else (match_operand 2 "const_int_operand") + (const_string "1") + (const_string "0"))) + (set_attr "mode" "TI")]) + (define_insn "v2hi3" [(set (match_operand:V2HI 0 "register_operand" "=x,Yw") (any_shift:V2HI @@ -2732,6 +2752,20 @@ (set_attr "prefix" "orig,orig,maybe_evex") (set_attr "mode" "TI")]) +(define_insn "sse4_1_v2qiv2hi2" + [(set (match_operand:V2HI 0 "register_operand" "=Yr,*x,Yw") + (any_extend:V2HI + (vec_select:V2QI + (match_operand:V4QI 1 "register_operand" "Yr,*x,Yw") + (parallel [(const_int 0) (const_int 1)]))))] + "TARGET_SSE4_1" + "%vpmovbw\t{%1, %0|%0, %1}" + [(set_attr "isa" "noavx,noavx,avx") + (set_attr "type" "ssemov") + (set_attr "prefix_extra" "1") + (set_attr "prefix" "orig,orig,maybe_evex") + (set_attr "mode" "TI")]) + ;; Pack/unpack vector modes (define_mode_attr mmxpackmode [(V4HI "V8QI") (V2SI "V4HI")]) @@ -2748,6 +2782,18 @@ DONE; }) +(define_expand "vec_pack_trunc_v2hi" + [(match_operand:V4QI 0 "register_operand") + (match_operand:V2HI 1 "register_operand") + (match_operand:V2HI 2 "register_operand")] + "TARGET_SSE2" +{ + rtx op1 = gen_lowpart (V4QImode, operands[1]); + rtx op2 = gen_lowpart 
(V4QImode, operands[2]); + ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0); + DONE; +}) + (define_mode_attr mmxunpackmode [(V8QI "V4HI") (V4HI "V2SI")]) @@ -2775,6 +2821,30 @@ "TARGET_MMX_WITH_SSE" "ix86_expand_sse_unpack (operands[0], operands[1], true, true); DONE;") +(define_expand "vec_unpacks_lo_v4qi" + [(match_operand:V2HI 0 "register_operand") + (match_operand:V4QI 1 "register_operand")] + "TARGET_SSE2" + "ix86_expand_sse_unpack (operands[0], operands[1], false, false); DONE;") + +(define_expand "vec_unpacks_hi_v4qi" + [(match_operand:V2HI 0 "register_operand") + (match_operand:V4QI 1 "register_operand")] + "TARGET_SSE2" + "ix86_expand_sse_unpack (operands[0], operands[1], false, true); DONE;") + +(define_expand "vec_unpacku_lo_v4qi" + [(match_operand:V2HI 0 "register_operand") + (match_operand:V4QI 1 "register_operand")] + "TARGET_SSE2" + "ix86_expand_sse_unpack (operands[0], operands[1], true, false); DONE;") + +(define_expand "vec_unpacku_hi_v4qi" + [(match_operand:V2HI 0 "register_operand") + (match_operand:V4QI 1 "register_operand")] + "TARGET_SSE2" + "ix86_expand_sse_unpack (operands[0], operands[1], true, true); DONE;") + (define_insn "*mmx_pinsrd" [(set (match_operand:V2SI 0 "register_operand" "=x,Yv") (vec_merge:V2SI