diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c
index 58c208e166b..65764ad88c5 100644
--- a/gcc/config/i386/i386-expand.c
+++ b/gcc/config/i386/i386-expand.c
@@ -5355,6 +5355,12 @@ ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p)
 	  else
 	    unpack = gen_sse4_1_sign_extendv2hiv2si2;
 	  break;
+	case E_V4QImode:
+	  if (unsigned_p)
+	    unpack = gen_sse4_1_zero_extendv2qiv2hi2;
+	  else
+	    unpack = gen_sse4_1_sign_extendv2qiv2hi2;
+	  break;
 	default:
 	  gcc_unreachable ();
 	}
@@ -5380,6 +5386,12 @@ ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p)
 	      emit_insn (gen_mmx_lshrv1di3 (tmp, gen_lowpart (V1DImode, src),
 					    GEN_INT (32)));
 	      break;
+	    case 4:
+	      /* Shift higher 2 bytes to lower 2 bytes. */
+	      tmp = gen_reg_rtx (V1SImode);
+	      emit_insn (gen_mmx_lshrv1si3 (tmp, gen_lowpart (V1SImode, src),
+					    GEN_INT (16)));
+	      break;
 	    default:
 	      gcc_unreachable ();
 	    }
@@ -5427,6 +5439,12 @@ ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p)
 	  else
 	    unpack = gen_mmx_punpcklwd;
 	  break;
+	case E_V4QImode:
+	  if (high_p)
+	    unpack = gen_mmx_punpckhbw_low;
+	  else
+	    unpack = gen_mmx_punpcklbw_low;
+	  break;
 	default:
 	  gcc_unreachable ();
 	}
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 03d176143fe..8c3eace56da 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -1016,7 +1016,7 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
 
 #define VALID_SSE2_REG_MODE(MODE) \
   ((MODE) == V16QImode || (MODE) == V8HImode || (MODE) == V2DFmode \
-   || (MODE) == V4QImode || (MODE) == V2HImode \
+   || (MODE) == V4QImode || (MODE) == V2HImode || (MODE) == V1SImode \
    || (MODE) == V2DImode || (MODE) == DFmode)
 
 #define VALID_SSE_REG_MODE(MODE) \
@@ -1048,7 +1048,7 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
    || (MODE) == SImode || (MODE) == DImode \
    || (MODE) == CQImode || (MODE) == CHImode \
    || (MODE) == CSImode || (MODE) == CDImode \
-   || (MODE) == V4QImode || (MODE) == V2HImode \
+   || (MODE) == V4QImode || (MODE) == V2HImode || (MODE) == V1SImode \
    || (TARGET_64BIT \
        && ((MODE) == TImode || (MODE) == CTImode \
 	   || (MODE) == TFmode || (MODE) == TCmode \
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 7e83b64ab59..986b758396a 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -57,10 +57,13 @@ (define_mode_iterator MMXMODE14 [V8QI V2SI])
 (define_mode_iterator MMXMODE24 [V4HI V2SI])
 (define_mode_iterator MMXMODE248 [V4HI V2SI V1DI])
 
-;; All 32bit integer vector modes
+;; All 4-byte integer vector modes
+(define_mode_iterator V_32 [V4QI V2HI V1SI])
+
+;; 4-byte integer vector modes
 (define_mode_iterator VI_32 [V4QI V2HI])
 
-;; All V2S* modes
+;; V2S* modes
 (define_mode_iterator V2FI [V2SF V2SI])
 
 ;; Mapping from integer vector mode to mnemonic suffix
@@ -238,8 +241,8 @@ (define_expand "movmisalign<mode>"
 })
 
 (define_expand "mov<mode>"
-  [(set (match_operand:VI_32 0 "nonimmediate_operand")
-	(match_operand:VI_32 1 "nonimmediate_operand"))]
+  [(set (match_operand:V_32 0 "nonimmediate_operand")
+	(match_operand:V_32 1 "nonimmediate_operand"))]
   "TARGET_SSE2"
 {
   ix86_expand_vector_move (<MODE>mode, operands);
@@ -247,9 +250,9 @@ (define_expand "mov<mode>"
 })
 
 (define_insn "*mov<mode>_internal"
-  [(set (match_operand:VI_32 0 "nonimmediate_operand"
+  [(set (match_operand:V_32 0 "nonimmediate_operand"
     "=r ,m ,v,v,v,m,r,v")
-	(match_operand:VI_32 1 "general_operand"
+	(match_operand:V_32 1 "general_operand"
     "rmC,rC,C,v,m,v,v,r"))]
   "TARGET_SSE2
    && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
@@ -304,8 +307,8 @@ (define_insn "*mov<mode>_internal"
 
 ;; For TARGET_64BIT we always round up to 8 bytes.
 (define_insn "*push<mode>2_rex64"
-  [(set (match_operand:VI_32 0 "push_operand" "=X,X")
-	(match_operand:VI_32 1 "nonmemory_no_elim_operand" "rC,*v"))]
+  [(set (match_operand:V_32 0 "push_operand" "=X,X")
+	(match_operand:V_32 1 "nonmemory_no_elim_operand" "rC,*v"))]
   "TARGET_SSE2 && TARGET_64BIT"
   "@
    push{q}\t%q1
@@ -314,8 +317,8 @@ (define_insn "*push<mode>2_rex64"
    (set_attr "mode" "DI")])
 
 (define_insn "*push<mode>2"
-  [(set (match_operand:VI_32 0 "push_operand" "=<,<")
-	(match_operand:VI_32 1 "general_no_elim_operand" "rC*m,*v"))]
+  [(set (match_operand:V_32 0 "push_operand" "=<,<")
+	(match_operand:V_32 1 "general_no_elim_operand" "rC*m,*v"))]
   "TARGET_SSE2 && !TARGET_64BIT"
   "@
    push{l}\t%1
@@ -324,20 +327,20 @@ (define_insn "*push<mode>2"
    (set_attr "mode" "SI")])
 
 (define_split
-  [(set (match_operand:VI_32 0 "push_operand")
-	(match_operand:VI_32 1 "sse_reg_operand"))]
+  [(set (match_operand:V_32 0 "push_operand")
+	(match_operand:V_32 1 "sse_reg_operand"))]
   "TARGET_SSE2 && reload_completed"
   [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (match_dup 2)))
    (set (match_dup 0) (match_dup 1))]
 {
-  operands[2] = GEN_INT (-PUSH_ROUNDING (GET_MODE_SIZE (<VI_32:MODE>mode)));
+  operands[2] = GEN_INT (-PUSH_ROUNDING (GET_MODE_SIZE (<V_32:MODE>mode)));
   /* Preserve memory attributes. */
   operands[0] = replace_equiv_address (operands[0], stack_pointer_rtx);
 })
 
 (define_expand "movmisalign<mode>"
-  [(set (match_operand:VI_32 0 "nonimmediate_operand")
-	(match_operand:VI_32 1 "nonimmediate_operand"))]
+  [(set (match_operand:V_32 0 "nonimmediate_operand")
+	(match_operand:V_32 1 "nonimmediate_operand"))]
   "TARGET_SSE2"
 {
   ix86_expand_vector_move (<MODE>mode, operands);
@@ -2006,6 +2009,23 @@ (define_expand "<insn><mode>3"
 	  (match_operand:DI 2 "nonmemory_operand")))]
   "TARGET_MMX_WITH_SSE")
 
+(define_insn "mmx_<insn>v1si3"
+  [(set (match_operand:V1SI 0 "register_operand" "=x,Yw")
+	(any_lshift:V1SI
+	  (match_operand:V1SI 1 "register_operand" "0,Yw")
+	  (match_operand:DI 2 "nonmemory_operand" "xN,YwN")))]
+  "TARGET_SSE2"
+  "@
+   p<vshift>d\t{%2, %0|%0, %2}
+   vp<vshift>d\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "isa" "noavx,avx")
+   (set_attr "type" "sseishft")
+   (set (attr "length_immediate")
+     (if_then_else (match_operand 2 "const_int_operand")
+       (const_string "1")
+       (const_string "0")))
+   (set_attr "mode" "TI")])
+
 (define_insn "<insn>v2hi3"
   [(set (match_operand:V2HI 0 "register_operand" "=x,Yw")
 	(any_shift:V2HI
@@ -2732,6 +2752,20 @@ (define_insn "sse4_1_<code>v2hiv2si2"
    (set_attr "prefix" "orig,orig,maybe_evex")
    (set_attr "mode" "TI")])
 
+(define_insn "sse4_1_<code>v2qiv2hi2"
+  [(set (match_operand:V2HI 0 "register_operand" "=Yr,*x,Yw")
+	(any_extend:V2HI
+	  (vec_select:V2QI
+	    (match_operand:V4QI 1 "register_operand" "Yr,*x,Yw")
+	    (parallel [(const_int 0) (const_int 1)]))))]
+  "TARGET_SSE4_1"
+  "%vpmov<extsuffix>bw\t{%1, %0|%0, %1}"
+  [(set_attr "isa" "noavx,noavx,avx")
+   (set_attr "type" "ssemov")
+   (set_attr "prefix_extra" "1")
+   (set_attr "prefix" "orig,orig,maybe_evex")
+   (set_attr "mode" "TI")])
+
 ;; Pack/unpack vector modes
 (define_mode_attr mmxpackmode
   [(V4HI "V8QI") (V2SI "V4HI")])
@@ -2748,6 +2782,18 @@ (define_expand "vec_pack_trunc_<mode>"
   DONE;
 })
 
+(define_expand "vec_pack_trunc_v2hi"
+  [(match_operand:V4QI 0 "register_operand")
+   (match_operand:V2HI 1 "register_operand")
+   (match_operand:V2HI 2 "register_operand")]
+  "TARGET_SSE2"
+{
+  rtx op1 = gen_lowpart (V4QImode, operands[1]);
+  rtx op2 = gen_lowpart (V4QImode, operands[2]);
+  ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
+  DONE;
+})
+
 (define_mode_attr mmxunpackmode
   [(V8QI "V4HI") (V4HI "V2SI")])
 
@@ -2775,6 +2821,30 @@ (define_expand "vec_unpacku_hi_<mode>"
   "TARGET_MMX_WITH_SSE"
   "ix86_expand_sse_unpack (operands[0], operands[1], true, true); DONE;")
 
+(define_expand "vec_unpacks_lo_v4qi"
+  [(match_operand:V2HI 0 "register_operand")
+   (match_operand:V4QI 1 "register_operand")]
+  "TARGET_SSE2"
+  "ix86_expand_sse_unpack (operands[0], operands[1], false, false); DONE;")
+
+(define_expand "vec_unpacks_hi_v4qi"
+  [(match_operand:V2HI 0 "register_operand")
+   (match_operand:V4QI 1 "register_operand")]
+  "TARGET_SSE2"
+  "ix86_expand_sse_unpack (operands[0], operands[1], false, true); DONE;")
+
+(define_expand "vec_unpacku_lo_v4qi"
+  [(match_operand:V2HI 0 "register_operand")
+   (match_operand:V4QI 1 "register_operand")]
+  "TARGET_SSE2"
+  "ix86_expand_sse_unpack (operands[0], operands[1], true, false); DONE;")
+
+(define_expand "vec_unpacku_hi_v4qi"
+  [(match_operand:V2HI 0 "register_operand")
+   (match_operand:V4QI 1 "register_operand")]
+  "TARGET_SSE2"
+  "ix86_expand_sse_unpack (operands[0], operands[1], true, true); DONE;")
+
 (define_insn "*mmx_pinsrd"
   [(set (match_operand:V2SI 0 "register_operand" "=x,Yv")
 	(vec_merge:V2SI