Index: sse.md =================================================================== --- sse.md (revision 172365) +++ sse.md (working copy) @@ -7015,41 +7015,16 @@ "TARGET_SSE2" "ix86_fixup_binary_operands_no_copy (PLUS, V16QImode, operands);") -(define_insn "*avx_uavgv16qi3" - [(set (match_operand:V16QI 0 "register_operand" "=x") - (truncate:V16QI - (lshiftrt:V16HI - (plus:V16HI - (plus:V16HI - (zero_extend:V16HI - (match_operand:V16QI 1 "nonimmediate_operand" "%x")) - (zero_extend:V16HI - (match_operand:V16QI 2 "nonimmediate_operand" "xm"))) - (const_vector:V16QI [(const_int 1) (const_int 1) - (const_int 1) (const_int 1) - (const_int 1) (const_int 1) - (const_int 1) (const_int 1) - (const_int 1) (const_int 1) - (const_int 1) (const_int 1) - (const_int 1) (const_int 1) - (const_int 1) (const_int 1)])) - (const_int 1))))] - "TARGET_AVX && ix86_binary_operator_ok (PLUS, V16QImode, operands)" - "vpavgb\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "type" "sseiadd") - (set_attr "prefix" "vex") - (set_attr "mode" "TI")]) - (define_insn "*sse2_uavgv16qi3" - [(set (match_operand:V16QI 0 "register_operand" "=x") + [(set (match_operand:V16QI 0 "register_operand" "=x,x") (truncate:V16QI (lshiftrt:V16HI (plus:V16HI (plus:V16HI (zero_extend:V16HI - (match_operand:V16QI 1 "nonimmediate_operand" "%0")) + (match_operand:V16QI 1 "nonimmediate_operand" "%0,x")) (zero_extend:V16HI - (match_operand:V16QI 2 "nonimmediate_operand" "xm"))) + (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm"))) (const_vector:V16QI [(const_int 1) (const_int 1) (const_int 1) (const_int 1) (const_int 1) (const_int 1) @@ -7060,9 +7035,13 @@ (const_int 1) (const_int 1)])) (const_int 1))))] "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V16QImode, operands)" - "pavgb\t{%2, %0|%0, %2}" - [(set_attr "type" "sseiadd") - (set_attr "prefix_data16" "1") + "@ + pavgb\t{%2, %0|%0, %2} + vpavgb\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "noavx,avx") + (set_attr "type" "sseiadd") + (set_attr "prefix_data16" "1,*") + (set_attr "prefix" "orig,vex") (set_attr "mode" "TI")]) (define_expand "sse2_uavgv8hi3" @@ -7083,71 +7062,47 @@ "TARGET_SSE2" "ix86_fixup_binary_operands_no_copy (PLUS, V8HImode, operands);") -(define_insn "*avx_uavgv8hi3" - [(set (match_operand:V8HI 0 "register_operand" "=x") - (truncate:V8HI - (lshiftrt:V8SI - (plus:V8SI - (plus:V8SI - (zero_extend:V8SI - (match_operand:V8HI 1 "nonimmediate_operand" "%x")) - (zero_extend:V8SI - (match_operand:V8HI 2 "nonimmediate_operand" "xm"))) - (const_vector:V8HI [(const_int 1) (const_int 1) - (const_int 1) (const_int 1) - (const_int 1) (const_int 1) - (const_int 1) (const_int 1)])) - (const_int 1))))] - "TARGET_AVX && ix86_binary_operator_ok (PLUS, V8HImode, operands)" - "vpavgw\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "type" "sseiadd") - (set_attr "prefix" "vex") - (set_attr "mode" "TI")]) - (define_insn "*sse2_uavgv8hi3" - [(set (match_operand:V8HI 0 "register_operand" "=x") + [(set (match_operand:V8HI 0 "register_operand" "=x,x") (truncate:V8HI (lshiftrt:V8SI (plus:V8SI (plus:V8SI (zero_extend:V8SI - (match_operand:V8HI 1 "nonimmediate_operand" "%0")) + (match_operand:V8HI 1 "nonimmediate_operand" "%0,x")) (zero_extend:V8SI - (match_operand:V8HI 2 "nonimmediate_operand" "xm"))) + (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm"))) (const_vector:V8HI [(const_int 1) (const_int 1) (const_int 1) (const_int 1) (const_int 1) (const_int 1) (const_int 1) (const_int 1)])) (const_int 1))))] "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V8HImode, operands)" - "pavgw\t{%2, %0|%0, %2}" - [(set_attr "type" "sseiadd") - (set_attr "prefix_data16" "1") + "@ + pavgw\t{%2, %0|%0, %2} + vpavgw\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "noavx,avx") + (set_attr "type" "sseiadd") + (set_attr "prefix_data16" "1,*") + (set_attr "prefix" "orig,vex") (set_attr "mode" "TI")]) ;; The correct representation for this is absolutely enormous, and ;; surely not generally useful. -(define_insn "*avx_psadbw" - [(set (match_operand:V2DI 0 "register_operand" "=x") - (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "x") - (match_operand:V16QI 2 "nonimmediate_operand" "xm")] - UNSPEC_PSADBW))] - "TARGET_AVX" - "vpsadbw\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "type" "sseiadd") - (set_attr "prefix" "vex") - (set_attr "mode" "TI")]) - (define_insn "sse2_psadbw" - [(set (match_operand:V2DI 0 "register_operand" "=x") - (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "0") - (match_operand:V16QI 2 "nonimmediate_operand" "xm")] + [(set (match_operand:V2DI 0 "register_operand" "=x,x") + (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "0,x") + (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")] UNSPEC_PSADBW))] "TARGET_SSE2" - "psadbw\t{%2, %0|%0, %2}" - [(set_attr "type" "sseiadd") + "@ + psadbw\t{%2, %0|%0, %2} + vpsadbw\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "noavx,avx") + (set_attr "type" "sseiadd") (set_attr "atom_unit" "simul") - (set_attr "prefix_data16" "1") + (set_attr "prefix_data16" "1,*") + (set_attr "prefix" "orig,vex") (set_attr "mode" "TI")]) (define_insn "avx_movmsk256" @@ -7348,58 +7303,14 @@ ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -(define_insn "*avx_phaddwv8hi3" - [(set (match_operand:V8HI 0 "register_operand" "=x") - (vec_concat:V8HI - (vec_concat:V4HI - (vec_concat:V2HI - (plus:HI - (vec_select:HI - (match_operand:V8HI 1 "register_operand" "x") - (parallel [(const_int 0)])) - (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))) - (plus:HI - (vec_select:HI (match_dup 1) (parallel [(const_int 2)])) - (vec_select:HI (match_dup 1) (parallel [(const_int 3)])))) - (vec_concat:V2HI - (plus:HI - (vec_select:HI (match_dup 1) (parallel [(const_int 4)])) - (vec_select:HI (match_dup 1) (parallel [(const_int 5)]))) - (plus:HI - (vec_select:HI (match_dup 1) (parallel [(const_int 6)])) - (vec_select:HI (match_dup 1) (parallel [(const_int 7)]))))) - (vec_concat:V4HI - (vec_concat:V2HI - (plus:HI - (vec_select:HI - (match_operand:V8HI 2 "nonimmediate_operand" "xm") - (parallel [(const_int 0)])) - (vec_select:HI (match_dup 2) (parallel [(const_int 1)]))) - (plus:HI - (vec_select:HI (match_dup 2) (parallel [(const_int 2)])) - (vec_select:HI (match_dup 2) (parallel [(const_int 3)])))) - (vec_concat:V2HI - (plus:HI - (vec_select:HI (match_dup 2) (parallel [(const_int 4)])) - (vec_select:HI (match_dup 2) (parallel [(const_int 5)]))) - (plus:HI - (vec_select:HI (match_dup 2) (parallel [(const_int 6)])) - (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))] - "TARGET_AVX" - "vphaddw\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "type" "sseiadd") - (set_attr "prefix_extra" "1") - (set_attr "prefix" "vex") - (set_attr "mode" "TI")]) - (define_insn "ssse3_phaddwv8hi3" - [(set (match_operand:V8HI 0 "register_operand" "=x") + [(set (match_operand:V8HI 0 "register_operand" "=x,x") (vec_concat:V8HI (vec_concat:V4HI (vec_concat:V2HI (plus:HI (vec_select:HI - (match_operand:V8HI 1 "register_operand" "0") + (match_operand:V8HI 1 "register_operand" "0,x") (parallel [(const_int 0)])) (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))) (plus:HI @@ -7416,7 +7327,7 @@ (vec_concat:V2HI (plus:HI (vec_select:HI - (match_operand:V8HI 2 "nonimmediate_operand" "xm") + (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm") (parallel [(const_int 0)])) (vec_select:HI (match_dup 2) (parallel [(const_int 1)]))) (plus:HI @@ -7430,11 +7341,15 @@ (vec_select:HI (match_dup 2) (parallel [(const_int 6)])) (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))] "TARGET_SSSE3" - "phaddw\t{%2, %0|%0, %2}" - [(set_attr "type" "sseiadd") + "@ + phaddw\t{%2, %0|%0, %2} + vphaddw\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "noavx,avx") + (set_attr "type" "sseiadd") (set_attr "atom_unit" "complex") - (set_attr "prefix_data16" "1") + (set_attr "prefix_data16" "1,*") (set_attr "prefix_extra" "1") + (set_attr "prefix" "orig,vex") (set_attr "mode" "TI")]) (define_insn "ssse3_phaddwv4hi3" @@ -7466,41 +7381,13 @@ (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)")) (set_attr "mode" "DI")]) -(define_insn "*avx_phadddv4si3" - [(set (match_operand:V4SI 0 "register_operand" "=x") - (vec_concat:V4SI - (vec_concat:V2SI - (plus:SI - (vec_select:SI - (match_operand:V4SI 1 "register_operand" "x") - (parallel [(const_int 0)])) - (vec_select:SI (match_dup 1) (parallel [(const_int 1)]))) - (plus:SI - (vec_select:SI (match_dup 1) (parallel [(const_int 2)])) - (vec_select:SI (match_dup 1) (parallel [(const_int 3)])))) - (vec_concat:V2SI - (plus:SI - (vec_select:SI - (match_operand:V4SI 2 "nonimmediate_operand" "xm") - (parallel [(const_int 0)])) - (vec_select:SI (match_dup 2) (parallel [(const_int 1)]))) - (plus:SI - (vec_select:SI (match_dup 2) (parallel [(const_int 2)])) - (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))] - "TARGET_AVX" - "vphaddd\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "type" "sseiadd") - (set_attr "prefix_extra" "1") - (set_attr "prefix" "vex") - (set_attr "mode" "TI")]) - (define_insn "ssse3_phadddv4si3" - [(set (match_operand:V4SI 0 "register_operand" "=x") + [(set (match_operand:V4SI 0 "register_operand" "=x,x") (vec_concat:V4SI (vec_concat:V2SI (plus:SI (vec_select:SI - (match_operand:V4SI 1 "register_operand" "0") + (match_operand:V4SI 1 "register_operand" "0,x") (parallel [(const_int 0)])) (vec_select:SI (match_dup 1) (parallel [(const_int 1)]))) (plus:SI @@ -7509,18 +7396,22 @@ (vec_concat:V2SI (plus:SI (vec_select:SI - (match_operand:V4SI 2 "nonimmediate_operand" "xm") + (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm") (parallel [(const_int 0)])) (vec_select:SI (match_dup 2) (parallel [(const_int 1)]))) (plus:SI (vec_select:SI (match_dup 2) (parallel [(const_int 2)])) (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))] "TARGET_SSSE3" - "phaddd\t{%2, %0|%0, %2}" - [(set_attr "type" "sseiadd") + "@ + phaddd\t{%2, %0|%0, %2} + vphaddd\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "noavx,avx") + (set_attr "type" "sseiadd") (set_attr "atom_unit" "complex") - (set_attr "prefix_data16" "1") + (set_attr "prefix_data16" "1,*") (set_attr "prefix_extra" "1") + (set_attr "prefix" "orig,vex") (set_attr "mode" "TI")]) (define_insn "ssse3_phadddv2si3" @@ -7544,58 +7435,14 @@ (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)")) (set_attr "mode" "DI")]) -(define_insn "*avx_phaddswv8hi3" - [(set (match_operand:V8HI 0 "register_operand" "=x") - (vec_concat:V8HI - (vec_concat:V4HI - (vec_concat:V2HI - (ss_plus:HI - (vec_select:HI - (match_operand:V8HI 1 "register_operand" "x") - (parallel [(const_int 0)])) - (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))) - (ss_plus:HI - (vec_select:HI (match_dup 1) (parallel [(const_int 2)])) - (vec_select:HI (match_dup 1) (parallel [(const_int 3)])))) - (vec_concat:V2HI - (ss_plus:HI - (vec_select:HI (match_dup 1) (parallel [(const_int 4)])) - (vec_select:HI (match_dup 1) (parallel [(const_int 5)]))) - (ss_plus:HI - (vec_select:HI (match_dup 1) (parallel [(const_int 6)])) - (vec_select:HI (match_dup 1) (parallel [(const_int 7)]))))) - (vec_concat:V4HI - (vec_concat:V2HI - (ss_plus:HI - (vec_select:HI - (match_operand:V8HI 2 "nonimmediate_operand" "xm") - (parallel [(const_int 0)])) - (vec_select:HI (match_dup 2) (parallel [(const_int 1)]))) - (ss_plus:HI - (vec_select:HI (match_dup 2) (parallel [(const_int 2)])) - (vec_select:HI (match_dup 2) (parallel [(const_int 3)])))) - (vec_concat:V2HI - (ss_plus:HI - (vec_select:HI (match_dup 2) (parallel [(const_int 4)])) - (vec_select:HI (match_dup 2) (parallel [(const_int 5)]))) - (ss_plus:HI - (vec_select:HI (match_dup 2) (parallel [(const_int 6)])) - (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))] - "TARGET_AVX" - "vphaddsw\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "type" "sseiadd") - (set_attr "prefix_extra" "1") - (set_attr "prefix" "vex") - (set_attr "mode" "TI")]) - (define_insn "ssse3_phaddswv8hi3" - [(set (match_operand:V8HI 0 "register_operand" "=x") + [(set (match_operand:V8HI 0 "register_operand" "=x,x") (vec_concat:V8HI (vec_concat:V4HI (vec_concat:V2HI (ss_plus:HI (vec_select:HI - (match_operand:V8HI 1 "register_operand" "0") + (match_operand:V8HI 1 "register_operand" "0,x") (parallel [(const_int 0)])) (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))) (ss_plus:HI @@ -7612,7 +7459,7 @@ (vec_concat:V2HI (ss_plus:HI (vec_select:HI - (match_operand:V8HI 2 "nonimmediate_operand" "xm") + (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm") (parallel [(const_int 0)])) (vec_select:HI (match_dup 2) (parallel [(const_int 1)]))) (ss_plus:HI @@ -7626,11 +7473,15 @@ (vec_select:HI (match_dup 2) (parallel [(const_int 6)])) (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))] "TARGET_SSSE3" - "phaddsw\t{%2, %0|%0, %2}" - [(set_attr "type" "sseiadd") + "@ + phaddsw\t{%2, %0|%0, %2} + vphaddsw\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "noavx,avx") + (set_attr "type" "sseiadd") (set_attr "atom_unit" "complex") - (set_attr "prefix_data16" "1") + (set_attr "prefix_data16" "1,*") (set_attr "prefix_extra" "1") + (set_attr "prefix" "orig,vex") (set_attr "mode" "TI")]) (define_insn "ssse3_phaddswv4hi3" @@ -7662,58 +7513,14 @@ (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)")) (set_attr "mode" "DI")]) -(define_insn "*avx_phsubwv8hi3" - [(set (match_operand:V8HI 0 "register_operand" "=x") - (vec_concat:V8HI - (vec_concat:V4HI - (vec_concat:V2HI - (minus:HI - (vec_select:HI - (match_operand:V8HI 1 "register_operand" "x") - (parallel [(const_int 0)])) - (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))) - (minus:HI - (vec_select:HI (match_dup 1) (parallel [(const_int 2)])) - (vec_select:HI (match_dup 1) (parallel [(const_int 3)])))) - (vec_concat:V2HI - (minus:HI - (vec_select:HI (match_dup 1) (parallel [(const_int 4)])) - (vec_select:HI (match_dup 1) (parallel [(const_int 5)]))) - (minus:HI - (vec_select:HI (match_dup 1) (parallel [(const_int 6)])) - (vec_select:HI (match_dup 1) (parallel [(const_int 7)]))))) - (vec_concat:V4HI - (vec_concat:V2HI - (minus:HI - (vec_select:HI - (match_operand:V8HI 2 "nonimmediate_operand" "xm") - (parallel [(const_int 0)])) - (vec_select:HI (match_dup 2) (parallel [(const_int 1)]))) - (minus:HI - (vec_select:HI (match_dup 2) (parallel [(const_int 2)])) - (vec_select:HI (match_dup 2) (parallel [(const_int 3)])))) - (vec_concat:V2HI - (minus:HI - (vec_select:HI (match_dup 2) (parallel [(const_int 4)])) - (vec_select:HI (match_dup 2) (parallel [(const_int 5)]))) - (minus:HI - (vec_select:HI (match_dup 2) (parallel [(const_int 6)])) - (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))] - "TARGET_AVX" - "vphsubw\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "type" "sseiadd") - (set_attr "prefix_extra" "1") - (set_attr "prefix" "vex") - (set_attr "mode" "TI")]) - (define_insn "ssse3_phsubwv8hi3" - [(set (match_operand:V8HI 0 "register_operand" "=x") + [(set (match_operand:V8HI 0 "register_operand" "=x,x") (vec_concat:V8HI (vec_concat:V4HI (vec_concat:V2HI (minus:HI (vec_select:HI - (match_operand:V8HI 1 "register_operand" "0") + (match_operand:V8HI 1 "register_operand" "0,x") (parallel [(const_int 0)])) (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))) (minus:HI @@ -7730,7 +7537,7 @@ (vec_concat:V2HI (minus:HI (vec_select:HI - (match_operand:V8HI 2 "nonimmediate_operand" "xm") + (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm") (parallel [(const_int 0)])) (vec_select:HI (match_dup 2) (parallel [(const_int 1)]))) (minus:HI @@ -7744,11 +7551,15 @@ (vec_select:HI (match_dup 2) (parallel [(const_int 6)])) (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))] "TARGET_SSSE3" - "phsubw\t{%2, %0|%0, %2}" - [(set_attr "type" "sseiadd") + "@ + phsubw\t{%2, %0|%0, %2} + vphsubw\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "noavx,avx") + (set_attr "type" "sseiadd") (set_attr "atom_unit" "complex") - (set_attr "prefix_data16" "1") + (set_attr "prefix_data16" "1,*") (set_attr "prefix_extra" "1") + (set_attr "prefix" "orig,vex") (set_attr "mode" "TI")]) (define_insn "ssse3_phsubwv4hi3" @@ -7780,41 +7591,13 @@ (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)")) (set_attr "mode" "DI")]) -(define_insn "*avx_phsubdv4si3" - [(set (match_operand:V4SI 0 "register_operand" "=x") - (vec_concat:V4SI - (vec_concat:V2SI - (minus:SI - (vec_select:SI - (match_operand:V4SI 1 "register_operand" "x") - (parallel [(const_int 0)])) - (vec_select:SI (match_dup 1) (parallel [(const_int 1)]))) - (minus:SI - (vec_select:SI (match_dup 1) (parallel [(const_int 2)])) - (vec_select:SI (match_dup 1) (parallel [(const_int 3)])))) - (vec_concat:V2SI - (minus:SI - (vec_select:SI - (match_operand:V4SI 2 "nonimmediate_operand" "xm") - (parallel [(const_int 0)])) - (vec_select:SI (match_dup 2) (parallel [(const_int 1)]))) - (minus:SI - (vec_select:SI (match_dup 2) (parallel [(const_int 2)])) - (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))] - "TARGET_AVX" - "vphsubd\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "type" "sseiadd") - (set_attr "prefix_extra" "1") - (set_attr "prefix" "vex") - (set_attr "mode" "TI")]) - (define_insn "ssse3_phsubdv4si3" - [(set (match_operand:V4SI 0 "register_operand" "=x") + [(set (match_operand:V4SI 0 "register_operand" "=x,x") (vec_concat:V4SI (vec_concat:V2SI (minus:SI (vec_select:SI - (match_operand:V4SI 1 "register_operand" "0") + (match_operand:V4SI 1 "register_operand" "0,x") (parallel [(const_int 0)])) (vec_select:SI (match_dup 1) (parallel [(const_int 1)]))) (minus:SI @@ -7823,18 +7606,23 @@ (vec_concat:V2SI (minus:SI (vec_select:SI - (match_operand:V4SI 2 "nonimmediate_operand" "xm") + (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm") (parallel [(const_int 0)])) (vec_select:SI (match_dup 2) (parallel [(const_int 1)]))) (minus:SI (vec_select:SI (match_dup 2) (parallel [(const_int 2)])) (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))] "TARGET_SSSE3" - "phsubd\t{%2, %0|%0, %2}" - [(set_attr "type" "sseiadd") + "@ + phsubd\t{%2, %0|%0, %2} + vphsubd\t{%2, %1, %0|%0, %1, %2}" + + [(set_attr "isa" "noavx,avx") + (set_attr "type" "sseiadd") (set_attr "atom_unit" "complex") - (set_attr "prefix_data16" "1") + (set_attr "prefix_data16" "1,*") (set_attr "prefix_extra" "1") + (set_attr "prefix" "orig,vex") (set_attr "mode" "TI")]) (define_insn "ssse3_phsubdv2si3" @@ -7858,58 +7646,14 @@ (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)")) (set_attr "mode" "DI")]) -(define_insn "*avx_phsubswv8hi3" - [(set (match_operand:V8HI 0 "register_operand" "=x") - (vec_concat:V8HI - (vec_concat:V4HI - (vec_concat:V2HI - (ss_minus:HI - (vec_select:HI - (match_operand:V8HI 1 "register_operand" "x") - (parallel [(const_int 0)])) - (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))) - (ss_minus:HI - (vec_select:HI (match_dup 1) (parallel [(const_int 2)])) - (vec_select:HI (match_dup 1) (parallel [(const_int 3)])))) - (vec_concat:V2HI - (ss_minus:HI - (vec_select:HI (match_dup 1) (parallel [(const_int 4)])) - (vec_select:HI (match_dup 1) (parallel [(const_int 5)]))) - (ss_minus:HI - (vec_select:HI (match_dup 1) (parallel [(const_int 6)])) - (vec_select:HI (match_dup 1) (parallel [(const_int 7)]))))) - (vec_concat:V4HI - (vec_concat:V2HI - (ss_minus:HI - (vec_select:HI - (match_operand:V8HI 2 "nonimmediate_operand" "xm") - (parallel [(const_int 0)])) - (vec_select:HI (match_dup 2) (parallel [(const_int 1)]))) - (ss_minus:HI - (vec_select:HI (match_dup 2) (parallel [(const_int 2)])) - (vec_select:HI (match_dup 2) (parallel [(const_int 3)])))) - (vec_concat:V2HI - (ss_minus:HI - (vec_select:HI (match_dup 2) (parallel [(const_int 4)])) - (vec_select:HI (match_dup 2) (parallel [(const_int 5)]))) - (ss_minus:HI - (vec_select:HI (match_dup 2) (parallel [(const_int 6)])) - (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))] - "TARGET_AVX" - "vphsubsw\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "type" "sseiadd") - (set_attr "prefix_extra" "1") - (set_attr "prefix" "vex") - (set_attr "mode" "TI")]) - (define_insn "ssse3_phsubswv8hi3" - [(set (match_operand:V8HI 0 "register_operand" "=x") + [(set (match_operand:V8HI 0 "register_operand" "=x,x") (vec_concat:V8HI (vec_concat:V4HI (vec_concat:V2HI (ss_minus:HI (vec_select:HI - (match_operand:V8HI 1 "register_operand" "0") + (match_operand:V8HI 1 "register_operand" "0,x") (parallel [(const_int 0)])) (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))) (ss_minus:HI @@ -7926,7 +7670,7 @@ (vec_concat:V2HI (ss_minus:HI (vec_select:HI - (match_operand:V8HI 2 "nonimmediate_operand" "xm") + (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm") (parallel [(const_int 0)])) (vec_select:HI (match_dup 2) (parallel [(const_int 1)]))) (ss_minus:HI @@ -7940,11 +7684,15 @@ (vec_select:HI (match_dup 2) (parallel [(const_int 6)])) (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))] "TARGET_SSSE3" - "phsubsw\t{%2, %0|%0, %2}" - [(set_attr "type" "sseiadd") + "@ + phsubsw\t{%2, %0|%0, %2} + vphsubsw\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "noavx,avx") + (set_attr "type" "sseiadd") (set_attr "atom_unit" "complex") - (set_attr "prefix_data16" "1") + (set_attr "prefix_data16" "1,*") (set_attr "prefix_extra" "1") + (set_attr "prefix" "orig,vex") (set_attr "mode" "TI")]) (define_insn "ssse3_phsubswv4hi3" @@ -7976,67 +7724,13 @@ (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)")) (set_attr "mode" "DI")]) -(define_insn "*avx_pmaddubsw128" - [(set (match_operand:V8HI 0 "register_operand" "=x") - (ss_plus:V8HI - (mult:V8HI - (zero_extend:V8HI - (vec_select:V8QI - (match_operand:V16QI 1 "register_operand" "x") - (parallel [(const_int 0) - (const_int 2) - (const_int 4) - (const_int 6) - (const_int 8) - (const_int 10) - (const_int 12) - (const_int 14)]))) - (sign_extend:V8HI - (vec_select:V8QI - (match_operand:V16QI 2 "nonimmediate_operand" "xm") - (parallel [(const_int 0) - (const_int 2) - (const_int 4) - (const_int 6) - (const_int 8) - (const_int 10) - (const_int 12) - (const_int 14)])))) - (mult:V8HI - (zero_extend:V8HI - (vec_select:V8QI (match_dup 1) - (parallel [(const_int 1) - (const_int 3) - (const_int 5) - (const_int 7) - (const_int 9) - (const_int 11) - (const_int 13) - (const_int 15)]))) - (sign_extend:V8HI - (vec_select:V8QI (match_dup 2) - (parallel [(const_int 1) - (const_int 3) - (const_int 5) - (const_int 7) - (const_int 9) - (const_int 11) - (const_int 13) - (const_int 15)]))))))] - "TARGET_AVX" - "vpmaddubsw\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "type" "sseiadd") - (set_attr "prefix_extra" "1") - (set_attr "prefix" "vex") - (set_attr "mode" "TI")]) - (define_insn "ssse3_pmaddubsw128" - [(set (match_operand:V8HI 0 "register_operand" "=x") + [(set (match_operand:V8HI 0 "register_operand" "=x,x") (ss_plus:V8HI (mult:V8HI (zero_extend:V8HI (vec_select:V8QI - (match_operand:V16QI 1 "register_operand" "0") + (match_operand:V16QI 1 "register_operand" "0,x") (parallel [(const_int 0) (const_int 2) (const_int 4) @@ -8047,7 +7741,7 @@ (const_int 14)]))) (sign_extend:V8HI (vec_select:V8QI - (match_operand:V16QI 2 "nonimmediate_operand" "xm") + (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm") (parallel [(const_int 0) (const_int 2) (const_int 4) @@ -8078,11 +7772,15 @@ (const_int 13) (const_int 15)]))))))] "TARGET_SSSE3" - "pmaddubsw\t{%2, %0|%0, %2}" - [(set_attr "type" "sseiadd") + "@ + pmaddubsw\t{%2, %0|%0, %2} + vpmaddubsw\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "noavx,avx") + (set_attr "type" "sseiadd") (set_attr "atom_unit" "simul") - (set_attr "prefix_data16" "1") + (set_attr "prefix_data16" "1,*") (set_attr "prefix_extra" "1") + (set_attr "prefix" "orig,vex") (set_attr "mode" "TI")]) (define_insn "ssse3_pmaddubsw" @@ -8144,41 +7842,17 @@ "TARGET_SSSE3" "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);") -(define_insn "*avx_pmulhrswv8hi3" - [(set (match_operand:V8HI 0 "register_operand" "=x") - (truncate:V8HI - (lshiftrt:V8SI - (plus:V8SI - (lshiftrt:V8SI - (mult:V8SI - (sign_extend:V8SI - (match_operand:V8HI 1 "nonimmediate_operand" "%x")) - (sign_extend:V8SI - (match_operand:V8HI 2 "nonimmediate_operand" "xm"))) - (const_int 14)) - (const_vector:V8HI [(const_int 1) (const_int 1) - (const_int 1) (const_int 1) - (const_int 1) (const_int 1) - (const_int 1) (const_int 1)])) - (const_int 1))))] - "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)" - "vpmulhrsw\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "type" "sseimul") - (set_attr "prefix_extra" "1") - (set_attr "prefix" "vex") - (set_attr "mode" "TI")]) - (define_insn "*ssse3_pmulhrswv8hi3" - [(set (match_operand:V8HI 0 "register_operand" "=x") + [(set (match_operand:V8HI 0 "register_operand" "=x,x") (truncate:V8HI (lshiftrt:V8SI (plus:V8SI (lshiftrt:V8SI (mult:V8SI (sign_extend:V8SI - (match_operand:V8HI 1 "nonimmediate_operand" "%0")) + (match_operand:V8HI 1 "nonimmediate_operand" "%0,x")) (sign_extend:V8SI - (match_operand:V8HI 2 "nonimmediate_operand" "xm"))) + (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm"))) (const_int 14)) (const_vector:V8HI [(const_int 1) (const_int 1) (const_int 1) (const_int 1) @@ -8186,10 +7860,14 @@ (const_int 1) (const_int 1)])) (const_int 1))))] "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V8HImode, operands)" - "pmulhrsw\t{%2, %0|%0, %2}" - [(set_attr "type" "sseimul") - (set_attr "prefix_data16" "1") + "@ + pmulhrsw\t{%2, %0|%0, %2} + vpmulhrsw\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "noavx,avx") + (set_attr "type" "sseimul") + (set_attr "prefix_data16" "1,*") (set_attr "prefix_extra" "1") + (set_attr "prefix" "orig,vex") (set_attr "mode" "TI")]) (define_expand "ssse3_pmulhrswv4hi3" @@ -8232,28 +7910,20 @@ (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)")) (set_attr "mode" "DI")]) -(define_insn "*avx_pshufbv16qi3" - [(set (match_operand:V16QI 0 "register_operand" "=x") - (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x") - (match_operand:V16QI 2 "nonimmediate_operand" "xm")] - UNSPEC_PSHUFB))] - "TARGET_AVX" - "vpshufb\t{%2, %1, %0|%0, %1, %2}"; - [(set_attr "type" "sselog1") - (set_attr "prefix_extra" "1") - (set_attr "prefix" "vex") - (set_attr "mode" "TI")]) - (define_insn "ssse3_pshufbv16qi3" - [(set (match_operand:V16QI 0 "register_operand" "=x") - (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0") - (match_operand:V16QI 2 "nonimmediate_operand" "xm")] + [(set (match_operand:V16QI 0 "register_operand" "=x,x") + (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0,x") + (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")] UNSPEC_PSHUFB))] "TARGET_SSSE3" - "pshufb\t{%2, %0|%0, %2}"; - [(set_attr "type" "sselog1") - (set_attr "prefix_data16" "1") + "@ + pshufb\t{%2, %0|%0, %2} + vpshufb\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "noavx,avx") + (set_attr "type" "sselog1") + (set_attr "prefix_data16" "1,*") (set_attr "prefix_extra" "1") + (set_attr "prefix" "orig,vex") (set_attr "mode" "TI")]) (define_insn "ssse3_pshufbv8qi3" @@ -8268,30 +7938,21 @@ (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)")) (set_attr "mode" "DI")]) -(define_insn "*avx_psign3" - [(set (match_operand:SSEMODE124 0 "register_operand" "=x") - (unspec:SSEMODE124 - [(match_operand:SSEMODE124 1 "register_operand" "x") - (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")] - UNSPEC_PSIGN))] - "TARGET_AVX" - "vpsign\t{%2, %1, %0|%0, %1, %2}"; - [(set_attr "type" "sselog1") - (set_attr "prefix_extra" "1") - (set_attr "prefix" "vex") - (set_attr "mode" "TI")]) - (define_insn "ssse3_psign3" - [(set (match_operand:SSEMODE124 0 "register_operand" "=x") + [(set (match_operand:SSEMODE124 0 "register_operand" "=x,x") (unspec:SSEMODE124 - [(match_operand:SSEMODE124 1 "register_operand" "0") - (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")] + [(match_operand:SSEMODE124 1 "register_operand" "0,x") + (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm,xm")] UNSPEC_PSIGN))] "TARGET_SSSE3" - "psign\t{%2, %0|%0, %2}"; - [(set_attr "type" "sselog1") - (set_attr "prefix_data16" "1") + "@ + psign\t{%2, %0|%0, %2} + vpsign\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "noavx,avx") + (set_attr "type" "sselog1") + (set_attr "prefix_data16" "1,*") (set_attr "prefix_extra" "1") + (set_attr "prefix" "orig,vex") (set_attr "mode" "TI")]) (define_insn "ssse3_psign3" @@ -8307,39 +7968,33 @@ (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)")) (set_attr "mode" "DI")]) -(define_insn "*avx_palignrti" - [(set (match_operand:TI 0 "register_operand" "=x") - (unspec:TI [(match_operand:TI 1 "register_operand" "x") - (match_operand:TI 2 "nonimmediate_operand" "xm") - (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")] - UNSPEC_PALIGNR))] - "TARGET_AVX" -{ - operands[3] = GEN_INT (INTVAL (operands[3]) / 8); - return "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}"; -} - [(set_attr "type" "sseishft") - (set_attr "prefix_extra" "1") - (set_attr "length_immediate" "1") - (set_attr "prefix" "vex") - (set_attr "mode" "TI")]) - (define_insn "ssse3_palignrti" - [(set (match_operand:TI 0 "register_operand" "=x") - (unspec:TI [(match_operand:TI 1 "register_operand" "0") - (match_operand:TI 2 "nonimmediate_operand" "xm") - (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")] + [(set (match_operand:TI 0 "register_operand" "=x,x") + (unspec:TI [(match_operand:TI 1 "register_operand" "0,x") + (match_operand:TI 2 "nonimmediate_operand" "xm,xm") + (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n,n")] UNSPEC_PALIGNR))] "TARGET_SSSE3" { operands[3] = GEN_INT (INTVAL (operands[3]) / 8); - return "palignr\t{%3, %2, %0|%0, %2, %3}"; + + switch (which_alternative) + { + case 0: + return "palignr\t{%3, %2, %0|%0, %2, %3}"; + case 1: + return "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}"; + default: + gcc_unreachable (); + } } - [(set_attr "type" "sseishft") + [(set_attr "isa" "noavx,avx") + (set_attr "type" "sseishft") (set_attr "atom_unit" "sishuf") - (set_attr "prefix_data16" "1") + (set_attr "prefix_data16" "1,*") (set_attr "prefix_extra" "1") (set_attr "length_immediate" "1") + (set_attr "prefix" "orig,vex") (set_attr "mode" "TI")]) (define_insn "ssse3_palignrdi" @@ -8362,7 +8017,8 @@ (define_insn "abs2" [(set (match_operand:SSEMODE124 0 "register_operand" "=x") - (abs:SSEMODE124 (match_operand:SSEMODE124 1 "nonimmediate_operand" "xm")))] + (abs:SSEMODE124 + (match_operand:SSEMODE124 1 "nonimmediate_operand" "xm")))] "TARGET_SSSE3" "%vpabs\t{%1, %0|%0, %1}" [(set_attr "type" "sselog1") @@ -8373,7 +8029,8 @@ (define_insn "abs2" [(set (match_operand:MMXMODEI 0 "register_operand" "=y") - (abs:MMXMODEI (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))] + (abs:MMXMODEI + (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))] "TARGET_SSSE3" "pabs\t{%1, %0|%0, %1}"; [(set_attr "type" "sselog1")