diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index ef342fcee9b..af72b6c48a9 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -50,6 +50,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see #define TARGET_16BIT_P(x) TARGET_CODE16_P(x) #define TARGET_MMX_WITH_SSE (TARGET_64BIT && TARGET_SSE2) +#define TARGET_MMXFP_WITH_SSE (TARGET_MMX_WITH_SSE && ix86_mmxfp_with_sse) #include "config/vxworks-dummy.h" diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt index 1cc8563477a..1b65fed5daf 100644 --- a/gcc/config/i386/i386.opt +++ b/gcc/config/i386/i386.opt @@ -670,6 +670,10 @@ m3dnowa Target Mask(ISA_3DNOW_A) Var(ix86_isa_flags) Save Support Athlon 3Dnow! built-in functions. +mmmxfp-with-sse +Target Var(ix86_mmxfp_with_sse) Init(1) +Enable MMX floating point vectors in SSE registers. + msse Target Mask(ISA_SSE) Var(ix86_isa_flags) Save Support MMX and SSE built-in functions and code generation. diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md index 896af76a33f..0555da9022b 100644 --- a/gcc/config/i386/mmx.md +++ b/gcc/config/i386/mmx.md @@ -597,7 +597,18 @@ (define_expand "movq__to_sse" (match_operand:V2FI 1 "nonimmediate_operand") (match_dup 2)))] "TARGET_SSE2" - "operands[2] = CONST0_RTX (mode);") +{ + if (mode == V2SFmode + && !flag_trapping_math) + { + rtx op1 = force_reg (mode, operands[1]); + emit_move_insn (operands[0], lowpart_subreg (mode, + op1, mode)); + DONE; + } + + operands[2] = CONST0_RTX (mode); +}) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; @@ -650,7 +661,7 @@ (define_expand "v2sf3" (plusminusmult:V2SF (match_operand:V2SF 1 "nonimmediate_operand") (match_operand:V2SF 2 "nonimmediate_operand")))] - "TARGET_MMX_WITH_SSE" + "TARGET_MMXFP_WITH_SSE" { rtx op2 = gen_reg_rtx (V4SFmode); rtx op1 = gen_reg_rtx (V4SFmode); @@ -728,7 +739,7 @@ (define_expand "divv2sf3" [(set (match_operand:V2SF 0 "register_operand") (div:V2SF (match_operand:V2SF 1 
"register_operand") (match_operand:V2SF 2 "register_operand")))] - "TARGET_MMX_WITH_SSE" + "TARGET_MMXFP_WITH_SSE" { rtx op2 = gen_reg_rtx (V4SFmode); rtx op1 = gen_reg_rtx (V4SFmode); @@ -750,7 +761,7 @@ (define_expand "v2sf3" (smaxmin:V2SF (match_operand:V2SF 1 "register_operand") (match_operand:V2SF 2 "register_operand")))] - "TARGET_MMX_WITH_SSE" + "TARGET_MMXFP_WITH_SSE" { rtx op2 = gen_reg_rtx (V4SFmode); rtx op1 = gen_reg_rtx (V4SFmode); @@ -852,7 +863,7 @@ (define_insn "mmx_rcpit2v2sf3" (define_expand "sqrtv2sf2" [(set (match_operand:V2SF 0 "register_operand") (sqrt:V2SF (match_operand:V2SF 1 "nonimmediate_operand")))] - "TARGET_MMX_WITH_SSE" + "TARGET_MMXFP_WITH_SSE" { rtx op1 = gen_reg_rtx (V4SFmode); rtx op0 = gen_reg_rtx (V4SFmode); @@ -933,7 +944,7 @@ (define_insn_and_split "*mmx_haddv2sf3_low" (vec_select:SF (match_dup 1) (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))))] - "TARGET_SSE3 && TARGET_MMX_WITH_SSE + "TARGET_SSE3 && TARGET_MMXFP_WITH_SSE && INTVAL (operands[2]) != INTVAL (operands[3]) && ix86_pre_reload_split ()" "#" @@ -979,7 +990,7 @@ (define_insn_and_split "*mmx_hsubv2sf3_low" (vec_select:SF (match_dup 1) (parallel [(const_int 1)]))))] - "TARGET_SSE3 && TARGET_MMX_WITH_SSE + "TARGET_SSE3 && TARGET_MMXFP_WITH_SSE && ix86_pre_reload_split ()" "#" "&& 1" @@ -1041,7 +1052,7 @@ (define_expand "vec_addsubv2sf3" (match_operand:V2SF 2 "nonimmediate_operand")) (plus:V2SF (match_dup 1) (match_dup 2)) (const_int 1)))] - "TARGET_SSE3 && TARGET_MMX_WITH_SSE" + "TARGET_SSE3 && TARGET_MMXFP_WITH_SSE" { rtx op2 = gen_reg_rtx (V4SFmode); rtx op1 = gen_reg_rtx (V4SFmode); @@ -1104,7 +1115,7 @@ (define_expand "vec_cmpv2sfv2si" (match_operator:V2SI 1 "" [(match_operand:V2SF 2 "nonimmediate_operand") (match_operand:V2SF 3 "nonimmediate_operand")]))] - "TARGET_MMX_WITH_SSE" + "TARGET_MMXFP_WITH_SSE" { rtx ops[4]; ops[3] = gen_reg_rtx (V4SFmode); @@ -1130,7 +1141,7 @@ (define_expand "vcondv2sf" (match_operand:V2SF 5 "nonimmediate_operand")]) 
(match_operand:V2FI 1 "general_operand") (match_operand:V2FI 2 "general_operand")))] - "TARGET_MMX_WITH_SSE" + "TARGET_MMXFP_WITH_SSE" { rtx ops[6]; ops[5] = gen_reg_rtx (V4SFmode); @@ -1320,7 +1331,7 @@ (define_expand "fmav2sf4" (match_operand:V2SF 2 "nonimmediate_operand") (match_operand:V2SF 3 "nonimmediate_operand")))] "(TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL) - && TARGET_MMX_WITH_SSE" + && TARGET_MMXFP_WITH_SSE" { rtx op3 = gen_reg_rtx (V4SFmode); rtx op2 = gen_reg_rtx (V4SFmode); @@ -1345,7 +1356,7 @@ (define_expand "fmsv2sf4" (neg:V2SF (match_operand:V2SF 3 "nonimmediate_operand"))))] "(TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL) - && TARGET_MMX_WITH_SSE" + && TARGET_MMXFP_WITH_SSE" { rtx op3 = gen_reg_rtx (V4SFmode); rtx op2 = gen_reg_rtx (V4SFmode); @@ -1370,7 +1381,7 @@ (define_expand "fnmav2sf4" (match_operand:V2SF 2 "nonimmediate_operand") (match_operand:V2SF 3 "nonimmediate_operand")))] "(TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL) - && TARGET_MMX_WITH_SSE" + && TARGET_MMXFP_WITH_SSE" { rtx op3 = gen_reg_rtx (V4SFmode); rtx op2 = gen_reg_rtx (V4SFmode); @@ -1396,7 +1407,7 @@ (define_expand "fnmsv2sf4" (neg:V2SF (match_operand:V2SF 3 "nonimmediate_operand"))))] "(TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL) - && TARGET_MMX_WITH_SSE" + && TARGET_MMXFP_WITH_SSE" { rtx op3 = gen_reg_rtx (V4SFmode); rtx op2 = gen_reg_rtx (V4SFmode); @@ -1422,7 +1433,7 @@ (define_expand "fnmsv2sf4" (define_expand "fix_truncv2sfv2si2" [(set (match_operand:V2SI 0 "register_operand") (fix:V2SI (match_operand:V2SF 1 "nonimmediate_operand")))] - "TARGET_MMX_WITH_SSE" + "TARGET_MMXFP_WITH_SSE" { rtx op1 = gen_reg_rtx (V4SFmode); rtx op0 = gen_reg_rtx (V4SImode); @@ -1438,7 +1449,7 @@ (define_expand "fix_truncv2sfv2si2" (define_expand "fixuns_truncv2sfv2si2" [(set (match_operand:V2SI 0 "register_operand") (unsigned_fix:V2SI (match_operand:V2SF 1 "nonimmediate_operand")))] - "TARGET_AVX512VL && TARGET_MMX_WITH_SSE" + "TARGET_AVX512VL && TARGET_MMXFP_WITH_SSE" { rtx op1 = 
gen_reg_rtx (V4SFmode); rtx op0 = gen_reg_rtx (V4SImode); @@ -1463,7 +1474,7 @@ (define_insn "mmx_fix_truncv2sfv2si2" (define_expand "floatv2siv2sf2" [(set (match_operand:V2SF 0 "register_operand") (float:V2SF (match_operand:V2SI 1 "nonimmediate_operand")))] - "TARGET_MMX_WITH_SSE" + "TARGET_MMXFP_WITH_SSE" { rtx op1 = gen_reg_rtx (V4SImode); rtx op0 = gen_reg_rtx (V4SFmode); @@ -1479,7 +1490,7 @@ (define_expand "floatv2siv2sf2" (define_expand "floatunsv2siv2sf2" [(set (match_operand:V2SF 0 "register_operand") (unsigned_float:V2SF (match_operand:V2SI 1 "nonimmediate_operand")))] - "TARGET_AVX512VL && TARGET_MMX_WITH_SSE" + "TARGET_AVX512VL && TARGET_MMXFP_WITH_SSE" { rtx op1 = gen_reg_rtx (V4SImode); rtx op0 = gen_reg_rtx (V4SFmode); @@ -1756,7 +1767,7 @@ (define_expand "vec_initv2sfsf" (define_expand "nearbyintv2sf2" [(match_operand:V2SF 0 "register_operand") (match_operand:V2SF 1 "nonimmediate_operand")] - "TARGET_SSE4_1 && TARGET_MMX_WITH_SSE" + "TARGET_SSE4_1 && TARGET_MMXFP_WITH_SSE" { rtx op1 = gen_reg_rtx (V4SFmode); rtx op0 = gen_reg_rtx (V4SFmode); @@ -1772,7 +1783,7 @@ (define_expand "nearbyintv2sf2" (define_expand "rintv2sf2" [(match_operand:V2SF 0 "register_operand") (match_operand:V2SF 1 "nonimmediate_operand")] - "TARGET_SSE4_1 && TARGET_MMX_WITH_SSE" + "TARGET_SSE4_1 && TARGET_MMXFP_WITH_SSE" { rtx op1 = gen_reg_rtx (V4SFmode); rtx op0 = gen_reg_rtx (V4SFmode); @@ -1788,8 +1799,8 @@ (define_expand "rintv2sf2" (define_expand "lrintv2sfv2si2" [(match_operand:V2SI 0 "register_operand") (match_operand:V2SF 1 "nonimmediate_operand")] - "TARGET_SSE4_1 && !flag_trapping_math - && TARGET_MMX_WITH_SSE" + "TARGET_SSE4_1 && !flag_trapping_math + && TARGET_MMXFP_WITH_SSE" { rtx op1 = gen_reg_rtx (V4SFmode); rtx op0 = gen_reg_rtx (V4SImode); @@ -1806,7 +1817,7 @@ (define_expand "ceilv2sf2" [(match_operand:V2SF 0 "register_operand") (match_operand:V2SF 1 "nonimmediate_operand")] "TARGET_SSE4_1 && !flag_trapping_math - && TARGET_MMX_WITH_SSE" + && 
TARGET_MMXFP_WITH_SSE" { rtx op1 = gen_reg_rtx (V4SFmode); rtx op0 = gen_reg_rtx (V4SFmode); @@ -1822,8 +1833,8 @@ (define_expand "ceilv2sf2" (define_expand "lceilv2sfv2si2" [(match_operand:V2SI 0 "register_operand") (match_operand:V2SF 1 "nonimmediate_operand")] - "TARGET_SSE4_1 && !flag_trapping_math - && TARGET_MMX_WITH_SSE" + "TARGET_SSE4_1 && !flag_trapping_math + && TARGET_MMXFP_WITH_SSE" { rtx op1 = gen_reg_rtx (V4SFmode); rtx op0 = gen_reg_rtx (V4SImode); @@ -1840,7 +1851,7 @@ (define_expand "floorv2sf2" [(match_operand:V2SF 0 "register_operand") (match_operand:V2SF 1 "nonimmediate_operand")] "TARGET_SSE4_1 && !flag_trapping_math - && TARGET_MMX_WITH_SSE" + && TARGET_MMXFP_WITH_SSE" { rtx op1 = gen_reg_rtx (V4SFmode); rtx op0 = gen_reg_rtx (V4SFmode); @@ -1856,8 +1867,8 @@ (define_expand "floorv2sf2" (define_expand "lfloorv2sfv2si2" [(match_operand:V2SI 0 "register_operand") (match_operand:V2SF 1 "nonimmediate_operand")] - "TARGET_SSE4_1 && !flag_trapping_math - && TARGET_MMX_WITH_SSE" + "TARGET_SSE4_1 && !flag_trapping_math + && TARGET_MMXFP_WITH_SSE" { rtx op1 = gen_reg_rtx (V4SFmode); rtx op0 = gen_reg_rtx (V4SImode); @@ -1874,7 +1885,7 @@ (define_expand "btruncv2sf2" [(match_operand:V2SF 0 "register_operand") (match_operand:V2SF 1 "nonimmediate_operand")] "TARGET_SSE4_1 && !flag_trapping_math - && TARGET_MMX_WITH_SSE" + && TARGET_MMXFP_WITH_SSE" { rtx op1 = gen_reg_rtx (V4SFmode); rtx op0 = gen_reg_rtx (V4SFmode); @@ -1891,7 +1902,7 @@ (define_expand "roundv2sf2" [(match_operand:V2SF 0 "register_operand") (match_operand:V2SF 1 "nonimmediate_operand")] "TARGET_SSE4_1 && !flag_trapping_math - && TARGET_MMX_WITH_SSE" + && TARGET_MMXFP_WITH_SSE" { rtx op1 = gen_reg_rtx (V4SFmode); rtx op0 = gen_reg_rtx (V4SFmode); @@ -1907,8 +1918,8 @@ (define_expand "roundv2sf2" (define_expand "lroundv2sfv2si2" [(match_operand:V2SI 0 "register_operand") (match_operand:V2SF 1 "nonimmediate_operand")] - "TARGET_SSE4_1 && !flag_trapping_math - && TARGET_MMX_WITH_SSE" + 
"TARGET_SSE4_1 && !flag_trapping_math + && TARGET_MMXFP_WITH_SSE" { rtx op1 = gen_reg_rtx (V4SFmode); rtx op0 = gen_reg_rtx (V4SImode);