diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def index d85b175..0d1fc34 100644 --- a/gcc/config/i386/i386-builtin.def +++ b/gcc/config/i386/i386-builtin.def @@ -679,7 +679,7 @@ BDESC (OPTION_MASK_ISA_SSE, 0, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_ BDESC (OPTION_MASK_ISA_SSE, 0, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF) -BDESC (OPTION_MASK_ISA_SSE, 0, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF) +BDESC (OPTION_MASK_ISA_SSE, 0, CODE_FOR_sse_movss_v4sf, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF) BDESC (OPTION_MASK_ISA_SSE, 0, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF) BDESC (OPTION_MASK_ISA_SSE, 0, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF) BDESC (OPTION_MASK_ISA_SSE, 0, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF) @@ -781,7 +781,7 @@ BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86 BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF) -BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF) +BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_sse2_movsd_v2df, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF) BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF) BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF) diff --git 
a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc index a45640f..e144b5e 100644 --- a/gcc/config/i386/i386-expand.cc +++ b/gcc/config/i386/i386-expand.cc @@ -1774,9 +1774,9 @@ ix86_split_convert_uns_si_sse (rtx operands[]) input = gen_rtx_REG (vecmode, REGNO (input)); emit_move_insn (value, CONST0_RTX (vecmode)); if (vecmode == V4SFmode) - emit_insn (gen_sse_movss (value, value, input)); + emit_insn (gen_sse_movss_v4sf (value, value, input)); else - emit_insn (gen_sse2_movsd (value, value, input)); + emit_insn (gen_sse2_movsd_v2df (value, value, input)); } emit_move_insn (large, two31); @@ -18903,8 +18903,10 @@ expand_vec_perm_movs (struct expand_vec_perm_d *d) return false; if (!(TARGET_SSE && vmode == V4SFmode) + && !(TARGET_SSE && vmode == V4SImode) && !(TARGET_MMX_WITH_SSE && vmode == V2SFmode) - && !(TARGET_SSE2 && vmode == V2DFmode)) + && !(TARGET_SSE2 && vmode == V2DFmode) + && !(TARGET_SSE2 && vmode == V2DImode)) return false; /* Only the first element is changed. 
*/ diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index de632b2..d50627a 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -6825,7 +6825,7 @@ if (!MEM_P (operands[3])) operands[3] = force_reg (V8HFmode, operands[3]); op1 = lowpart_subreg (V4SFmode, operands[3], V8HFmode); - emit_insn (gen_sse_movss (dest, op1, op0)); + emit_insn (gen_sse_movss_v4sf (dest, op1, op0)); emit_move_insn (operands[0], lowpart_subreg (V8HFmode, dest, V4SFmode)); DONE; }) @@ -6855,7 +6855,7 @@ if (!MEM_P (operands[3])) operands[3] = force_reg (V8HFmode, operands[3]); op1 = lowpart_subreg (V4SFmode, operands[3], V8HFmode); - emit_insn (gen_sse_movss (dest, op1, op0)); + emit_insn (gen_sse_movss_v4sf (dest, op1, op0)); emit_move_insn (operands[0], lowpart_subreg (V8HFmode, dest, V4SFmode)); DONE; }) @@ -10498,11 +10498,11 @@ (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,maybe_vex") (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")]) -(define_insn "sse_movss" - [(set (match_operand:V4SF 0 "register_operand" "=x,v") - (vec_merge:V4SF - (match_operand:V4SF 2 "register_operand" " x,v") - (match_operand:V4SF 1 "register_operand" " 0,v") +(define_insn "sse_movss_<mode>" + [(set (match_operand:VI4F_128 0 "register_operand" "=x,v") + (vec_merge:VI4F_128 + (match_operand:VI4F_128 2 "register_operand" " x,v") + (match_operand:VI4F_128 1 "register_operand" " 0,v") (const_int 1)))] "TARGET_SSE" "@ @@ -13481,11 +13481,11 @@ [(set (match_dup 0) (match_dup 1))] "operands[0] = adjust_address (operands[0], DFmode, 0);") -(define_insn "sse2_movsd" - [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,v,x,v,m,x,x,v,o") - (vec_merge:V2DF - (match_operand:V2DF 2 "nonimmediate_operand" " x,v,m,m,v,0,0,v,0") - (match_operand:V2DF 1 "nonimmediate_operand" " 0,v,0,v,0,x,o,o,v") +(define_insn "sse2_movsd_<mode>" + [(set (match_operand:VI8F_128 0 "nonimmediate_operand" "=x,v,x,v,m,x,x,v,o") + (vec_merge:VI8F_128 + (match_operand:VI8F_128 2 "nonimmediate_operand" " x,v,m,m,v,0,0,v,0") + 
(match_operand:VI8F_128 1 "nonimmediate_operand" " 0,v,0,v,0,x,o,o,v") (const_int 1)))] "TARGET_SSE2" "@ diff --git a/gcc/testsuite/gcc.target/i386/sse-movss-4.c b/gcc/testsuite/gcc.target/i386/sse-movss-4.c new file mode 100644 index 0000000..ec3019c --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse-movss-4.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -msse" } */ + +typedef unsigned int v4si __attribute__((vector_size(16))); +typedef float v4sf __attribute__((vector_size(16))); + +v4si foo(v4si x,v4si y) { return (v4si){y[0],x[1],x[2],x[3]}; } +v4sf bar(v4sf x,v4sf y) { return (v4sf){y[0],x[1],x[2],x[3]}; } + +/* { dg-final { scan-assembler-times "\tv?movss\t" 2 } } */ +/* { dg-final { scan-assembler-not "movaps" } } */ +/* { dg-final { scan-assembler-not "shufps" } } */ +/* { dg-final { scan-assembler-not "vpblendw" } } */ diff --git a/gcc/testsuite/gcc.target/i386/sse2-movsd-3.c b/gcc/testsuite/gcc.target/i386/sse2-movsd-3.c new file mode 100644 index 0000000..fadbe2b --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse2-movsd-3.c @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -msse2" } */ + +typedef unsigned long long v2di __attribute__((vector_size(16))); +typedef double v2df __attribute__((vector_size(16))); + +v2di foo(v2di x,v2di y) { return (v2di){y[0],x[1]}; } +v2df bar(v2df x,v2df y) { return (v2df){y[0],x[1]}; } + +/* { dg-final { scan-assembler-times "\tv?movsd\t" 2 } } */ +/* { dg-final { scan-assembler-not "v?shufpd" } } */ +/* { dg-final { scan-assembler-not "movdqa" } } */ +/* { dg-final { scan-assembler-not "pshufd" } } */ +/* { dg-final { scan-assembler-not "v?punpckldq" } } */ +/* { dg-final { scan-assembler-not "v?movq" } } */