Index: gcc/config/i386/i386.c =================================================================== --- gcc/config/i386/i386.c (revision 211397) +++ gcc/config/i386/i386.c (working copy) @@ -29793,23 +29793,23 @@ static const struct builtin_description { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_ceilps_sfix256", IX86_BUILTIN_CEILPS_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V8SF_ROUND }, { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2, "__builtin_ia32_roundps_az256", IX86_BUILTIN_ROUNDPS_AZ256, UNKNOWN, (int) V8SF_FTYPE_V8SF }, { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2_sfix, "__builtin_ia32_roundps_az_sfix256", IX86_BUILTIN_ROUNDPS_AZ_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V8SF }, { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF }, { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF }, { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF }, { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF }, + { OPTION_MASK_ISA_AVX, CODE_FOR_movv8si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI }, + { OPTION_MASK_ISA_AVX, CODE_FOR_movv8sf, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF }, + { OPTION_MASK_ISA_AVX, CODE_FOR_movv4df, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF }, { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI }, { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF }, { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF }, { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST }, { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST }, { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST }, { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST }, { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST }, { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST }, Index: gcc/config/i386/sse.md =================================================================== --- gcc/config/i386/sse.md (revision 211397) +++ gcc/config/i386/sse.md (working copy) @@ -66,21 +66,20 @@ UNSPEC_AESKEYGENASSIST ;; For PCLMUL support UNSPEC_PCLMUL ;; For AVX support UNSPEC_PCMP UNSPEC_VPERMIL UNSPEC_VPERMIL2 UNSPEC_VPERMIL2F128 - UNSPEC_CAST UNSPEC_VTESTP UNSPEC_VCVTPH2PS UNSPEC_VCVTPS2PH ;; For AVX2 support UNSPEC_VPERMVAR UNSPEC_VPERMTI UNSPEC_GATHER UNSPEC_VSIBADDR @@ -14816,40 +14815,20 @@ (define_expand "maskstore" [(set (match_operand:V48_AVX2 0 "memory_operand") (unspec:V48_AVX2 [(match_operand: 2 "register_operand") (match_operand:V48_AVX2 1 "register_operand") (match_dup 0)] UNSPEC_MASKMOV))] "TARGET_AVX") -(define_insn_and_split "avx__" - [(set (match_operand:AVX256MODE2P 0 "nonimmediate_operand" "=x,m") - (unspec:AVX256MODE2P - [(match_operand: 1 "nonimmediate_operand" "xm,x")] - UNSPEC_CAST))] - "TARGET_AVX" - "#" - "&& reload_completed" - [(const_int 0)] -{ - rtx op0 = operands[0]; - rtx op1 = operands[1]; - if (REG_P (op0)) - op0 = gen_rtx_REG (mode, REGNO (op0)); - else - op1 = gen_rtx_REG (mode, REGNO (op1)); - emit_move_insn (op0, op1); - DONE; -}) - (define_expand "vec_init" [(match_operand:V_256 0 "register_operand") (match_operand 1)] "TARGET_AVX" { ix86_expand_vector_init (false, operands[0], operands[1]); DONE; }) (define_expand "vec_init" Index: gcc/emit-rtl.c =================================================================== --- gcc/emit-rtl.c (revision 211397) +++ gcc/emit-rtl.c (working copy) @@ -775,20 +775,23 @@ validate_subreg (enum machine_mode omode else if ((COMPLEX_MODE_P (imode) || VECTOR_MODE_P (imode)) && GET_MODE_INNER (imode) == omode) ; /* ??? x86 sse code makes heavy use of *paradoxical* vector subregs, i.e. (subreg:V4SF (reg:SF) 0). This surely isn't the cleanest way to represent this. It's questionable if this ought to be represented at all -- why can't this all be hidden in post-reload splitters that make arbitrarily mode changes to the registers themselves. */ else if (VECTOR_MODE_P (omode) && GET_MODE_INNER (omode) == imode) ; + else if (VECTOR_MODE_P (omode) && VECTOR_MODE_P (imode) + && GET_MODE_INNER (omode) == GET_MODE_INNER (imode)) + ; /* Subregs involving floating point modes are not allowed to change size. Therefore (subreg:DI (reg:DF) 0) is fine, but (subreg:SI (reg:DF) 0) isn't. */ else if (FLOAT_MODE_P (imode) || FLOAT_MODE_P (omode)) { if (! (isize == osize /* LRA can use subreg to store a floating point value in an integer mode. Although the floating point and the integer modes need the same number of hard registers, the size of floating point mode can be less than the Index: gcc/testsuite/gcc.target/i386/pr50829-1.c =================================================================== --- gcc/testsuite/gcc.target/i386/pr50829-1.c (revision 0) +++ gcc/testsuite/gcc.target/i386/pr50829-1.c (working copy) @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx" } */ + +#include + +__m256d +concat (__m128d x) +{ + __m256d z = _mm256_castpd128_pd256 (x); + return _mm256_insertf128_pd (z, x, 1); +} + +/* { dg-final { scan-assembler-not "vmov" } } */ Index: gcc/testsuite/gcc.target/i386/pr50829-2.c =================================================================== --- gcc/testsuite/gcc.target/i386/pr50829-2.c (revision 0) +++ gcc/testsuite/gcc.target/i386/pr50829-2.c (working copy) @@ -0,0 +1,11 @@ +/* { dg-do compile } */ +/* { dg-options "-O0 -mavx" } */ + +#include + +__m256d +concat (__m128d x) +{ + __m256d z = _mm256_castpd128_pd256 (x); + return _mm256_insertf128_pd (z, x, 1); +}