* [PATCH 15/42] i386: Emulate MMX sse_cvtpi2ps with SSE
2019-02-16 0:34 [PATCH 00/42] V7: Emulate MMX intrinsics with SSE H.J. Lu
@ 2019-02-16 0:34 ` H.J. Lu
2019-02-16 0:34 ` [PATCH 20/42] i386: Emulate MMX mmx_umulv4hi3_highpart " H.J. Lu
` (40 subsequent siblings)
41 siblings, 0 replies; 50+ messages in thread
From: H.J. Lu @ 2019-02-16 0:34 UTC (permalink / raw)
To: gcc-patches; +Cc: Uros Bizjak
Emulate MMX sse_cvtpi2ps with SSE2 cvtdq2ps, preserving upper 64 bits of
destination XMM register. Only SSE register source operand is allowed.
PR target/89021
* config/i386/sse.md (sse_cvtpi2ps): Changed to
define_insn_and_split. Also allow TARGET_MMX_WITH_SSE. Add
SSE emulation.
---
gcc/config/i386/sse.md | 64 ++++++++++++++++++++++++++++++++++++------
1 file changed, 56 insertions(+), 8 deletions(-)
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 70e3669d115..06c9b5b58f1 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -4561,16 +4561,64 @@
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-(define_insn "sse_cvtpi2ps"
- [(set (match_operand:V4SF 0 "register_operand" "=x")
+(define_insn_and_split "sse_cvtpi2ps"
+ [(set (match_operand:V4SF 0 "register_operand" "=x,x,Yv")
(vec_merge:V4SF
(vec_duplicate:V4SF
- (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
- (match_operand:V4SF 1 "register_operand" "0")
- (const_int 3)))]
- "TARGET_SSE"
- "cvtpi2ps\t{%2, %0|%0, %2}"
- [(set_attr "type" "ssecvt")
+ (float:V2SF (match_operand:V2SI 2 "register_mmxmem_operand" "ym,x,Yv")))
+ (match_operand:V4SF 1 "register_operand" "0,0,Yv")
+ (const_int 3)))
+ (clobber (match_scratch:V4SF 3 "=X,x,Yv"))]
+ "TARGET_SSE || TARGET_MMX_WITH_SSE"
+ "@
+ cvtpi2ps\t{%2, %0|%0, %2}
+ #
+ #"
+ "TARGET_MMX_WITH_SSE && reload_completed"
+ [(const_int 0)]
+{
+ rtx op2 = lowpart_subreg (V4SImode, operands[2],
+ GET_MODE (operands[2]));
+ /* Generate SSE2 cvtdq2ps. */
+ rtx insn = gen_floatv4siv4sf2 (operands[3], op2);
+ emit_insn (insn);
+
+ /* Merge operands[3] with operands[0]. */
+ rtx mask, op1;
+ if (TARGET_AVX)
+ {
+ mask = gen_rtx_PARALLEL (VOIDmode,
+ gen_rtvec (4, GEN_INT (0), GEN_INT (1),
+ GEN_INT (6), GEN_INT (7)));
+ op1 = gen_rtx_VEC_CONCAT (V8SFmode, operands[3], operands[1]);
+ op2 = gen_rtx_VEC_SELECT (V4SFmode, op1, mask);
+ insn = gen_rtx_SET (operands[0], op2);
+ }
+ else
+ {
+ /* NB: SSE can only concatenate OP0 and OP3 to OP0. */
+ mask = gen_rtx_PARALLEL (VOIDmode,
+ gen_rtvec (4, GEN_INT (2), GEN_INT (3),
+ GEN_INT (4), GEN_INT (5)));
+ op1 = gen_rtx_VEC_CONCAT (V8SFmode, operands[0], operands[3]);
+ op2 = gen_rtx_VEC_SELECT (V4SFmode, op1, mask);
+ insn = gen_rtx_SET (operands[0], op2);
+ emit_insn (insn);
+
+ /* Swap bits 0:63 with bits 64:127. */
+ mask = gen_rtx_PARALLEL (VOIDmode,
+ gen_rtvec (4, GEN_INT (2), GEN_INT (3),
+ GEN_INT (0), GEN_INT (1)));
+ rtx dest = lowpart_subreg (V4SImode, operands[0],
+ GET_MODE (operands[0]));
+ op1 = gen_rtx_VEC_SELECT (V4SImode, dest, mask);
+ insn = gen_rtx_SET (dest, op1);
+ }
+ emit_insn (insn);
+ DONE;
+}
+ [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+ (set_attr "type" "ssecvt")
(set_attr "mode" "V4SF")])
(define_insn "sse_cvtps2pi"
--
2.20.1
^ permalink raw reply [flat|nested] 50+ messages in thread
* [PATCH 20/42] i386: Emulate MMX mmx_umulv4hi3_highpart with SSE
2019-02-16 0:34 [PATCH 00/42] V7: Emulate MMX intrinsics with SSE H.J. Lu
2019-02-16 0:34 ` [PATCH 15/42] i386: Emulate MMX sse_cvtpi2ps " H.J. Lu
@ 2019-02-16 0:34 ` H.J. Lu
2019-02-16 0:34 ` [PATCH 13/42] i386: Emulate MMX pshufw " H.J. Lu
` (39 subsequent siblings)
41 siblings, 0 replies; 50+ messages in thread
From: H.J. Lu @ 2019-02-16 0:34 UTC (permalink / raw)
To: gcc-patches; +Cc: Uros Bizjak
Emulate MMX mmx_umulv4hi3_highpart with SSE. Only SSE register source
operand is allowed.
PR target/89021
* config/i386/mmx.md (mmx_umulv4hi3_highpart): Also check
TARGET_MMX and TARGET_MMX_WITH_SSE.
(*mmx_umulv4hi3_highpart): Add SSE emulation.
---
gcc/config/i386/mmx.md | 26 ++++++++++++++++----------
1 file changed, 16 insertions(+), 10 deletions(-)
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 9c552f929f1..d78c6a31962 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -781,28 +781,34 @@
(lshiftrt:V4SI
(mult:V4SI
(zero_extend:V4SI
- (match_operand:V4HI 1 "nonimmediate_operand"))
+ (match_operand:V4HI 1 "register_mmxmem_operand"))
(zero_extend:V4SI
- (match_operand:V4HI 2 "nonimmediate_operand")))
+ (match_operand:V4HI 2 "register_mmxmem_operand")))
(const_int 16))))]
- "TARGET_SSE || TARGET_3DNOW_A"
+ "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+ && (TARGET_SSE || TARGET_3DNOW_A)"
"ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
(define_insn "*mmx_umulv4hi3_highpart"
- [(set (match_operand:V4HI 0 "register_operand" "=y")
+ [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
(truncate:V4HI
(lshiftrt:V4SI
(mult:V4SI
(zero_extend:V4SI
- (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
+ (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yv"))
(zero_extend:V4SI
- (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
+ (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv")))
(const_int 16))))]
- "(TARGET_SSE || TARGET_3DNOW_A)
+ "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+ && (TARGET_SSE || TARGET_3DNOW_A)
&& ix86_binary_operator_ok (MULT, V4HImode, operands)"
- "pmulhuw\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmxmul")
- (set_attr "mode" "DI")])
+ "@
+ pmulhuw\t{%2, %0|%0, %2}
+ pmulhuw\t{%2, %0|%0, %2}
+ vpmulhuw\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+ (set_attr "type" "mmxmul,ssemul,ssemul")
+ (set_attr "mode" "DI,TI,TI")])
(define_expand "mmx_pmaddwd"
[(set (match_operand:V2SI 0 "register_operand")
--
2.20.1
^ permalink raw reply [flat|nested] 50+ messages in thread
* [PATCH 13/42] i386: Emulate MMX pshufw with SSE
2019-02-16 0:34 [PATCH 00/42] V7: Emulate MMX intrinsics with SSE H.J. Lu
2019-02-16 0:34 ` [PATCH 15/42] i386: Emulate MMX sse_cvtpi2ps " H.J. Lu
2019-02-16 0:34 ` [PATCH 20/42] i386: Emulate MMX mmx_umulv4hi3_highpart " H.J. Lu
@ 2019-02-16 0:34 ` H.J. Lu
2019-02-16 0:34 ` [PATCH 11/42] i386: Emulate MMX mmx_eq/mmx_gt<mode>3 " H.J. Lu
` (38 subsequent siblings)
41 siblings, 0 replies; 50+ messages in thread
From: H.J. Lu @ 2019-02-16 0:34 UTC (permalink / raw)
To: gcc-patches; +Cc: Uros Bizjak
Emulate MMX pshufw with SSE. Only SSE register source operand is allowed.
PR target/89021
* config/i386/mmx.md (mmx_pshufw): Also check TARGET_MMX and
TARGET_MMX_WITH_SSE.
(mmx_pshufw_1): Add SSE emulation.
(*vec_dupv4hi): Changed to define_insn_and_split and also allow
TARGET_MMX_WITH_SSE to support SSE emulation.
---
gcc/config/i386/mmx.md | 81 +++++++++++++++++++++++++++++++++---------
1 file changed, 65 insertions(+), 16 deletions(-)
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index d568a534956..43f85064cd9 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -1323,9 +1323,10 @@
(define_expand "mmx_pshufw"
[(match_operand:V4HI 0 "register_operand")
- (match_operand:V4HI 1 "nonimmediate_operand")
+ (match_operand:V4HI 1 "register_mmxmem_operand")
(match_operand:SI 2 "const_int_operand")]
- "TARGET_SSE || TARGET_3DNOW_A"
+ "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+ && (TARGET_SSE || TARGET_3DNOW_A)"
{
int mask = INTVAL (operands[2]);
emit_insn (gen_mmx_pshufw_1 (operands[0], operands[1],
@@ -1337,14 +1338,15 @@
})
(define_insn "mmx_pshufw_1"
- [(set (match_operand:V4HI 0 "register_operand" "=y")
+ [(set (match_operand:V4HI 0 "register_operand" "=y,Yv")
(vec_select:V4HI
- (match_operand:V4HI 1 "nonimmediate_operand" "ym")
+ (match_operand:V4HI 1 "register_mmxmem_operand" "ym,Yv")
(parallel [(match_operand 2 "const_0_to_3_operand")
(match_operand 3 "const_0_to_3_operand")
(match_operand 4 "const_0_to_3_operand")
(match_operand 5 "const_0_to_3_operand")])))]
- "TARGET_SSE || TARGET_3DNOW_A"
+ "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+ && (TARGET_SSE || TARGET_3DNOW_A)"
{
int mask = 0;
mask |= INTVAL (operands[2]) << 0;
@@ -1353,11 +1355,20 @@
mask |= INTVAL (operands[5]) << 6;
operands[2] = GEN_INT (mask);
- return "pshufw\t{%2, %1, %0|%0, %1, %2}";
+ switch (which_alternative)
+ {
+ case 0:
+ return "pshufw\t{%2, %1, %0|%0, %1, %2}";
+ case 1:
+ return "%vpshuflw\t{%2, %1, %0|%0, %1, %2}";
+ default:
+ gcc_unreachable ();
+ }
}
- [(set_attr "type" "mmxcvt")
+ [(set_attr "mmx_isa" "native,x64")
+ (set_attr "type" "mmxcvt,sselog")
(set_attr "length_immediate" "1")
- (set_attr "mode" "DI")])
+ (set_attr "mode" "DI,TI")])
(define_insn "mmx_pswapdv2si2"
[(set (match_operand:V2SI 0 "register_operand" "=y")
@@ -1370,16 +1381,54 @@
(set_attr "prefix_extra" "1")
(set_attr "mode" "DI")])
-(define_insn "*vec_dupv4hi"
- [(set (match_operand:V4HI 0 "register_operand" "=y")
+(define_insn_and_split "*vec_dupv4hi"
+ [(set (match_operand:V4HI 0 "register_operand" "=y,Yv,Yw")
(vec_duplicate:V4HI
(truncate:HI
- (match_operand:SI 1 "register_operand" "0"))))]
- "TARGET_SSE || TARGET_3DNOW_A"
- "pshufw\t{$0, %0, %0|%0, %0, 0}"
- [(set_attr "type" "mmxcvt")
- (set_attr "length_immediate" "1")
- (set_attr "mode" "DI")])
+ (match_operand:SI 1 "register_operand" "0,Yv,r"))))]
+ "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+ && (TARGET_SSE || TARGET_3DNOW_A)"
+ "@
+ pshufw\t{$0, %0, %0|%0, %0, 0}
+ #
+ #"
+ "TARGET_MMX_WITH_SSE && reload_completed"
+ [(const_int 0)]
+{
+ rtx op;
+ operands[0] = lowpart_subreg (V8HImode, operands[0],
+ GET_MODE (operands[0]));
+ if (TARGET_AVX2)
+ {
+ operands[1] = lowpart_subreg (HImode, operands[1],
+ GET_MODE (operands[1]));
+ op = gen_rtx_VEC_DUPLICATE (V8HImode, operands[1]);
+ }
+ else
+ {
+ operands[1] = lowpart_subreg (V8HImode, operands[1],
+ GET_MODE (operands[1]));
+ rtx mask = gen_rtx_PARALLEL (VOIDmode,
+ gen_rtvec (8,
+ GEN_INT (0),
+ GEN_INT (0),
+ GEN_INT (0),
+ GEN_INT (0),
+ GEN_INT (4),
+ GEN_INT (5),
+ GEN_INT (6),
+ GEN_INT (7)));
+
+ op = gen_rtx_VEC_SELECT (V8HImode, operands[1], mask);
+ }
+ rtx insn = gen_rtx_SET (operands[0], op);
+ emit_insn (insn);
+ DONE;
+}
+ [(set_attr "mmx_isa" "native,x64,x64_avx")
+ (set_attr "type" "mmxcvt,sselog1,ssemov")
+ (set_attr "length_immediate" "1,1,0")
+ (set_attr "mode" "DI,TI,TI")])
(define_insn_and_split "*vec_dupv2si"
[(set (match_operand:V2SI 0 "register_operand" "=y,x,Yv,Yw")
--
2.20.1
^ permalink raw reply [flat|nested] 50+ messages in thread
* [PATCH 11/42] i386: Emulate MMX mmx_eq/mmx_gt<mode>3 with SSE
2019-02-16 0:34 [PATCH 00/42] V7: Emulate MMX intrinsics with SSE H.J. Lu
` (2 preceding siblings ...)
2019-02-16 0:34 ` [PATCH 13/42] i386: Emulate MMX pshufw " H.J. Lu
@ 2019-02-16 0:34 ` H.J. Lu
2019-02-16 0:34 ` [PATCH 01/42] i386: Allow MMX register modes in SSE registers H.J. Lu
` (37 subsequent siblings)
41 siblings, 0 replies; 50+ messages in thread
From: H.J. Lu @ 2019-02-16 0:34 UTC (permalink / raw)
To: gcc-patches; +Cc: Uros Bizjak
Emulate MMX mmx_eq/mmx_gt<mode>3 with SSE. Only SSE register source
operand is allowed.
PR target/89021
* config/i386/mmx.md (mmx_eq<mode>3): Also allow
TARGET_MMX_WITH_SSE.
(*mmx_eq<mode>3): Also allow TARGET_MMX_WITH_SSE. Add SSE
support.
(mmx_gt<mode>3): Likewise.
---
gcc/config/i386/mmx.md | 43 +++++++++++++++++++++++++-----------------
1 file changed, 26 insertions(+), 17 deletions(-)
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 510d453f0fd..b0c6a8c8077 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -1017,30 +1017,39 @@
(define_expand "mmx_eq<mode>3"
[(set (match_operand:MMXMODEI 0 "register_operand")
(eq:MMXMODEI
- (match_operand:MMXMODEI 1 "nonimmediate_operand")
- (match_operand:MMXMODEI 2 "nonimmediate_operand")))]
- "TARGET_MMX"
+ (match_operand:MMXMODEI 1 "register_mmxmem_operand")
+ (match_operand:MMXMODEI 2 "register_mmxmem_operand")))]
+ "TARGET_MMX || TARGET_MMX_WITH_SSE"
"ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
(define_insn "*mmx_eq<mode>3"
- [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
+ [(set (match_operand:MMXMODEI 0 "register_operand" "=y,x,Yv")
(eq:MMXMODEI
- (match_operand:MMXMODEI 1 "nonimmediate_operand" "%0")
- (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")))]
- "TARGET_MMX && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
- "pcmpeq<mmxvecsize>\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmxcmp")
- (set_attr "mode" "DI")])
+ (match_operand:MMXMODEI 1 "register_mmxmem_operand" "%0,0,Yv")
+ (match_operand:MMXMODEI 2 "register_mmxmem_operand" "ym,x,Yv")))]
+ "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+ && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
+ "@
+ pcmpeq<mmxvecsize>\t{%2, %0|%0, %2}
+ pcmpeq<mmxvecsize>\t{%2, %0|%0, %2}
+ vpcmpeq<mmxvecsize>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+ (set_attr "type" "mmxcmp,ssecmp,ssecmp")
+ (set_attr "mode" "DI,TI,TI")])
(define_insn "mmx_gt<mode>3"
- [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
+ [(set (match_operand:MMXMODEI 0 "register_operand" "=y,x,Yv")
(gt:MMXMODEI
- (match_operand:MMXMODEI 1 "register_operand" "0")
- (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")))]
- "TARGET_MMX"
- "pcmpgt<mmxvecsize>\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmxcmp")
- (set_attr "mode" "DI")])
+ (match_operand:MMXMODEI 1 "register_operand" "0,0,Yv")
+ (match_operand:MMXMODEI 2 "register_mmxmem_operand" "ym,x,Yv")))]
+ "TARGET_MMX || TARGET_MMX_WITH_SSE"
+ "@
+ pcmpgt<mmxvecsize>\t{%2, %0|%0, %2}
+ pcmpgt<mmxvecsize>\t{%2, %0|%0, %2}
+ vpcmpgt<mmxvecsize>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+ (set_attr "type" "mmxcmp,ssecmp,ssecmp")
+ (set_attr "mode" "DI,TI,TI")])
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
--
2.20.1
^ permalink raw reply [flat|nested] 50+ messages in thread
* [PATCH 01/42] i386: Allow MMX register modes in SSE registers
2019-02-16 0:34 [PATCH 00/42] V7: Emulate MMX intrinsics with SSE H.J. Lu
` (3 preceding siblings ...)
2019-02-16 0:34 ` [PATCH 11/42] i386: Emulate MMX mmx_eq/mmx_gt<mode>3 " H.J. Lu
@ 2019-02-16 0:34 ` H.J. Lu
2019-02-16 0:34 ` [PATCH 17/42] i386: Emulate MMX mmx_pinsrw with SSE H.J. Lu
` (36 subsequent siblings)
41 siblings, 0 replies; 50+ messages in thread
From: H.J. Lu @ 2019-02-16 0:34 UTC (permalink / raw)
To: gcc-patches; +Cc: Uros Bizjak
In 64-bit mode, SSE2 can be used to emulate MMX instructions without
3DNOW. We can use SSE2 to support MMX register modes.
PR target/89021
* config/i386/i386-c.c (ix86_target_macros_internal): Define
__MMX_WITH_SSE__ for TARGET_MMX_WITH_SSE.
* config/i386/i386.c (ix86_set_reg_reg_cost): Add support for
TARGET_MMX_WITH_SSE with VALID_MMX_REG_MODE.
(ix86_vector_mode_supported_p): Likewise.
* config/i386/i386.h (TARGET_MMX_WITH_SSE): New.
---
gcc/config/i386/i386-c.c | 2 ++
gcc/config/i386/i386.c | 5 +++--
gcc/config/i386/i386.h | 2 ++
3 files changed, 7 insertions(+), 2 deletions(-)
diff --git a/gcc/config/i386/i386-c.c b/gcc/config/i386/i386-c.c
index 5e7e46fcebe..213e1b56c6b 100644
--- a/gcc/config/i386/i386-c.c
+++ b/gcc/config/i386/i386-c.c
@@ -548,6 +548,8 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
def_or_undef (parse_in, "__CLDEMOTE__");
if (isa_flag2 & OPTION_MASK_ISA_PTWRITE)
def_or_undef (parse_in, "__PTWRITE__");
+ if (TARGET_MMX_WITH_SSE)
+ def_or_undef (parse_in, "__MMX_WITH_SSE__");
if (TARGET_IAMCU)
{
def_or_undef (parse_in, "__iamcu");
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 3e5f52175d2..7d7dd80930e 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -40490,7 +40490,8 @@ ix86_set_reg_reg_cost (machine_mode mode)
|| (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
|| (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
|| (TARGET_SSE && VALID_SSE_REG_MODE (mode))
- || (TARGET_MMX && VALID_MMX_REG_MODE (mode)))
+ || ((TARGET_MMX || TARGET_MMX_WITH_SSE)
+ && VALID_MMX_REG_MODE (mode)))
units = GET_MODE_SIZE (mode);
}
@@ -44316,7 +44317,7 @@ ix86_vector_mode_supported_p (machine_mode mode)
return true;
if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
return true;
- if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
+ if ((TARGET_MMX ||TARGET_MMX_WITH_SSE) && VALID_MMX_REG_MODE (mode))
return true;
if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
return true;
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 4fd8bc40a34..91b233022c2 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -201,6 +201,8 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
#define TARGET_16BIT TARGET_CODE16
#define TARGET_16BIT_P(x) TARGET_CODE16_P(x)
+#define TARGET_MMX_WITH_SSE (TARGET_64BIT && TARGET_SSE2)
+
#include "config/vxworks-dummy.h"
#include "config/i386/i386-opts.h"
--
2.20.1
^ permalink raw reply [flat|nested] 50+ messages in thread
* [PATCH 17/42] i386: Emulate MMX mmx_pinsrw with SSE
2019-02-16 0:34 [PATCH 00/42] V7: Emulate MMX intrinsics with SSE H.J. Lu
` (4 preceding siblings ...)
2019-02-16 0:34 ` [PATCH 01/42] i386: Allow MMX register modes in SSE registers H.J. Lu
@ 2019-02-16 0:34 ` H.J. Lu
2019-02-16 9:08 ` Uros Bizjak
2019-02-16 0:34 ` [PATCH 09/42] i386: Emulate MMX <any_logic><mode>3 " H.J. Lu
` (35 subsequent siblings)
41 siblings, 1 reply; 50+ messages in thread
From: H.J. Lu @ 2019-02-16 0:34 UTC (permalink / raw)
To: gcc-patches; +Cc: Uros Bizjak
Emulate MMX mmx_pinsrw with SSE. Only SSE register source operand is
allowed.
PR target/89021
* config/i386/mmx.md (mmx_pinsrw): Also check TARGET_MMX and
TARGET_MMX_WITH_SSE.
(*mmx_pinsrw): Add SSE emulation.
---
gcc/config/i386/mmx.md | 33 +++++++++++++++++++++++----------
1 file changed, 23 insertions(+), 10 deletions(-)
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 22547c7da6f..1e68d1bb338 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -1282,32 +1282,45 @@
(match_operand:SI 2 "nonimmediate_operand"))
(match_operand:V4HI 1 "register_operand")
(match_operand:SI 3 "const_0_to_3_operand")))]
- "TARGET_SSE || TARGET_3DNOW_A"
+ "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+ && (TARGET_SSE || TARGET_3DNOW_A)"
{
operands[2] = gen_lowpart (HImode, operands[2]);
operands[3] = GEN_INT (1 << INTVAL (operands[3]));
})
(define_insn "*mmx_pinsrw"
- [(set (match_operand:V4HI 0 "register_operand" "=y")
+ [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
(vec_merge:V4HI
(vec_duplicate:V4HI
- (match_operand:HI 2 "nonimmediate_operand" "rm"))
- (match_operand:V4HI 1 "register_operand" "0")
+ (match_operand:HI 2 "nonimmediate_operand" "rm,rm,rm"))
+ (match_operand:V4HI 1 "register_operand" "0,0,Yv")
(match_operand:SI 3 "const_int_operand")))]
- "(TARGET_SSE || TARGET_3DNOW_A)
+ "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+ && (TARGET_SSE || TARGET_3DNOW_A)
&& ((unsigned) exact_log2 (INTVAL (operands[3]))
< GET_MODE_NUNITS (V4HImode))"
{
operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
- if (MEM_P (operands[2]))
- return "pinsrw\t{%3, %2, %0|%0, %2, %3}";
+ if (TARGET_MMX_WITH_SSE && TARGET_AVX)
+ {
+ if (MEM_P (operands[2]))
+ return "vpinsrw\t{%3, %2, %1, %0|%0, %1, %2, %3}";
+ else
+ return "vpinsrw\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
+ }
else
- return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
+ {
+ if (MEM_P (operands[2]))
+ return "pinsrw\t{%3, %2, %0|%0, %2, %3}";
+ else
+ return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
+ }
}
- [(set_attr "type" "mmxcvt")
+ [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+ (set_attr "type" "mmxcvt,sselog,sselog")
(set_attr "length_immediate" "1")
- (set_attr "mode" "DI")])
+ (set_attr "mode" "DI,TI,TI")])
(define_insn "mmx_pextrw"
[(set (match_operand:SI 0 "register_operand" "=r,r")
--
2.20.1
^ permalink raw reply [flat|nested] 50+ messages in thread
* Re: [PATCH 17/42] i386: Emulate MMX mmx_pinsrw with SSE
2019-02-16 0:34 ` [PATCH 17/42] i386: Emulate MMX mmx_pinsrw with SSE H.J. Lu
@ 2019-02-16 9:08 ` Uros Bizjak
2019-02-16 14:57 ` H.J. Lu
0 siblings, 1 reply; 50+ messages in thread
From: Uros Bizjak @ 2019-02-16 9:08 UTC (permalink / raw)
To: H.J. Lu; +Cc: gcc-patches
On 2/16/19, H.J. Lu <hjl.tools@gmail.com> wrote:
> Emulate MMX mmx_pinsrw with SSE. Only SSE register source operand is
> allowed.
Here we allow general register and memory operands in both cases, so
the above sentence is misleading.
Uros.
>
> PR target/89021
> * config/i386/mmx.md (mmx_pinsrw): Also check TARGET_MMX and
> TARGET_MMX_WITH_SSE.
> (*mmx_pinsrw): Add SSE emulation.
> ---
> gcc/config/i386/mmx.md | 33 +++++++++++++++++++++++----------
> 1 file changed, 23 insertions(+), 10 deletions(-)
>
> diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
> index 22547c7da6f..1e68d1bb338 100644
> --- a/gcc/config/i386/mmx.md
> +++ b/gcc/config/i386/mmx.md
> @@ -1282,32 +1282,45 @@
> (match_operand:SI 2 "nonimmediate_operand"))
> (match_operand:V4HI 1 "register_operand")
> (match_operand:SI 3 "const_0_to_3_operand")))]
> - "TARGET_SSE || TARGET_3DNOW_A"
> + "(TARGET_MMX || TARGET_MMX_WITH_SSE)
> + && (TARGET_SSE || TARGET_3DNOW_A)"
> {
> operands[2] = gen_lowpart (HImode, operands[2]);
> operands[3] = GEN_INT (1 << INTVAL (operands[3]));
> })
>
> (define_insn "*mmx_pinsrw"
> - [(set (match_operand:V4HI 0 "register_operand" "=y")
> + [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
> (vec_merge:V4HI
> (vec_duplicate:V4HI
> - (match_operand:HI 2 "nonimmediate_operand" "rm"))
> - (match_operand:V4HI 1 "register_operand" "0")
> + (match_operand:HI 2 "nonimmediate_operand" "rm,rm,rm"))
> + (match_operand:V4HI 1 "register_operand" "0,0,Yv")
> (match_operand:SI 3 "const_int_operand")))]
> - "(TARGET_SSE || TARGET_3DNOW_A)
> + "(TARGET_MMX || TARGET_MMX_WITH_SSE)
> + && (TARGET_SSE || TARGET_3DNOW_A)
> && ((unsigned) exact_log2 (INTVAL (operands[3]))
> < GET_MODE_NUNITS (V4HImode))"
> {
> operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
> - if (MEM_P (operands[2]))
> - return "pinsrw\t{%3, %2, %0|%0, %2, %3}";
> + if (TARGET_MMX_WITH_SSE && TARGET_AVX)
> + {
> + if (MEM_P (operands[2]))
> + return "vpinsrw\t{%3, %2, %1, %0|%0, %1, %2, %3}";
> + else
> + return "vpinsrw\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
> + }
> else
> - return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
> + {
> + if (MEM_P (operands[2]))
> + return "pinsrw\t{%3, %2, %0|%0, %2, %3}";
> + else
> + return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
> + }
> }
> - [(set_attr "type" "mmxcvt")
> + [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
> + (set_attr "type" "mmxcvt,sselog,sselog")
> (set_attr "length_immediate" "1")
> - (set_attr "mode" "DI")])
> + (set_attr "mode" "DI,TI,TI")])
>
> (define_insn "mmx_pextrw"
> [(set (match_operand:SI 0 "register_operand" "=r,r")
> --
> 2.20.1
>
>
^ permalink raw reply [flat|nested] 50+ messages in thread
* Re: [PATCH 17/42] i386: Emulate MMX mmx_pinsrw with SSE
2019-02-16 9:08 ` Uros Bizjak
@ 2019-02-16 14:57 ` H.J. Lu
0 siblings, 0 replies; 50+ messages in thread
From: H.J. Lu @ 2019-02-16 14:57 UTC (permalink / raw)
To: Uros Bizjak; +Cc: GCC Patches
On Sat, Feb 16, 2019 at 1:08 AM Uros Bizjak <ubizjak@gmail.com> wrote:
>
> On 2/16/19, H.J. Lu <hjl.tools@gmail.com> wrote:
> > Emulate MMX mmx_pinsrw with SSE. Only SSE register source operand is
> > allowed.
>
> Here we allow general register and memory operands in both cases,, so
> the above sentence is misleading.
Changed to
Emulate MMX mmx_pinsrw with SSE. Only SSE register destination operand
is allowed.
> Uros.
>
> >
> > PR target/89021
> > * config/i386/mmx.md (mmx_pinsrw): Also check TARGET_MMX and
> > TARGET_MMX_WITH_SSE.
> > (*mmx_pinsrw): Add SSE emulation.
> > ---
> > gcc/config/i386/mmx.md | 33 +++++++++++++++++++++++----------
> > 1 file changed, 23 insertions(+), 10 deletions(-)
> >
> > diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
> > index 22547c7da6f..1e68d1bb338 100644
> > --- a/gcc/config/i386/mmx.md
> > +++ b/gcc/config/i386/mmx.md
> > @@ -1282,32 +1282,45 @@
> > (match_operand:SI 2 "nonimmediate_operand"))
> > (match_operand:V4HI 1 "register_operand")
> > (match_operand:SI 3 "const_0_to_3_operand")))]
> > - "TARGET_SSE || TARGET_3DNOW_A"
> > + "(TARGET_MMX || TARGET_MMX_WITH_SSE)
> > + && (TARGET_SSE || TARGET_3DNOW_A)"
> > {
> > operands[2] = gen_lowpart (HImode, operands[2]);
> > operands[3] = GEN_INT (1 << INTVAL (operands[3]));
> > })
> >
> > (define_insn "*mmx_pinsrw"
> > - [(set (match_operand:V4HI 0 "register_operand" "=y")
> > + [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
> > (vec_merge:V4HI
> > (vec_duplicate:V4HI
> > - (match_operand:HI 2 "nonimmediate_operand" "rm"))
> > - (match_operand:V4HI 1 "register_operand" "0")
> > + (match_operand:HI 2 "nonimmediate_operand" "rm,rm,rm"))
> > + (match_operand:V4HI 1 "register_operand" "0,0,Yv")
> > (match_operand:SI 3 "const_int_operand")))]
> > - "(TARGET_SSE || TARGET_3DNOW_A)
> > + "(TARGET_MMX || TARGET_MMX_WITH_SSE)
> > + && (TARGET_SSE || TARGET_3DNOW_A)
> > && ((unsigned) exact_log2 (INTVAL (operands[3]))
> > < GET_MODE_NUNITS (V4HImode))"
> > {
> > operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
> > - if (MEM_P (operands[2]))
> > - return "pinsrw\t{%3, %2, %0|%0, %2, %3}";
> > + if (TARGET_MMX_WITH_SSE && TARGET_AVX)
> > + {
> > + if (MEM_P (operands[2]))
> > + return "vpinsrw\t{%3, %2, %1, %0|%0, %1, %2, %3}";
> > + else
> > + return "vpinsrw\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
> > + }
> > else
> > - return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
> > + {
> > + if (MEM_P (operands[2]))
> > + return "pinsrw\t{%3, %2, %0|%0, %2, %3}";
> > + else
> > + return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
> > + }
> > }
> > - [(set_attr "type" "mmxcvt")
> > + [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
> > + (set_attr "type" "mmxcvt,sselog,sselog")
> > (set_attr "length_immediate" "1")
> > - (set_attr "mode" "DI")])
> > + (set_attr "mode" "DI,TI,TI")])
> >
> > (define_insn "mmx_pextrw"
> > [(set (match_operand:SI 0 "register_operand" "=r,r")
> > --
> > 2.20.1
> >
> >
--
H.J.
^ permalink raw reply [flat|nested] 50+ messages in thread
* [PATCH 09/42] i386: Emulate MMX <any_logic><mode>3 with SSE
2019-02-16 0:34 [PATCH 00/42] V7: Emulate MMX intrinsics with SSE H.J. Lu
` (5 preceding siblings ...)
2019-02-16 0:34 ` [PATCH 17/42] i386: Emulate MMX mmx_pinsrw with SSE H.J. Lu
@ 2019-02-16 0:34 ` H.J. Lu
2019-02-16 0:34 ` [PATCH 06/42] i386: Emulate MMX smulv4hi3_highpart " H.J. Lu
` (34 subsequent siblings)
41 siblings, 0 replies; 50+ messages in thread
From: H.J. Lu @ 2019-02-16 0:34 UTC (permalink / raw)
To: gcc-patches; +Cc: Uros Bizjak
Emulate MMX <any_logic><mode>3 with SSE. Only SSE register source
operand is allowed.
PR target/89021
* config/i386/mmx.md (any_logic:mmx_<code><mode>3): Also allow
TARGET_MMX_WITH_SSE.
(any_logic:<code><mode>3): New.
(any_logic:*mmx_<code><mode>3): Also allow TARGET_MMX_WITH_SSE.
Add SSE support.
---
gcc/config/i386/mmx.md | 33 +++++++++++++++++++++++----------
1 file changed, 23 insertions(+), 10 deletions(-)
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 240e0188a78..7e2d40313c3 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -1061,20 +1061,33 @@
(define_expand "mmx_<code><mode>3"
[(set (match_operand:MMXMODEI 0 "register_operand")
(any_logic:MMXMODEI
- (match_operand:MMXMODEI 1 "nonimmediate_operand")
- (match_operand:MMXMODEI 2 "nonimmediate_operand")))]
- "TARGET_MMX"
+ (match_operand:MMXMODEI 1 "register_mmxmem_operand")
+ (match_operand:MMXMODEI 2 "register_mmxmem_operand")))]
+ "TARGET_MMX || TARGET_MMX_WITH_SSE"
+ "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
+
+(define_expand "<code><mode>3"
+ [(set (match_operand:MMXMODEI 0 "register_operand")
+ (any_logic:MMXMODEI
+ (match_operand:MMXMODEI 1 "register_operand")
+ (match_operand:MMXMODEI 2 "register_operand")))]
+ "TARGET_MMX_WITH_SSE"
"ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
(define_insn "*mmx_<code><mode>3"
- [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
+ [(set (match_operand:MMXMODEI 0 "register_operand" "=y,x,Yv")
(any_logic:MMXMODEI
- (match_operand:MMXMODEI 1 "nonimmediate_operand" "%0")
- (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")))]
- "TARGET_MMX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
- "p<logic>\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmxadd")
- (set_attr "mode" "DI")])
+ (match_operand:MMXMODEI 1 "register_mmxmem_operand" "%0,0,Yv")
+ (match_operand:MMXMODEI 2 "register_mmxmem_operand" "ym,x,Yv")))]
+ "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+ && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+ "@
+ p<logic>\t{%2, %0|%0, %2}
+ p<logic>\t{%2, %0|%0, %2}
+ vp<logic>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+ (set_attr "type" "mmxadd,sselog,sselog")
+ (set_attr "mode" "DI,TI,TI")])
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
--
2.20.1
^ permalink raw reply [flat|nested] 50+ messages in thread
* [PATCH 06/42] i386: Emulate MMX smulv4hi3_highpart with SSE
2019-02-16 0:34 [PATCH 00/42] V7: Emulate MMX intrinsics with SSE H.J. Lu
` (6 preceding siblings ...)
2019-02-16 0:34 ` [PATCH 09/42] i386: Emulate MMX <any_logic><mode>3 " H.J. Lu
@ 2019-02-16 0:34 ` H.J. Lu
2019-02-16 0:34 ` [PATCH 10/42] i386: Emulate MMX mmx_andnot<mode>3 " H.J. Lu
` (33 subsequent siblings)
41 siblings, 0 replies; 50+ messages in thread
From: H.J. Lu @ 2019-02-16 0:34 UTC (permalink / raw)
To: gcc-patches; +Cc: Uros Bizjak
Emulate MMX smulv4hi3_highpart with SSE. Only SSE register source
operand is allowed.
PR target/89021
* config/i386/mmx.md (mmx_smulv4hi3_highpart): Also allow
TARGET_MMX_WITH_SSE.
(*mmx_smulv4hi3_highpart): Also allow TARGET_MMX_WITH_SSE. Add
SSE support.
---
gcc/config/i386/mmx.md | 25 +++++++++++++++----------
1 file changed, 15 insertions(+), 10 deletions(-)
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index cdb0f698001..3a7964d52bb 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -748,27 +748,32 @@
(lshiftrt:V4SI
(mult:V4SI
(sign_extend:V4SI
- (match_operand:V4HI 1 "nonimmediate_operand"))
+ (match_operand:V4HI 1 "register_mmxmem_operand"))
(sign_extend:V4SI
- (match_operand:V4HI 2 "nonimmediate_operand")))
+ (match_operand:V4HI 2 "register_mmxmem_operand")))
(const_int 16))))]
- "TARGET_MMX"
+ "TARGET_MMX || TARGET_MMX_WITH_SSE"
"ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
(define_insn "*mmx_smulv4hi3_highpart"
- [(set (match_operand:V4HI 0 "register_operand" "=y")
+ [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
(truncate:V4HI
(lshiftrt:V4SI
(mult:V4SI
(sign_extend:V4SI
- (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
+ (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yv"))
(sign_extend:V4SI
- (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
+ (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv")))
(const_int 16))))]
- "TARGET_MMX && ix86_binary_operator_ok (MULT, V4HImode, operands)"
- "pmulhw\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmxmul")
- (set_attr "mode" "DI")])
+ "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+ && ix86_binary_operator_ok (MULT, V4HImode, operands)"
+ "@
+ pmulhw\t{%2, %0|%0, %2}
+ pmulhw\t{%2, %0|%0, %2}
+ vpmulhw\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+ (set_attr "type" "mmxmul,ssemul,ssemul")
+ (set_attr "mode" "DI,TI,TI")])
(define_expand "mmx_umulv4hi3_highpart"
[(set (match_operand:V4HI 0 "register_operand")
--
2.20.1
^ permalink raw reply [flat|nested] 50+ messages in thread
* [PATCH 10/42] i386: Emulate MMX mmx_andnot<mode>3 with SSE
2019-02-16 0:34 [PATCH 00/42] V7: Emulate MMX intrinsics with SSE H.J. Lu
` (7 preceding siblings ...)
2019-02-16 0:34 ` [PATCH 06/42] i386: Emulate MMX smulv4hi3_highpart " H.J. Lu
@ 2019-02-16 0:34 ` H.J. Lu
2019-02-16 0:34 ` [PATCH 08/42] i386: Emulate MMX ashr<mode>3/<shift_insn><mode>3 " H.J. Lu
` (32 subsequent siblings)
41 siblings, 0 replies; 50+ messages in thread
From: H.J. Lu @ 2019-02-16 0:34 UTC (permalink / raw)
To: gcc-patches; +Cc: Uros Bizjak
Emulate MMX mmx_andnot<mode>3 with SSE. Only SSE register source operand
is allowed.
PR target/89021
* config/i386/mmx.md (mmx_andnot<mode>3): Also allow
TARGET_MMX_WITH_SSE. Add SSE support.
---
gcc/config/i386/mmx.md | 18 +++++++++++-------
1 file changed, 11 insertions(+), 7 deletions(-)
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 7e2d40313c3..510d453f0fd 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -1049,14 +1049,18 @@
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(define_insn "mmx_andnot<mode>3"
- [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
+ [(set (match_operand:MMXMODEI 0 "register_operand" "=y,x,Yv")
(and:MMXMODEI
- (not:MMXMODEI (match_operand:MMXMODEI 1 "register_operand" "0"))
- (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")))]
- "TARGET_MMX"
- "pandn\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmxadd")
- (set_attr "mode" "DI")])
+ (not:MMXMODEI (match_operand:MMXMODEI 1 "register_operand" "0,0,Yv"))
+ (match_operand:MMXMODEI 2 "register_mmxmem_operand" "ym,x,Yv")))]
+ "TARGET_MMX || TARGET_MMX_WITH_SSE"
+ "@
+ pandn\t{%2, %0|%0, %2}
+ pandn\t{%2, %0|%0, %2}
+ vpandn\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+ (set_attr "type" "mmxadd,sselog,sselog")
+ (set_attr "mode" "DI,TI,TI")])
(define_expand "mmx_<code><mode>3"
[(set (match_operand:MMXMODEI 0 "register_operand")
--
2.20.1
^ permalink raw reply [flat|nested] 50+ messages in thread
* [PATCH 08/42] i386: Emulate MMX ashr<mode>3/<shift_insn><mode>3 with SSE
2019-02-16 0:34 [PATCH 00/42] V7: Emulate MMX intrinsics with SSE H.J. Lu
` (8 preceding siblings ...)
2019-02-16 0:34 ` [PATCH 10/42] i386: Emulate MMX mmx_andnot<mode>3 " H.J. Lu
@ 2019-02-16 0:34 ` H.J. Lu
2019-02-16 0:34 ` [PATCH 04/42] i386: Emulate MMX plusminus/sat_plusminus " H.J. Lu
` (31 subsequent siblings)
41 siblings, 0 replies; 50+ messages in thread
From: H.J. Lu @ 2019-02-16 0:34 UTC (permalink / raw)
To: gcc-patches; +Cc: Uros Bizjak
Emulate MMX ashr<mode>3/<shift_insn><mode>3 with SSE. Only SSE register
source operand is allowed.
PR target/89021
* config/i386/mmx.md (mmx_ashr<mode>3): Also allow
TARGET_MMX_WITH_SSE. Add SSE emulation.
(mmx_<shift_insn><mode>3): Likewise.
(ashr<mode>3): New.
(<shift_insn><mode>3): Likewise.
---
gcc/config/i386/mmx.md | 50 ++++++++++++++++++++++++++++++------------
1 file changed, 36 insertions(+), 14 deletions(-)
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 9f0311badca..240e0188a78 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -959,32 +959,54 @@
(set_attr "mode" "DI")])
(define_insn "mmx_ashr<mode>3"
- [(set (match_operand:MMXMODE24 0 "register_operand" "=y")
+ [(set (match_operand:MMXMODE24 0 "register_operand" "=y,x,Yv")
(ashiftrt:MMXMODE24
- (match_operand:MMXMODE24 1 "register_operand" "0")
- (match_operand:DI 2 "nonmemory_operand" "yN")))]
- "TARGET_MMX"
- "psra<mmxvecsize>\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmxshft")
+ (match_operand:MMXMODE24 1 "register_operand" "0,0,Yv")
+ (match_operand:DI 2 "nonmemory_operand" "yN,xN,YvN")))]
+ "TARGET_MMX || TARGET_MMX_WITH_SSE"
+ "@
+ psra<mmxvecsize>\t{%2, %0|%0, %2}
+ psra<mmxvecsize>\t{%2, %0|%0, %2}
+ vpsra<mmxvecsize>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+ (set_attr "type" "mmxshft,sseishft,sseishft")
(set (attr "length_immediate")
(if_then_else (match_operand 2 "const_int_operand")
(const_string "1")
(const_string "0")))
- (set_attr "mode" "DI")])
+ (set_attr "mode" "DI,TI,TI")])
+
+(define_expand "ashr<mode>3"
+ [(set (match_operand:MMXMODE24 0 "register_operand")
+ (ashiftrt:MMXMODE24
+ (match_operand:MMXMODE24 1 "register_operand")
+ (match_operand:DI 2 "nonmemory_operand")))]
+ "TARGET_MMX_WITH_SSE")
(define_insn "mmx_<shift_insn><mode>3"
- [(set (match_operand:MMXMODE248 0 "register_operand" "=y")
+ [(set (match_operand:MMXMODE248 0 "register_operand" "=y,x,Yv")
(any_lshift:MMXMODE248
- (match_operand:MMXMODE248 1 "register_operand" "0")
- (match_operand:DI 2 "nonmemory_operand" "yN")))]
- "TARGET_MMX"
- "p<vshift><mmxvecsize>\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmxshft")
+ (match_operand:MMXMODE248 1 "register_operand" "0,0,Yv")
+ (match_operand:DI 2 "nonmemory_operand" "yN,xN,YvN")))]
+ "TARGET_MMX || TARGET_MMX_WITH_SSE"
+ "@
+ p<vshift><mmxvecsize>\t{%2, %0|%0, %2}
+ p<vshift><mmxvecsize>\t{%2, %0|%0, %2}
+ vp<vshift><mmxvecsize>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+ (set_attr "type" "mmxshft,sseishft,sseishft")
(set (attr "length_immediate")
(if_then_else (match_operand 2 "const_int_operand")
(const_string "1")
(const_string "0")))
- (set_attr "mode" "DI")])
+ (set_attr "mode" "DI,TI,TI")])
+
+(define_expand "<shift_insn><mode>3"
+ [(set (match_operand:MMXMODE248 0 "register_operand")
+ (any_lshift:MMXMODE248
+ (match_operand:MMXMODE248 1 "register_operand")
+ (match_operand:DI 2 "nonmemory_operand")))]
+ "TARGET_MMX_WITH_SSE")
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
--
2.20.1
^ permalink raw reply [flat|nested] 50+ messages in thread
* [PATCH 04/42] i386: Emulate MMX plusminus/sat_plusminus with SSE
2019-02-16 0:34 [PATCH 00/42] V7: Emulate MMX intrinsics with SSE H.J. Lu
` (9 preceding siblings ...)
2019-02-16 0:34 ` [PATCH 08/42] i386: Emulate MMX ashr<mode>3/<shift_insn><mode>3 " H.J. Lu
@ 2019-02-16 0:34 ` H.J. Lu
2019-02-16 0:34 ` [PATCH 23/42] i386: Emulate MMX mmx_uavgv4hi3 " H.J. Lu
` (30 subsequent siblings)
41 siblings, 0 replies; 50+ messages in thread
From: H.J. Lu @ 2019-02-16 0:34 UTC (permalink / raw)
To: gcc-patches; +Cc: Uros Bizjak
Emulate MMX plusminus/sat_plusminus with SSE. Only SSE register source
operand is allowed.
PR target/89021
* config/i386/mmx.md (MMXMODEI8): Require TARGET_SSE2 for V1DI.
(plusminus:mmx_<plusminus_insn><mode>3): Check
TARGET_MMX_WITH_SSE.
(sat_plusminus:mmx_<plusminus_insn><mode>3): Likewise.
(<plusminus_insn><mode>3): New.
(*mmx_<plusminus_insn><mode>3): Add SSE emulation.
(*mmx_<plusminus_insn><mode>3): Likewise.
---
gcc/config/i386/mmx.md | 59 +++++++++++++++++++++++++++---------------
1 file changed, 38 insertions(+), 21 deletions(-)
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 34fecd6a745..517c3283963 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -45,7 +45,7 @@
;; 8 byte integral modes handled by MMX (and by extension, SSE)
(define_mode_iterator MMXMODEI [V8QI V4HI V2SI])
-(define_mode_iterator MMXMODEI8 [V8QI V4HI V2SI V1DI])
+(define_mode_iterator MMXMODEI8 [V8QI V4HI V2SI (V1DI "TARGET_SSE2")])
;; All 8-byte vector modes handled by MMX
(define_mode_iterator MMXMODE [V8QI V4HI V2SI V1DI V2SF])
@@ -663,39 +663,56 @@
(define_expand "mmx_<plusminus_insn><mode>3"
[(set (match_operand:MMXMODEI8 0 "register_operand")
(plusminus:MMXMODEI8
- (match_operand:MMXMODEI8 1 "nonimmediate_operand")
- (match_operand:MMXMODEI8 2 "nonimmediate_operand")))]
- "TARGET_MMX || (TARGET_SSE2 && <MODE>mode == V1DImode)"
+ (match_operand:MMXMODEI8 1 "register_mmxmem_operand")
+ (match_operand:MMXMODEI8 2 "register_mmxmem_operand")))]
+ "TARGET_MMX || TARGET_MMX_WITH_SSE"
+ "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
+
+(define_expand "<plusminus_insn><mode>3"
+ [(set (match_operand:MMXMODEI 0 "register_operand")
+ (plusminus:MMXMODEI
+ (match_operand:MMXMODEI 1 "register_operand")
+ (match_operand:MMXMODEI 2 "register_operand")))]
+ "TARGET_MMX_WITH_SSE"
"ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
(define_insn "*mmx_<plusminus_insn><mode>3"
- [(set (match_operand:MMXMODEI8 0 "register_operand" "=y")
+ [(set (match_operand:MMXMODEI8 0 "register_operand" "=y,x,Yv")
(plusminus:MMXMODEI8
- (match_operand:MMXMODEI8 1 "nonimmediate_operand" "<comm>0")
- (match_operand:MMXMODEI8 2 "nonimmediate_operand" "ym")))]
- "(TARGET_MMX || (TARGET_SSE2 && <MODE>mode == V1DImode))
+ (match_operand:MMXMODEI8 1 "register_mmxmem_operand" "<comm>0,0,Yv")
+ (match_operand:MMXMODEI8 2 "register_mmxmem_operand" "ym,x,Yv")))]
+ "(TARGET_MMX || TARGET_MMX_WITH_SSE)
&& ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
- "p<plusminus_mnemonic><mmxvecsize>\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmxadd")
- (set_attr "mode" "DI")])
+ "@
+ p<plusminus_mnemonic><mmxvecsize>\t{%2, %0|%0, %2}
+ p<plusminus_mnemonic><mmxvecsize>\t{%2, %0|%0, %2}
+ vp<plusminus_mnemonic><mmxvecsize>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+ (set_attr "type" "mmxadd,sseadd,sseadd")
+ (set_attr "mode" "DI,TI,TI")])
(define_expand "mmx_<plusminus_insn><mode>3"
[(set (match_operand:MMXMODE12 0 "register_operand")
(sat_plusminus:MMXMODE12
- (match_operand:MMXMODE12 1 "nonimmediate_operand")
- (match_operand:MMXMODE12 2 "nonimmediate_operand")))]
- "TARGET_MMX"
+ (match_operand:MMXMODE12 1 "register_mmxmem_operand")
+ (match_operand:MMXMODE12 2 "register_mmxmem_operand")))]
+ "TARGET_MMX || TARGET_MMX_WITH_SSE"
"ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
(define_insn "*mmx_<plusminus_insn><mode>3"
- [(set (match_operand:MMXMODE12 0 "register_operand" "=y")
+ [(set (match_operand:MMXMODE12 0 "register_operand" "=y,x,Yv")
(sat_plusminus:MMXMODE12
- (match_operand:MMXMODE12 1 "nonimmediate_operand" "<comm>0")
- (match_operand:MMXMODE12 2 "nonimmediate_operand" "ym")))]
- "TARGET_MMX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
- "p<plusminus_mnemonic><mmxvecsize>\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmxadd")
- (set_attr "mode" "DI")])
+ (match_operand:MMXMODE12 1 "register_mmxmem_operand" "<comm>0,0,Yv")
+ (match_operand:MMXMODE12 2 "register_mmxmem_operand" "ym,x,Yv")))]
+ "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+ && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+ "@
+ p<plusminus_mnemonic><mmxvecsize>\t{%2, %0|%0, %2}
+ p<plusminus_mnemonic><mmxvecsize>\t{%2, %0|%0, %2}
+ vp<plusminus_mnemonic><mmxvecsize>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+ (set_attr "type" "mmxadd,sseadd,sseadd")
+ (set_attr "mode" "DI,TI,TI")])
(define_expand "mmx_mulv4hi3"
[(set (match_operand:V4HI 0 "register_operand")
--
2.20.1
^ permalink raw reply [flat|nested] 50+ messages in thread
* [PATCH 23/42] i386: Emulate MMX mmx_uavgv4hi3 with SSE
2019-02-16 0:34 [PATCH 00/42] V7: Emulate MMX intrinsics with SSE H.J. Lu
` (10 preceding siblings ...)
2019-02-16 0:34 ` [PATCH 04/42] i386: Emulate MMX plusminus/sat_plusminus " H.J. Lu
@ 2019-02-16 0:34 ` H.J. Lu
2019-02-16 0:34 ` [PATCH 14/42] i386: Emulate MMX sse_cvtps2pi/sse_cvttps2pi " H.J. Lu
` (29 subsequent siblings)
41 siblings, 0 replies; 50+ messages in thread
From: H.J. Lu @ 2019-02-16 0:34 UTC (permalink / raw)
To: gcc-patches; +Cc: Uros Bizjak
Emulate MMX mmx_uavgv4hi3 with SSE. Only SSE register source operand is
allowed.
PR target/89021
* config/i386/mmx.md (mmx_uavgv4hi3): Also check TARGET_MMX and
TARGET_MMX_WITH_SSE.
(*mmx_uavgv4hi3): Add SSE emulation.
---
gcc/config/i386/mmx.md | 26 ++++++++++++++++----------
1 file changed, 16 insertions(+), 10 deletions(-)
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 570153521a1..b8983e1755a 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -1735,33 +1735,39 @@
(plus:V4SI
(plus:V4SI
(zero_extend:V4SI
- (match_operand:V4HI 1 "nonimmediate_operand"))
+ (match_operand:V4HI 1 "register_mmxmem_operand"))
(zero_extend:V4SI
- (match_operand:V4HI 2 "nonimmediate_operand")))
+ (match_operand:V4HI 2 "register_mmxmem_operand")))
(const_vector:V4SI [(const_int 1) (const_int 1)
(const_int 1) (const_int 1)]))
(const_int 1))))]
- "TARGET_SSE || TARGET_3DNOW_A"
+ "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+ && (TARGET_SSE || TARGET_3DNOW_A)"
"ix86_fixup_binary_operands_no_copy (PLUS, V4HImode, operands);")
(define_insn "*mmx_uavgv4hi3"
- [(set (match_operand:V4HI 0 "register_operand" "=y")
+ [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
(truncate:V4HI
(lshiftrt:V4SI
(plus:V4SI
(plus:V4SI
(zero_extend:V4SI
- (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
+ (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yv"))
(zero_extend:V4SI
- (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
+ (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv")))
(const_vector:V4SI [(const_int 1) (const_int 1)
(const_int 1) (const_int 1)]))
(const_int 1))))]
- "(TARGET_SSE || TARGET_3DNOW_A)
+ "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+ && (TARGET_SSE || TARGET_3DNOW_A)
&& ix86_binary_operator_ok (PLUS, V4HImode, operands)"
- "pavgw\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmxshft")
- (set_attr "mode" "DI")])
+ "@
+ pavgw\t{%2, %0|%0, %2}
+ pavgw\t{%2, %0|%0, %2}
+ vpavgw\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+ (set_attr "type" "mmxshft,sseiadd,sseiadd")
+ (set_attr "mode" "DI,TI,TI")])
(define_insn "mmx_psadbw"
[(set (match_operand:V1DI 0 "register_operand" "=y")
--
2.20.1
^ permalink raw reply [flat|nested] 50+ messages in thread
* [PATCH 14/42] i386: Emulate MMX sse_cvtps2pi/sse_cvttps2pi with SSE
2019-02-16 0:34 [PATCH 00/42] V7: Emulate MMX intrinsics with SSE H.J. Lu
` (11 preceding siblings ...)
2019-02-16 0:34 ` [PATCH 23/42] i386: Emulate MMX mmx_uavgv4hi3 " H.J. Lu
@ 2019-02-16 0:34 ` H.J. Lu
2019-02-16 0:34 ` [PATCH 29/42] i386: Emulate MMX ssse3_ph<plusminus_mnemonic>dv2si3 " H.J. Lu
` (28 subsequent siblings)
41 siblings, 0 replies; 50+ messages in thread
From: H.J. Lu @ 2019-02-16 0:34 UTC (permalink / raw)
To: gcc-patches; +Cc: Uros Bizjak
Emulate MMX sse_cvtps2pi/sse_cvttps2pi with SSE.
PR target/89021
* config/i386/sse.md (sse_cvtps2pi): Add SSE emulation.
(sse_cvttps2pi): Likewise.
---
gcc/config/i386/sse.md | 30 ++++++++++++++++++------------
1 file changed, 18 insertions(+), 12 deletions(-)
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index c8e0133560a..70e3669d115 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -4574,26 +4574,32 @@
(set_attr "mode" "V4SF")])
(define_insn "sse_cvtps2pi"
- [(set (match_operand:V2SI 0 "register_operand" "=y")
+ [(set (match_operand:V2SI 0 "register_operand" "=y,Yv")
(vec_select:V2SI
- (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
+ (unspec:V4SI [(match_operand:V4SF 1 "register_mmxmem_operand" "xm,YvBm")]
UNSPEC_FIX_NOTRUNC)
(parallel [(const_int 0) (const_int 1)])))]
- "TARGET_SSE"
- "cvtps2pi\t{%1, %0|%0, %q1}"
- [(set_attr "type" "ssecvt")
- (set_attr "unit" "mmx")
+ "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSE"
+ "@
+ cvtps2pi\t{%1, %0|%0, %q1}
+ %vcvtps2dq\t{%1, %0|%0, %1}"
+ [(set_attr "mmx_isa" "native,x64")
+ (set_attr "type" "ssecvt")
+ (set_attr "unit" "mmx,*")
(set_attr "mode" "DI")])
(define_insn "sse_cvttps2pi"
- [(set (match_operand:V2SI 0 "register_operand" "=y")
+ [(set (match_operand:V2SI 0 "register_operand" "=y,Yv")
(vec_select:V2SI
- (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
+ (fix:V4SI (match_operand:V4SF 1 "register_mmxmem_operand" "xm,YvBm"))
(parallel [(const_int 0) (const_int 1)])))]
- "TARGET_SSE"
- "cvttps2pi\t{%1, %0|%0, %q1}"
- [(set_attr "type" "ssecvt")
- (set_attr "unit" "mmx")
+ "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSE"
+ "@
+ cvttps2pi\t{%1, %0|%0, %q1}
+ %vcvttps2dq\t{%1, %0|%0, %1}"
+ [(set_attr "mmx_isa" "native,x64")
+ (set_attr "type" "ssecvt")
+ (set_attr "unit" "mmx,*")
(set_attr "prefix_rep" "0")
(set_attr "mode" "SF")])
--
2.20.1
^ permalink raw reply [flat|nested] 50+ messages in thread
* [PATCH 29/42] i386: Emulate MMX ssse3_ph<plusminus_mnemonic>dv2si3 with SSE
2019-02-16 0:34 [PATCH 00/42] V7: Emulate MMX intrinsics with SSE H.J. Lu
` (12 preceding siblings ...)
2019-02-16 0:34 ` [PATCH 14/42] i386: Emulate MMX sse_cvtps2pi/sse_cvttps2pi " H.J. Lu
@ 2019-02-16 0:34 ` H.J. Lu
2019-02-16 0:34 ` [PATCH 16/42] i386: Emulate MMX mmx_pextrw " H.J. Lu
` (27 subsequent siblings)
41 siblings, 0 replies; 50+ messages in thread
From: H.J. Lu @ 2019-02-16 0:34 UTC (permalink / raw)
To: gcc-patches; +Cc: Uros Bizjak
Emulate MMX ssse3_ph<plusminus_mnemonic>dv2si3 with SSE by moving bits
64:95 to bits 32:63 in SSE register. Only SSE register source operand
is allowed.
PR target/89021
* config/i386/sse.md (ssse3_ph<plusminus_mnemonic>dv2si3):
Changed to define_insn_and_split to support SSE emulation.
---
gcc/config/i386/sse.md | 34 ++++++++++++++++++++++++++--------
1 file changed, 26 insertions(+), 8 deletions(-)
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 38b83c57ffc..0565ddc177f 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -15356,26 +15356,44 @@
(set_attr "prefix" "orig,vex")
(set_attr "mode" "TI")])
-(define_insn "ssse3_ph<plusminus_mnemonic>dv2si3"
- [(set (match_operand:V2SI 0 "register_operand" "=y")
+(define_insn_and_split "ssse3_ph<plusminus_mnemonic>dv2si3"
+ [(set (match_operand:V2SI 0 "register_operand" "=y,x,Yv")
(vec_concat:V2SI
(plusminus:SI
(vec_select:SI
- (match_operand:V2SI 1 "register_operand" "0")
+ (match_operand:V2SI 1 "register_operand" "0,0,Yv")
(parallel [(const_int 0)]))
(vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
(plusminus:SI
(vec_select:SI
- (match_operand:V2SI 2 "nonimmediate_operand" "ym")
+ (match_operand:V2SI 2 "register_mmxmem_operand" "ym,x,Yv")
(parallel [(const_int 0)]))
(vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
- "TARGET_SSSE3"
- "ph<plusminus_mnemonic>d\t{%2, %0|%0, %2}"
- [(set_attr "type" "sseiadd")
+ "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
+ "@
+ ph<plusminus_mnemonic>d\t{%2, %0|%0, %2}
+ #
+ #"
+ "TARGET_MMX_WITH_SSE && reload_completed"
+ [(const_int 0)]
+{
+ /* Generate SSE version of the operation. */
+ rtx op0 = lowpart_subreg (V4SImode, operands[0],
+ GET_MODE (operands[0]));
+ rtx op1 = lowpart_subreg (V4SImode, operands[1],
+ GET_MODE (operands[1]));
+ rtx op2 = lowpart_subreg (V4SImode, operands[2],
+ GET_MODE (operands[2]));
+ emit_insn (gen_ssse3_ph<plusminus_mnemonic>dv4si3 (op0, op1, op2));
+ ix86_move_vector_high_sse_to_mmx (op0);
+ DONE;
+}
+ [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+ (set_attr "type" "sseiadd")
(set_attr "atom_unit" "complex")
(set_attr "prefix_extra" "1")
(set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
- (set_attr "mode" "DI")])
+ (set_attr "mode" "DI,TI,TI")])
(define_insn "avx2_pmaddubsw256"
[(set (match_operand:V16HI 0 "register_operand" "=x,v")
--
2.20.1
^ permalink raw reply [flat|nested] 50+ messages in thread
* [PATCH 16/42] i386: Emulate MMX mmx_pextrw with SSE
2019-02-16 0:34 [PATCH 00/42] V7: Emulate MMX intrinsics with SSE H.J. Lu
` (13 preceding siblings ...)
2019-02-16 0:34 ` [PATCH 29/42] i386: Emulate MMX ssse3_ph<plusminus_mnemonic>dv2si3 " H.J. Lu
@ 2019-02-16 0:34 ` H.J. Lu
2019-02-16 0:34 ` [PATCH 36/42] i386: Correct <ssse3_avx2>_pmulhrsw<mode>3[_mask] H.J. Lu
` (26 subsequent siblings)
41 siblings, 0 replies; 50+ messages in thread
From: H.J. Lu @ 2019-02-16 0:34 UTC (permalink / raw)
To: gcc-patches; +Cc: Uros Bizjak
Emulate MMX mmx_pextrw with SSE. Only SSE register source operand is
allowed.
PR target/89021
* config/i386/mmx.md (mmx_pextrw): Add SSE emulation.
---
gcc/config/i386/mmx.md | 18 +++++++++++-------
1 file changed, 11 insertions(+), 7 deletions(-)
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 43f85064cd9..22547c7da6f 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -1310,16 +1310,20 @@
(set_attr "mode" "DI")])
(define_insn "mmx_pextrw"
- [(set (match_operand:SI 0 "register_operand" "=r")
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
(zero_extend:SI
(vec_select:HI
- (match_operand:V4HI 1 "register_operand" "y")
- (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")]))))]
- "TARGET_SSE || TARGET_3DNOW_A"
- "pextrw\t{%2, %1, %0|%0, %1, %2}"
- [(set_attr "type" "mmxcvt")
+ (match_operand:V4HI 1 "register_operand" "y,Yv")
+ (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n,n")]))))]
+ "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+ && (TARGET_SSE || TARGET_3DNOW_A)"
+ "@
+ pextrw\t{%2, %1, %0|%0, %1, %2}
+ %vpextrw\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "mmx_isa" "native,x64")
+ (set_attr "type" "mmxcvt,sselog1")
(set_attr "length_immediate" "1")
- (set_attr "mode" "DI")])
+ (set_attr "mode" "DI,TI")])
(define_expand "mmx_pshufw"
[(match_operand:V4HI 0 "register_operand")
--
2.20.1
^ permalink raw reply [flat|nested] 50+ messages in thread
* [PATCH 36/42] i386: Correct <ssse3_avx2>_pmulhrsw<mode>3[_mask]
2019-02-16 0:34 [PATCH 00/42] V7: Emulate MMX intrinsics with SSE H.J. Lu
` (14 preceding siblings ...)
2019-02-16 0:34 ` [PATCH 16/42] i386: Emulate MMX mmx_pextrw " H.J. Lu
@ 2019-02-16 0:34 ` H.J. Lu
2019-02-16 9:28 ` Uros Bizjak
2019-02-16 0:34 ` [PATCH 34/42] i386: Emulate MMX ssse3_palignrdi with SSE H.J. Lu
` (25 subsequent siblings)
41 siblings, 1 reply; 50+ messages in thread
From: H.J. Lu @ 2019-02-16 0:34 UTC (permalink / raw)
To: gcc-patches; +Cc: Uros Bizjak
There is no V4HI pmulhrsw in AVX512BW and V4HI/V8HI pmulhrsw don't require
AVX2. To support TARGET_MMX_WITH_SSE, replace nonimmediate_operand with
register_pmulhrswmem_operand in <ssse3_avx2>_pmulhrsw<mode>3.
PR target/89372
* config/i386/predicates.md (register_pmulhrswmem_operand): New.
* config/i386/sse.md (PMULHRSW): Remove V4HI.
(PMULHRSW_MMX): New.
(<ssse3_avx2>_pmulhrsw<mode>3): Replace PMULHRSW with
PMULHRSW_MMX. Require TARGET_SSSE3, not TARGET_AVX2. Replace
nonimmediate_operand with register_pmulhrswmem_operand.
---
gcc/config/i386/predicates.md | 7 +++++++
gcc/config/i386/sse.md | 15 +++++++++------
2 files changed, 16 insertions(+), 6 deletions(-)
diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
index f3c2f72de54..b7cb26a81fe 100644
--- a/gcc/config/i386/predicates.md
+++ b/gcc/config/i386/predicates.md
@@ -56,6 +56,13 @@
(and (not (match_test "TARGET_MMX_WITH_SSE"))
(match_operand 0 "memory_operand"))))
+;; Match register operands, but include memory operands for
+;; !(TARGET_MMX_WITH_SSE && mode == V4HImode).
+(define_predicate "register_pmulhrswmem_operand"
+ (ior (match_operand 0 "register_operand")
+ (and (not (match_test "TARGET_MMX_WITH_SSE && mode == V4HImode"))
+ (match_operand 0 "memory_operand"))))
+
;; True if the operand is an SSE register.
(define_predicate "sse_reg_operand"
(and (match_code "reg")
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 92f5ad17156..379da16615d 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -15579,7 +15579,7 @@
(set_attr "mode" "DI,TI,TI")])
(define_mode_iterator PMULHRSW
- [V4HI V8HI (V16HI "TARGET_AVX2")])
+ [V8HI (V16HI "TARGET_AVX2")])
(define_expand "<ssse3_avx2>_pmulhrsw<mode>3_mask"
[(set (match_operand:PMULHRSW 0 "register_operand")
@@ -15604,21 +15604,24 @@
ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
})
+(define_mode_iterator PMULHRSW_MMX
+ [V4HI V8HI (V16HI "TARGET_AVX2")])
+
(define_expand "<ssse3_avx2>_pmulhrsw<mode>3"
- [(set (match_operand:PMULHRSW 0 "register_operand")
- (truncate:PMULHRSW
+ [(set (match_operand:PMULHRSW_MMX 0 "register_operand")
+ (truncate:PMULHRSW_MMX
(lshiftrt:<ssedoublemode>
(plus:<ssedoublemode>
(lshiftrt:<ssedoublemode>
(mult:<ssedoublemode>
(sign_extend:<ssedoublemode>
- (match_operand:PMULHRSW 1 "nonimmediate_operand"))
+ (match_operand:PMULHRSW_MMX 1 "register_pmulhrswmem_operand"))
(sign_extend:<ssedoublemode>
- (match_operand:PMULHRSW 2 "nonimmediate_operand")))
+ (match_operand:PMULHRSW_MMX 2 "register_pmulhrswmem_operand")))
(const_int 14))
(match_dup 3))
(const_int 1))))]
- "TARGET_AVX2"
+ "TARGET_SSSE3"
{
operands[3] = CONST1_RTX(<MODE>mode);
ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
--
2.20.1
^ permalink raw reply [flat|nested] 50+ messages in thread
* Re: [PATCH 36/42] i386: Correct <ssse3_avx2>_pmulhrsw<mode>3[_mask]
2019-02-16 0:34 ` [PATCH 36/42] i386: Correct <ssse3_avx2>_pmulhrsw<mode>3[_mask] H.J. Lu
@ 2019-02-16 9:28 ` Uros Bizjak
0 siblings, 0 replies; 50+ messages in thread
From: Uros Bizjak @ 2019-02-16 9:28 UTC (permalink / raw)
To: H.J. Lu; +Cc: gcc-patches
On 2/16/19, H.J. Lu <hjl.tools@gmail.com> wrote:
> There is no V4HI pmulhrsw in AVX512BW and V4HI/V8HI pmulhrsw don't require
> AVX2. To support TARGET_MMX_WITH_SSE, replace nonimmediate_operand with
> register_pmulhrswmem_operand in <ssse3_avx2>_pmulhrsw<mode>3.
>
> PR target/89372
> * config/i386/predicates.md (register_pmulhrswmem_operand): New.
> * config/i386/sse.md (PMULHRSW): Remove V4HI.
> (PMULHRSW_MMX): New.
> (<ssse3_avx2>_pmulhrsw<mode>3): Replace PMULHRSW with
> PMULHRSW_MMX. Require TARGET_SSSE3, not TARGET_AVX2. Replace
> nonimmediate_operand with register_pmulhrswmem_operand.
Complications like the above usually point to a wrong macroization
choice. Please try to split out V4HImode pattern.
Also, please separate the fix to a separate patch, which should be
committed independently as a fix before gcc-9 is released.
Uros.
> ---
> gcc/config/i386/predicates.md | 7 +++++++
> gcc/config/i386/sse.md | 15 +++++++++------
> 2 files changed, 16 insertions(+), 6 deletions(-)
>
> diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
> index f3c2f72de54..b7cb26a81fe 100644
> --- a/gcc/config/i386/predicates.md
> +++ b/gcc/config/i386/predicates.md
> @@ -56,6 +56,13 @@
> (and (not (match_test "TARGET_MMX_WITH_SSE"))
> (match_operand 0 "memory_operand"))))
>
> +;; Match register operands, but include memory operands for
> +;; !(TARGET_MMX_WITH_SSE && mode == V4HImode).
> +(define_predicate "register_pmulhrswmem_operand"
> + (ior (match_operand 0 "register_operand")
> + (and (not (match_test "TARGET_MMX_WITH_SSE && mode == V4HImode"))
> + (match_operand 0 "memory_operand"))))
> +
> ;; True if the operand is an SSE register.
> (define_predicate "sse_reg_operand"
> (and (match_code "reg")
> diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
> index 92f5ad17156..379da16615d 100644
> --- a/gcc/config/i386/sse.md
> +++ b/gcc/config/i386/sse.md
> @@ -15579,7 +15579,7 @@
> (set_attr "mode" "DI,TI,TI")])
>
> (define_mode_iterator PMULHRSW
> - [V4HI V8HI (V16HI "TARGET_AVX2")])
> + [V8HI (V16HI "TARGET_AVX2")])
>
> (define_expand "<ssse3_avx2>_pmulhrsw<mode>3_mask"
> [(set (match_operand:PMULHRSW 0 "register_operand")
> @@ -15604,21 +15604,24 @@
> ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
> })
>
> +(define_mode_iterator PMULHRSW_MMX
> + [V4HI V8HI (V16HI "TARGET_AVX2")])
> +
> (define_expand "<ssse3_avx2>_pmulhrsw<mode>3"
> - [(set (match_operand:PMULHRSW 0 "register_operand")
> - (truncate:PMULHRSW
> + [(set (match_operand:PMULHRSW_MMX 0 "register_operand")
> + (truncate:PMULHRSW_MMX
> (lshiftrt:<ssedoublemode>
> (plus:<ssedoublemode>
> (lshiftrt:<ssedoublemode>
> (mult:<ssedoublemode>
> (sign_extend:<ssedoublemode>
> - (match_operand:PMULHRSW 1 "nonimmediate_operand"))
> + (match_operand:PMULHRSW_MMX 1 "register_pmulhrswmem_operand"))
> (sign_extend:<ssedoublemode>
> - (match_operand:PMULHRSW 2 "nonimmediate_operand")))
> + (match_operand:PMULHRSW_MMX 2 "register_pmulhrswmem_operand")))
> (const_int 14))
> (match_dup 3))
> (const_int 1))))]
> - "TARGET_AVX2"
> + "TARGET_SSSE3"
> {
> operands[3] = CONST1_RTX(<MODE>mode);
> ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
> --
> 2.20.1
>
>
^ permalink raw reply [flat|nested] 50+ messages in thread
* [PATCH 34/42] i386: Emulate MMX ssse3_palignrdi with SSE
2019-02-16 0:34 [PATCH 00/42] V7: Emulate MMX intrinsics with SSE H.J. Lu
` (15 preceding siblings ...)
2019-02-16 0:34 ` [PATCH 36/42] i386: Correct <ssse3_avx2>_pmulhrsw<mode>3[_mask] H.J. Lu
@ 2019-02-16 0:34 ` H.J. Lu
2019-02-16 0:34 ` [PATCH 03/42] i386: Emulate MMX punpcklXX/punpckhXX with SSE punpcklXX H.J. Lu
` (24 subsequent siblings)
41 siblings, 0 replies; 50+ messages in thread
From: H.J. Lu @ 2019-02-16 0:34 UTC (permalink / raw)
To: gcc-patches; +Cc: Uros Bizjak
Emulate MMX version of palignrq with SSE version by concatenating 2
64-bit MMX operands into a single 128-bit SSE operand, followed by
SSE psrldq. Only SSE register source operand is allowed.
PR target/89021
* config/i386/sse.md (ssse3_palignrdi): Changed to
define_insn_and_split to support SSE emulation.
---
gcc/config/i386/sse.md | 58 ++++++++++++++++++++++++++++++++++--------
1 file changed, 48 insertions(+), 10 deletions(-)
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index c2dbd59049a..ec68b5dc2ce 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -15824,23 +15824,61 @@
(set_attr "prefix" "orig,vex,evex")
(set_attr "mode" "<sseinsnmode>")])
-(define_insn "ssse3_palignrdi"
- [(set (match_operand:DI 0 "register_operand" "=y")
- (unspec:DI [(match_operand:DI 1 "register_operand" "0")
- (match_operand:DI 2 "nonimmediate_operand" "ym")
- (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
+(define_insn_and_split "ssse3_palignrdi"
+ [(set (match_operand:DI 0 "register_operand" "=y,x,Yv")
+ (unspec:DI [(match_operand:DI 1 "register_operand" "0,0,Yv")
+ (match_operand:DI 2 "register_mmxmem_operand" "ym,x,Yv")
+ (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n,n,n")]
UNSPEC_PALIGNR))]
- "TARGET_SSSE3"
+ "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
{
- operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
- return "palignr\t{%3, %2, %0|%0, %2, %3}";
+ switch (which_alternative)
+ {
+ case 0:
+ operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
+ return "palignr\t{%3, %2, %0|%0, %2, %3}";
+ case 1:
+ case 2:
+ return "#";
+ default:
+ gcc_unreachable ();
+ }
}
- [(set_attr "type" "sseishft")
+ "TARGET_MMX_WITH_SSE && reload_completed"
+ [(set (match_dup 0)
+ (lshiftrt:V1TI (match_dup 0) (match_dup 3)))]
+{
+ /* Emulate MMX palignrdi with SSE psrldq. */
+ rtx op0 = lowpart_subreg (V2DImode, operands[0],
+ GET_MODE (operands[0]));
+ rtx insn;
+ if (TARGET_AVX)
+ insn = gen_vec_concatv2di (op0, operands[2], operands[1]);
+ else
+ {
+ /* NB: SSE can only concatenate OP0 and OP1 to OP0. */
+ insn = gen_vec_concatv2di (op0, operands[1], operands[2]);
+ emit_insn (insn);
+ /* Swap bits 0:63 with bits 64:127. */
+ rtx mask = gen_rtx_PARALLEL (VOIDmode,
+ gen_rtvec (4, GEN_INT (2),
+ GEN_INT (3),
+ GEN_INT (0),
+ GEN_INT (1)));
+ rtx op1 = lowpart_subreg (V4SImode, op0, GET_MODE (op0));
+ rtx op2 = gen_rtx_VEC_SELECT (V4SImode, op1, mask);
+ insn = gen_rtx_SET (op1, op2);
+ }
+ emit_insn (insn);
+ operands[0] = lowpart_subreg (V1TImode, op0, GET_MODE (op0));
+}
+ [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+ (set_attr "type" "sseishft")
(set_attr "atom_unit" "sishuf")
(set_attr "prefix_extra" "1")
(set_attr "length_immediate" "1")
(set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
- (set_attr "mode" "DI")])
+ (set_attr "mode" "DI,TI,TI")])
;; Mode iterator to handle singularity w/ absence of V2DI and V4DI
;; modes for abs instruction on pre AVX-512 targets.
--
2.20.1
^ permalink raw reply [flat|nested] 50+ messages in thread
* [PATCH 03/42] i386: Emulate MMX punpcklXX/punpckhXX with SSE punpcklXX
2019-02-16 0:34 [PATCH 00/42] V7: Emulate MMX intrinsics with SSE H.J. Lu
` (16 preceding siblings ...)
2019-02-16 0:34 ` [PATCH 34/42] i386: Emulate MMX ssse3_palignrdi with SSE H.J. Lu
@ 2019-02-16 0:34 ` H.J. Lu
2019-02-16 0:34 ` [PATCH 18/42] i386: Emulate MMX V4HI smaxmin/V8QI umaxmin with SSE H.J. Lu
` (23 subsequent siblings)
41 siblings, 0 replies; 50+ messages in thread
From: H.J. Lu @ 2019-02-16 0:34 UTC (permalink / raw)
To: gcc-patches; +Cc: Uros Bizjak
Emulate MMX punpcklXX/punpckhXX with SSE punpcklXX. For MMX punpckhXX,
move bits 64:127 to bits 0:63 in SSE register. Only SSE register source
operand is allowed.
PR target/89021
* config/i386/i386-protos.h (ix86_split_mmx_punpck): New
prototype.
* config/i386/i386.c (ix86_split_mmx_punpck): New function.
* config/i386/mmx.md (mmx_punpckhbw): Changed to
define_insn_and_split to support SSE emulation.
(mmx_punpcklbw): Likewise.
(mmx_punpckhwd): Likewise.
(mmx_punpcklwd): Likewise.
(mmx_punpckhdq): Likewise.
(mmx_punpckldq): Likewise.
---
gcc/config/i386/i386-protos.h | 1 +
gcc/config/i386/i386.c | 77 +++++++++++++++++++
gcc/config/i386/mmx.md | 138 ++++++++++++++++++++++------------
3 files changed, 168 insertions(+), 48 deletions(-)
diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index a53b48438ec..37581837a32 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -204,6 +204,7 @@ extern rtx ix86_split_stack_guard (void);
extern void ix86_move_vector_high_sse_to_mmx (rtx);
extern void ix86_split_mmx_pack (rtx[], enum rtx_code);
+extern void ix86_split_mmx_punpck (rtx[], bool);
#ifdef TREE_CODE
extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree, int);
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index d31b69d9a82..a76c17beece 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -20275,6 +20275,83 @@ ix86_split_mmx_pack (rtx operands[], enum rtx_code code)
ix86_move_vector_high_sse_to_mmx (op0);
}
+/* Split MMX punpcklXX/punpckhXX with SSE punpcklXX. */
+
+void
+ix86_split_mmx_punpck (rtx operands[], bool high_p)
+{
+ rtx op0 = operands[0];
+ rtx op1 = operands[1];
+ rtx op2 = operands[2];
+ machine_mode mode = GET_MODE (op0);
+ rtx mask;
+ /* The corresponding SSE mode. */
+ machine_mode sse_mode, double_sse_mode;
+
+ switch (mode)
+ {
+ case E_V8QImode:
+ sse_mode = V16QImode;
+ double_sse_mode = V32QImode;
+ mask = gen_rtx_PARALLEL (VOIDmode,
+ gen_rtvec (16,
+ GEN_INT (0), GEN_INT (16),
+ GEN_INT (1), GEN_INT (17),
+ GEN_INT (2), GEN_INT (18),
+ GEN_INT (3), GEN_INT (19),
+ GEN_INT (4), GEN_INT (20),
+ GEN_INT (5), GEN_INT (21),
+ GEN_INT (6), GEN_INT (22),
+ GEN_INT (7), GEN_INT (23)));
+ break;
+
+ case E_V4HImode:
+ sse_mode = V8HImode;
+ double_sse_mode = V16HImode;
+ mask = gen_rtx_PARALLEL (VOIDmode,
+ gen_rtvec (8,
+ GEN_INT (0), GEN_INT (8),
+ GEN_INT (1), GEN_INT (9),
+ GEN_INT (2), GEN_INT (10),
+ GEN_INT (3), GEN_INT (11)));
+ break;
+
+ case E_V2SImode:
+ sse_mode = V4SImode;
+ double_sse_mode = V8SImode;
+ mask = gen_rtx_PARALLEL (VOIDmode,
+ gen_rtvec (4,
+ GEN_INT (0), GEN_INT (4),
+ GEN_INT (1), GEN_INT (5)));
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ /* Generate SSE punpcklXX. */
+ rtx dest = lowpart_subreg (sse_mode, op0, GET_MODE (op0));
+ op1 = lowpart_subreg (sse_mode, op1, GET_MODE (op1));
+ op2 = lowpart_subreg (sse_mode, op2, GET_MODE (op2));
+
+ op1 = gen_rtx_VEC_CONCAT (double_sse_mode, op1, op2);
+ op2 = gen_rtx_VEC_SELECT (sse_mode, op1, mask);
+ rtx insn = gen_rtx_SET (dest, op2);
+ emit_insn (insn);
+
+ if (high_p)
+ {
+ /* Move bits 64:127 to bits 0:63. */
+ mask = gen_rtx_PARALLEL (VOIDmode,
+ gen_rtvec (4, GEN_INT (2), GEN_INT (3),
+ GEN_INT (0), GEN_INT (0)));
+ dest = lowpart_subreg (V4SImode, dest, GET_MODE (dest));
+ op1 = gen_rtx_VEC_SELECT (V4SImode, dest, mask);
+ insn = gen_rtx_SET (dest, op1);
+ emit_insn (insn);
+ }
+}
+
/* Helper function of ix86_fixup_binary_operands to canonicalize
operand order. Returns true if the operands should be swapped. */
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index dbb2baa74d7..34fecd6a745 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -1064,87 +1064,129 @@
(set_attr "type" "mmxshft,sselog,sselog")
(set_attr "mode" "DI,TI,TI")])
-(define_insn "mmx_punpckhbw"
- [(set (match_operand:V8QI 0 "register_operand" "=y")
+(define_insn_and_split "mmx_punpckhbw"
+ [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yv")
(vec_select:V8QI
(vec_concat:V16QI
- (match_operand:V8QI 1 "register_operand" "0")
- (match_operand:V8QI 2 "nonimmediate_operand" "ym"))
+ (match_operand:V8QI 1 "register_operand" "0,0,Yv")
+ (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yv"))
(parallel [(const_int 4) (const_int 12)
(const_int 5) (const_int 13)
(const_int 6) (const_int 14)
(const_int 7) (const_int 15)])))]
- "TARGET_MMX"
- "punpckhbw\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmxcvt")
- (set_attr "mode" "DI")])
+ "TARGET_MMX || TARGET_MMX_WITH_SSE"
+ "@
+ punpckhbw\t{%2, %0|%0, %2}
+ #
+ #"
+ "TARGET_MMX_WITH_SSE && reload_completed"
+ [(const_int 0)]
+ "ix86_split_mmx_punpck (operands, true); DONE;"
+ [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+ (set_attr "type" "mmxcvt,sselog,sselog")
+ (set_attr "mode" "DI,TI,TI")])
-(define_insn "mmx_punpcklbw"
- [(set (match_operand:V8QI 0 "register_operand" "=y")
+(define_insn_and_split "mmx_punpcklbw"
+ [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yv")
(vec_select:V8QI
(vec_concat:V16QI
- (match_operand:V8QI 1 "register_operand" "0")
- (match_operand:V8QI 2 "nonimmediate_operand" "ym"))
+ (match_operand:V8QI 1 "register_operand" "0,0,Yv")
+ (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yv"))
(parallel [(const_int 0) (const_int 8)
(const_int 1) (const_int 9)
(const_int 2) (const_int 10)
(const_int 3) (const_int 11)])))]
- "TARGET_MMX"
- "punpcklbw\t{%2, %0|%0, %k2}"
- [(set_attr "type" "mmxcvt")
- (set_attr "mode" "DI")])
+ "TARGET_MMX || TARGET_MMX_WITH_SSE"
+ "@
+ punpcklbw\t{%2, %0|%0, %k2}
+ #
+ #"
+ "TARGET_MMX_WITH_SSE && reload_completed"
+ [(const_int 0)]
+ "ix86_split_mmx_punpck (operands, false); DONE;"
+ [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+ (set_attr "type" "mmxcvt,sselog,sselog")
+ (set_attr "mode" "DI,TI,TI")])
-(define_insn "mmx_punpckhwd"
- [(set (match_operand:V4HI 0 "register_operand" "=y")
+(define_insn_and_split "mmx_punpckhwd"
+ [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
(vec_select:V4HI
(vec_concat:V8HI
- (match_operand:V4HI 1 "register_operand" "0")
- (match_operand:V4HI 2 "nonimmediate_operand" "ym"))
+ (match_operand:V4HI 1 "register_operand" "0,0,Yv")
+ (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv"))
(parallel [(const_int 2) (const_int 6)
(const_int 3) (const_int 7)])))]
- "TARGET_MMX"
- "punpckhwd\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmxcvt")
- (set_attr "mode" "DI")])
+ "TARGET_MMX || TARGET_MMX_WITH_SSE"
+ "@
+ punpckhwd\t{%2, %0|%0, %2}
+ #
+ #"
+ "TARGET_MMX_WITH_SSE && reload_completed"
+ [(const_int 0)]
+ "ix86_split_mmx_punpck (operands, true); DONE;"
+ [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+ (set_attr "type" "mmxcvt,sselog,sselog")
+ (set_attr "mode" "DI,TI,TI")])
-(define_insn "mmx_punpcklwd"
- [(set (match_operand:V4HI 0 "register_operand" "=y")
+(define_insn_and_split "mmx_punpcklwd"
+ [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
(vec_select:V4HI
(vec_concat:V8HI
- (match_operand:V4HI 1 "register_operand" "0")
- (match_operand:V4HI 2 "nonimmediate_operand" "ym"))
+ (match_operand:V4HI 1 "register_operand" "0,0,Yv")
+ (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv"))
(parallel [(const_int 0) (const_int 4)
(const_int 1) (const_int 5)])))]
- "TARGET_MMX"
- "punpcklwd\t{%2, %0|%0, %k2}"
- [(set_attr "type" "mmxcvt")
- (set_attr "mode" "DI")])
+ "TARGET_MMX || TARGET_MMX_WITH_SSE"
+ "@
+ punpcklwd\t{%2, %0|%0, %k2}
+ #
+ #"
+ "TARGET_MMX_WITH_SSE && reload_completed"
+ [(const_int 0)]
+ "ix86_split_mmx_punpck (operands, false); DONE;"
+ [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+ (set_attr "type" "mmxcvt,sselog,sselog")
+ (set_attr "mode" "DI,TI,TI")])
-(define_insn "mmx_punpckhdq"
- [(set (match_operand:V2SI 0 "register_operand" "=y")
+(define_insn_and_split "mmx_punpckhdq"
+ [(set (match_operand:V2SI 0 "register_operand" "=y,x,Yv")
(vec_select:V2SI
(vec_concat:V4SI
- (match_operand:V2SI 1 "register_operand" "0")
- (match_operand:V2SI 2 "nonimmediate_operand" "ym"))
+ (match_operand:V2SI 1 "register_operand" "0,0,Yv")
+ (match_operand:V2SI 2 "register_mmxmem_operand" "ym,x,Yv"))
(parallel [(const_int 1)
(const_int 3)])))]
- "TARGET_MMX"
- "punpckhdq\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmxcvt")
- (set_attr "mode" "DI")])
+ "TARGET_MMX || TARGET_MMX_WITH_SSE"
+ "@
+ punpckhdq\t{%2, %0|%0, %2}
+ #
+ #"
+ "TARGET_MMX_WITH_SSE && reload_completed"
+ [(const_int 0)]
+ "ix86_split_mmx_punpck (operands, true); DONE;"
+ [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+ (set_attr "type" "mmxcvt,sselog,sselog")
+ (set_attr "mode" "DI,TI,TI")])
-(define_insn "mmx_punpckldq"
- [(set (match_operand:V2SI 0 "register_operand" "=y")
+(define_insn_and_split "mmx_punpckldq"
+ [(set (match_operand:V2SI 0 "register_operand" "=y,x,Yv")
(vec_select:V2SI
(vec_concat:V4SI
- (match_operand:V2SI 1 "register_operand" "0")
- (match_operand:V2SI 2 "nonimmediate_operand" "ym"))
+ (match_operand:V2SI 1 "register_operand" "0,0,Yv")
+ (match_operand:V2SI 2 "register_mmxmem_operand" "ym,x,Yv"))
(parallel [(const_int 0)
(const_int 2)])))]
- "TARGET_MMX"
- "punpckldq\t{%2, %0|%0, %k2}"
- [(set_attr "type" "mmxcvt")
- (set_attr "mode" "DI")])
+ "TARGET_MMX || TARGET_MMX_WITH_SSE"
+ "@
+ punpckldq\t{%2, %0|%0, %k2}
+ #
+ #"
+ "TARGET_MMX_WITH_SSE && reload_completed"
+ [(const_int 0)]
+ "ix86_split_mmx_punpck (operands, false); DONE;"
+ [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+ (set_attr "type" "mmxcvt,sselog,sselog")
+ (set_attr "mode" "DI,TI,TI")])
(define_expand "mmx_pinsrw"
[(set (match_operand:V4HI 0 "register_operand")
--
2.20.1
^ permalink raw reply [flat|nested] 50+ messages in thread
* [PATCH 18/42] i386: Emulate MMX V4HI smaxmin/V8QI umaxmin with SSE
2019-02-16 0:34 [PATCH 00/42] V7: Emulate MMX intrinsics with SSE H.J. Lu
` (17 preceding siblings ...)
2019-02-16 0:34 ` [PATCH 03/42] i386: Emulate MMX punpcklXX/punpckhXX with SSE punpcklXX H.J. Lu
@ 2019-02-16 0:34 ` H.J. Lu
2019-02-16 0:34 ` [PATCH 33/42] i386: Emulate MMX ssse3_psign<mode>3 " H.J. Lu
` (22 subsequent siblings)
41 siblings, 0 replies; 50+ messages in thread
From: H.J. Lu @ 2019-02-16 0:34 UTC (permalink / raw)
To: gcc-patches; +Cc: Uros Bizjak
Emulate MMX V4HI smaxmin/V8QI umaxmin with SSE. Only SSE register source
operand is allowed.
PR target/89021
* config/i386/mmx.md (mmx_<code>v4hi3): Also check TARGET_MMX
and TARGET_MMX_WITH_SSE.
(mmx_<code>v8qi3): Likewise.
(smaxmin:<code>v4hi3): New.
(umaxmin:<code>v8qi3): Likewise.
(smaxmin:*mmx_<code>v4hi3): Add SSE emulation.
(umaxmin:*mmx_<code>v8qi3): Likewise.
---
gcc/config/i386/mmx.md | 68 +++++++++++++++++++++++++++++-------------
1 file changed, 48 insertions(+), 20 deletions(-)
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 1e68d1bb338..058791e01e6 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -923,40 +923,68 @@
(define_expand "mmx_<code>v4hi3"
[(set (match_operand:V4HI 0 "register_operand")
(smaxmin:V4HI
- (match_operand:V4HI 1 "nonimmediate_operand")
- (match_operand:V4HI 2 "nonimmediate_operand")))]
- "TARGET_SSE || TARGET_3DNOW_A"
+ (match_operand:V4HI 1 "register_mmxmem_operand")
+ (match_operand:V4HI 2 "register_mmxmem_operand")))]
+ "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+ && (TARGET_SSE || TARGET_3DNOW_A)"
+ "ix86_fixup_binary_operands_no_copy (<CODE>, V4HImode, operands);")
+
+(define_expand "<code>v4hi3"
+ [(set (match_operand:V4HI 0 "register_operand")
+ (smaxmin:V4HI
+ (match_operand:V4HI 1 "register_operand")
+ (match_operand:V4HI 2 "register_operand")))]
+ "TARGET_MMX_WITH_SSE"
"ix86_fixup_binary_operands_no_copy (<CODE>, V4HImode, operands);")
(define_insn "*mmx_<code>v4hi3"
- [(set (match_operand:V4HI 0 "register_operand" "=y")
+ [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
(smaxmin:V4HI
- (match_operand:V4HI 1 "nonimmediate_operand" "%0")
- (match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
- "(TARGET_SSE || TARGET_3DNOW_A)
+ (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yv")
+ (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv")))]
+ "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+ && (TARGET_SSE || TARGET_3DNOW_A)
&& ix86_binary_operator_ok (<CODE>, V4HImode, operands)"
- "p<maxmin_int>w\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmxadd")
- (set_attr "mode" "DI")])
+ "@
+ p<maxmin_int>w\t{%2, %0|%0, %2}
+ p<maxmin_int>w\t{%2, %0|%0, %2}
+ vp<maxmin_int>w\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+ (set_attr "type" "mmxadd,sseiadd,sseiadd")
+ (set_attr "mode" "DI,TI,TI")])
(define_expand "mmx_<code>v8qi3"
[(set (match_operand:V8QI 0 "register_operand")
(umaxmin:V8QI
- (match_operand:V8QI 1 "nonimmediate_operand")
- (match_operand:V8QI 2 "nonimmediate_operand")))]
- "TARGET_SSE || TARGET_3DNOW_A"
+ (match_operand:V8QI 1 "register_mmxmem_operand")
+ (match_operand:V8QI 2 "register_mmxmem_operand")))]
+ "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+ && (TARGET_SSE || TARGET_3DNOW_A)"
+ "ix86_fixup_binary_operands_no_copy (<CODE>, V8QImode, operands);")
+
+(define_expand "<code>v8qi3"
+ [(set (match_operand:V8QI 0 "register_operand")
+ (umaxmin:V8QI
+ (match_operand:V8QI 1 "register_operand")
+ (match_operand:V8QI 2 "register_operand")))]
+ "TARGET_MMX_WITH_SSE"
"ix86_fixup_binary_operands_no_copy (<CODE>, V8QImode, operands);")
(define_insn "*mmx_<code>v8qi3"
- [(set (match_operand:V8QI 0 "register_operand" "=y")
+ [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yv")
(umaxmin:V8QI
- (match_operand:V8QI 1 "nonimmediate_operand" "%0")
- (match_operand:V8QI 2 "nonimmediate_operand" "ym")))]
- "(TARGET_SSE || TARGET_3DNOW_A)
+ (match_operand:V8QI 1 "register_mmxmem_operand" "%0,0,Yv")
+ (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yv")))]
+ "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+ && (TARGET_SSE || TARGET_3DNOW_A)
&& ix86_binary_operator_ok (<CODE>, V8QImode, operands)"
- "p<maxmin_int>b\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmxadd")
- (set_attr "mode" "DI")])
+ "@
+ p<maxmin_int>b\t{%2, %0|%0, %2}
+ p<maxmin_int>b\t{%2, %0|%0, %2}
+ vp<maxmin_int>b\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+ (set_attr "type" "mmxadd,sseiadd,sseiadd")
+ (set_attr "mode" "DI,TI,TI")])
(define_insn "mmx_ashr<mode>3"
[(set (match_operand:MMXMODE24 0 "register_operand" "=y,x,Yv")
--
2.20.1
^ permalink raw reply [flat|nested] 50+ messages in thread
* [PATCH 33/42] i386: Emulate MMX ssse3_psign<mode>3 with SSE
2019-02-16 0:34 [PATCH 00/42] V7: Emulate MMX intrinsics with SSE H.J. Lu
` (18 preceding siblings ...)
2019-02-16 0:34 ` [PATCH 18/42] i386: Emulate MMX V4HI smaxmin/V8QI umaxmin with SSE H.J. Lu
@ 2019-02-16 0:34 ` H.J. Lu
2019-02-16 0:34 ` [PATCH 02/42] i386: Emulate MMX packsswb/packssdw/packuswb with SSE2 H.J. Lu
` (21 subsequent siblings)
41 siblings, 0 replies; 50+ messages in thread
From: H.J. Lu @ 2019-02-16 0:34 UTC (permalink / raw)
To: gcc-patches; +Cc: Uros Bizjak
Emulate MMX ssse3_psign<mode>3 with SSE. Only SSE register source operand
is allowed.
PR target/89021
* config/i386/sse.md (ssse3_psign<mode>3): Add SSE emulation.
---
gcc/config/i386/sse.md | 18 +++++++++++-------
1 file changed, 11 insertions(+), 7 deletions(-)
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 704e211c0b8..c2dbd59049a 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -15755,17 +15755,21 @@
(set_attr "mode" "<sseinsnmode>")])
(define_insn "ssse3_psign<mode>3"
- [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
+ [(set (match_operand:MMXMODEI 0 "register_operand" "=y,x,Yv")
(unspec:MMXMODEI
- [(match_operand:MMXMODEI 1 "register_operand" "0")
- (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
+ [(match_operand:MMXMODEI 1 "register_operand" "0,0,Yv")
+ (match_operand:MMXMODEI 2 "register_mmxmem_operand" "ym,x,Yv")]
UNSPEC_PSIGN))]
- "TARGET_SSSE3"
- "psign<mmxvecsize>\t{%2, %0|%0, %2}";
- [(set_attr "type" "sselog1")
+ "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
+ "@
+ psign<mmxvecsize>\t{%2, %0|%0, %2}
+ psign<mmxvecsize>\t{%2, %0|%0, %2}
+ vpsign<mmxvecsize>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+ (set_attr "type" "sselog1")
(set_attr "prefix_extra" "1")
(set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
- (set_attr "mode" "DI")])
+ (set_attr "mode" "DI,TI,TI")])
(define_insn "<ssse3_avx2>_palignr<mode>_mask"
[(set (match_operand:VI1_AVX512 0 "register_operand" "=v")
--
2.20.1
^ permalink raw reply [flat|nested] 50+ messages in thread
* [PATCH 02/42] i386: Emulate MMX packsswb/packssdw/packuswb with SSE2
2019-02-16 0:34 [PATCH 00/42] V7: Emulate MMX intrinsics with SSE H.J. Lu
` (19 preceding siblings ...)
2019-02-16 0:34 ` [PATCH 33/42] i386: Emulate MMX ssse3_psign<mode>3 " H.J. Lu
@ 2019-02-16 0:34 ` H.J. Lu
2019-02-16 0:35 ` [PATCH 42/42] i386: Add tests for MMX intrinsic emulations with SSE H.J. Lu
` (20 subsequent siblings)
41 siblings, 0 replies; 50+ messages in thread
From: H.J. Lu @ 2019-02-16 0:34 UTC (permalink / raw)
To: gcc-patches; +Cc: Uros Bizjak
Emulate MMX packsswb/packssdw/packuswb with SSE packsswb/packssdw/packuswb
plus moving bits 64:95 to bits 32:63 in SSE register. Only SSE register
source operand is allowed.
2019-02-08 H.J. Lu <hongjiu.lu@intel.com>
Uros Bizjak <ubizjak@gmail.com>
PR target/89021
* config/i386/i386-protos.h (ix86_move_vector_high_sse_to_mmx):
New prototype.
(ix86_split_mmx_pack): Likewise.
* config/i386/i386.c (ix86_move_vector_high_sse_to_mmx): New
function.
(ix86_split_mmx_pack): Likewise.
* config/i386/i386.md (mmx_isa): New.
(enabled): Also check mmx_isa.
* config/i386/mmx.md (any_s_truncate): New code iterator.
(s_trunsuffix): New code attr.
(mmx_packsswb): Removed.
(mmx_packssdw): Likewise.
(mmx_packuswb): Likewise.
(mmx_pack<s_trunsuffix>swb): New define_insn_and_split to emulate
MMX packsswb/packuswb with SSE2.
(mmx_packssdw): Likewise.
* config/i386/predicates.md (register_mmxmem_operand): New.
---
gcc/config/i386/i386-protos.h | 3 ++
gcc/config/i386/i386.c | 54 ++++++++++++++++++++++++++++
gcc/config/i386/i386.md | 13 +++++++
gcc/config/i386/mmx.md | 67 +++++++++++++++++++----------------
gcc/config/i386/predicates.md | 7 ++++
5 files changed, 114 insertions(+), 30 deletions(-)
diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index 27f5cc13abf..a53b48438ec 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -202,6 +202,9 @@ extern void ix86_expand_vecop_qihi (enum rtx_code, rtx, rtx, rtx);
extern rtx ix86_split_stack_guard (void);
+extern void ix86_move_vector_high_sse_to_mmx (rtx);
+extern void ix86_split_mmx_pack (rtx[], enum rtx_code);
+
#ifdef TREE_CODE
extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree, int);
#endif /* TREE_CODE */
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 7d7dd80930e..d31b69d9a82 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -20221,6 +20221,60 @@ ix86_expand_vector_move_misalign (machine_mode mode, rtx operands[])
gcc_unreachable ();
}
+/* Move bits 64:95 to bits 32:63. */
+
+void
+ix86_move_vector_high_sse_to_mmx (rtx op)
+{
+ rtx mask = gen_rtx_PARALLEL (VOIDmode,
+ gen_rtvec (4, GEN_INT (0), GEN_INT (2),
+ GEN_INT (0), GEN_INT (0)));
+ rtx dest = lowpart_subreg (V4SImode, op, GET_MODE (op));
+ op = gen_rtx_VEC_SELECT (V4SImode, dest, mask);
+ rtx insn = gen_rtx_SET (dest, op);
+ emit_insn (insn);
+}
+
+/* Split MMX pack with signed/unsigned saturation with SSE/SSE2. */
+
+void
+ix86_split_mmx_pack (rtx operands[], enum rtx_code code)
+{
+ rtx op0 = operands[0];
+ rtx op1 = operands[1];
+ rtx op2 = operands[2];
+
+ machine_mode dmode = GET_MODE (op0);
+ machine_mode smode = GET_MODE (op1);
+ machine_mode inner_dmode = GET_MODE_INNER (dmode);
+ machine_mode inner_smode = GET_MODE_INNER (smode);
+
+ /* Get the corresponding SSE mode for destination. */
+ int nunits = 16 / GET_MODE_SIZE (inner_dmode);
+ machine_mode sse_dmode = mode_for_vector (GET_MODE_INNER (dmode),
+ nunits).require ();
+ machine_mode sse_half_dmode = mode_for_vector (GET_MODE_INNER (dmode),
+ nunits / 2).require ();
+
+ /* Get the corresponding SSE mode for source. */
+ nunits = 16 / GET_MODE_SIZE (inner_smode);
+ machine_mode sse_smode = mode_for_vector (GET_MODE_INNER (smode),
+ nunits).require ();
+
+ /* Generate SSE pack with signed/unsigned saturation. */
+ rtx dest = lowpart_subreg (sse_dmode, op0, GET_MODE (op0));
+ op1 = lowpart_subreg (sse_smode, op1, GET_MODE (op1));
+ op2 = lowpart_subreg (sse_smode, op2, GET_MODE (op2));
+
+ op1 = gen_rtx_fmt_e (code, sse_half_dmode, op1);
+ op2 = gen_rtx_fmt_e (code, sse_half_dmode, op2);
+ rtx insn = gen_rtx_SET (dest, gen_rtx_VEC_CONCAT (sse_dmode,
+ op1, op2));
+ emit_insn (insn);
+
+ ix86_move_vector_high_sse_to_mmx (op0);
+}
+
/* Helper function of ix86_fixup_binary_operands to canonicalize
operand order. Returns true if the operands should be swapped. */
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 40ed93dc804..e1727676deb 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -792,6 +792,10 @@
avx512vl,noavx512vl,x64_avx512dq,x64_avx512bw"
(const_string "base"))
+;; Define instruction set of MMX instructions
+(define_attr "mmx_isa" "base,native,x64,x64_noavx,x64_avx"
+ (const_string "base"))
+
(define_attr "enabled" ""
(cond [(eq_attr "isa" "x64") (symbol_ref "TARGET_64BIT")
(eq_attr "isa" "x64_sse2")
@@ -830,6 +834,15 @@
(eq_attr "isa" "noavx512dq") (symbol_ref "!TARGET_AVX512DQ")
(eq_attr "isa" "avx512vl") (symbol_ref "TARGET_AVX512VL")
(eq_attr "isa" "noavx512vl") (symbol_ref "!TARGET_AVX512VL")
+
+ (eq_attr "mmx_isa" "native")
+ (symbol_ref "!TARGET_MMX_WITH_SSE")
+ (eq_attr "mmx_isa" "x64")
+ (symbol_ref "TARGET_MMX_WITH_SSE")
+ (eq_attr "mmx_isa" "x64_avx")
+ (symbol_ref "TARGET_MMX_WITH_SSE && TARGET_AVX")
+ (eq_attr "mmx_isa" "x64_noavx")
+ (symbol_ref "TARGET_MMX_WITH_SSE && !TARGET_AVX")
]
(const_int 1)))
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 9c3808338d3..dbb2baa74d7 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -1021,41 +1021,48 @@
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-(define_insn "mmx_packsswb"
- [(set (match_operand:V8QI 0 "register_operand" "=y")
+;; Used in signed and unsigned truncations with saturation.
+(define_code_iterator any_s_truncate [ss_truncate us_truncate])
+;; Instruction suffix for truncations with saturation.
+(define_code_attr s_trunsuffix [(ss_truncate "s") (us_truncate "u")])
+
+(define_insn_and_split "mmx_pack<s_trunsuffix>swb"
+ [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yv")
(vec_concat:V8QI
- (ss_truncate:V4QI
- (match_operand:V4HI 1 "register_operand" "0"))
- (ss_truncate:V4QI
- (match_operand:V4HI 2 "nonimmediate_operand" "ym"))))]
- "TARGET_MMX"
- "packsswb\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmxshft")
- (set_attr "mode" "DI")])
+ (any_s_truncate:V4QI
+ (match_operand:V4HI 1 "register_operand" "0,0,Yv"))
+ (any_s_truncate:V4QI
+ (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv"))))]
+ "TARGET_MMX || TARGET_MMX_WITH_SSE"
+ "@
+ pack<s_trunsuffix>swb\t{%2, %0|%0, %2}
+ #
+ #"
+ "TARGET_MMX_WITH_SSE && reload_completed"
+ [(const_int 0)]
+ "ix86_split_mmx_pack (operands, <any_s_truncate:CODE>); DONE;"
+ [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+ (set_attr "type" "mmxshft,sselog,sselog")
+ (set_attr "mode" "DI,TI,TI")])
-(define_insn "mmx_packssdw"
- [(set (match_operand:V4HI 0 "register_operand" "=y")
+(define_insn_and_split "mmx_packssdw"
+ [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
(vec_concat:V4HI
(ss_truncate:V2HI
- (match_operand:V2SI 1 "register_operand" "0"))
+ (match_operand:V2SI 1 "register_operand" "0,0,Yv"))
(ss_truncate:V2HI
- (match_operand:V2SI 2 "nonimmediate_operand" "ym"))))]
- "TARGET_MMX"
- "packssdw\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmxshft")
- (set_attr "mode" "DI")])
-
-(define_insn "mmx_packuswb"
- [(set (match_operand:V8QI 0 "register_operand" "=y")
- (vec_concat:V8QI
- (us_truncate:V4QI
- (match_operand:V4HI 1 "register_operand" "0"))
- (us_truncate:V4QI
- (match_operand:V4HI 2 "nonimmediate_operand" "ym"))))]
- "TARGET_MMX"
- "packuswb\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmxshft")
- (set_attr "mode" "DI")])
+ (match_operand:V2SI 2 "register_mmxmem_operand" "ym,x,Yv"))))]
+ "TARGET_MMX || TARGET_MMX_WITH_SSE"
+ "@
+ packssdw\t{%2, %0|%0, %2}
+ #
+ #"
+ "TARGET_MMX_WITH_SSE && reload_completed"
+ [(const_int 0)]
+ "ix86_split_mmx_pack (operands, SS_TRUNCATE); DONE;"
+ [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+ (set_attr "type" "mmxshft,sselog,sselog")
+ (set_attr "mode" "DI,TI,TI")])
(define_insn "mmx_punpckhbw"
[(set (match_operand:V8QI 0 "register_operand" "=y")
diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
index 99226e86436..f3c2f72de54 100644
--- a/gcc/config/i386/predicates.md
+++ b/gcc/config/i386/predicates.md
@@ -49,6 +49,13 @@
(and (match_code "reg")
(match_test "MMX_REGNO_P (REGNO (op))")))
+;; Match register operands, but include memory operands for
+;; !TARGET_MMX_WITH_SSE.
+(define_predicate "register_mmxmem_operand"
+ (ior (match_operand 0 "register_operand")
+ (and (not (match_test "TARGET_MMX_WITH_SSE"))
+ (match_operand 0 "memory_operand"))))
+
;; True if the operand is an SSE register.
(define_predicate "sse_reg_operand"
(and (match_code "reg")
--
2.20.1
^ permalink raw reply [flat|nested] 50+ messages in thread
* [PATCH 42/42] i386: Add tests for MMX intrinsic emulations with SSE
2019-02-16 0:34 [PATCH 00/42] V7: Emulate MMX intrinsics with SSE H.J. Lu
` (20 preceding siblings ...)
2019-02-16 0:34 ` [PATCH 02/42] i386: Emulate MMX packsswb/packssdw/packuswb with SSE2 H.J. Lu
@ 2019-02-16 0:35 ` H.J. Lu
2019-02-16 0:35 ` [PATCH 30/42] i386: Emulate MMX ssse3_pmaddubsw " H.J. Lu
` (19 subsequent siblings)
41 siblings, 0 replies; 50+ messages in thread
From: H.J. Lu @ 2019-02-16 0:35 UTC (permalink / raw)
To: gcc-patches; +Cc: Uros Bizjak
Test MMX intrinsics with -msse2 in 32-bit mode and -msse2 -mno-mmx in
64-bit mode.
PR target/89021
* gcc.target/i386/mmx-vals.h: New file.
* gcc.target/i386/sse2-mmx-2.c: Likewise.
* gcc.target/i386/sse2-mmx-3.c: Likewise.
* gcc.target/i386/sse2-mmx-4.c: Likewise.
* gcc.target/i386/sse2-mmx-5.c: Likewise.
* gcc.target/i386/sse2-mmx-6.c: Likewise.
* gcc.target/i386/sse2-mmx-7.c: Likewise.
* gcc.target/i386/sse2-mmx-8.c: Likewise.
* gcc.target/i386/sse2-mmx-9.c: Likewise.
* gcc.target/i386/sse2-mmx-10.c: Likewise.
* gcc.target/i386/sse2-mmx-11.c: Likewise.
* gcc.target/i386/sse2-mmx-12.c: Likewise.
* gcc.target/i386/sse2-mmx-13.c: Likewise.
* gcc.target/i386/sse2-mmx-14.c: Likewise.
* gcc.target/i386/sse2-mmx-15.c: Likewise.
* gcc.target/i386/sse2-mmx-16.c: Likewise.
* gcc.target/i386/sse2-mmx-17.c: Likewise.
* gcc.target/i386/sse2-mmx-18a.c: Likewise.
* gcc.target/i386/sse2-mmx-18b.c: Likewise.
* gcc.target/i386/sse2-mmx-18c.c: Likewise.
* gcc.target/i386/sse2-mmx-19a.c: Likewise.
* gcc.target/i386/sse2-mmx-19b.c: Likewise.
* gcc.target/i386/sse2-mmx-19c.c: Likewise.
* gcc.target/i386/sse2-mmx-19d.c: Likewise.
* gcc.target/i386/sse2-mmx-19e.c: Likewise.
* gcc.target/i386/sse2-mmx-20.c: Likewise.
* gcc.target/i386/sse2-mmx-21.c: Likewise.
* gcc.target/i386/sse2-mmx-cvtpi2ps.c: Likewise.
* gcc.target/i386/sse2-mmx-cvtps2pi.c: Likewise.
* gcc.target/i386/sse2-mmx-cvttps2pi.c: Likewise.
* gcc.target/i386/sse2-mmx-maskmovq.c: Likewise.
* gcc.target/i386/sse2-mmx-packssdw.c: Likewise.
* gcc.target/i386/sse2-mmx-packsswb.c: Likewise.
* gcc.target/i386/sse2-mmx-packuswb.c: Likewise.
* gcc.target/i386/sse2-mmx-paddb.c: Likewise.
* gcc.target/i386/sse2-mmx-paddd.c: Likewise.
* gcc.target/i386/sse2-mmx-paddq.c: Likewise.
* gcc.target/i386/sse2-mmx-paddsb.c: Likewise.
* gcc.target/i386/sse2-mmx-paddsw.c: Likewise.
* gcc.target/i386/sse2-mmx-paddusb.c: Likewise.
* gcc.target/i386/sse2-mmx-paddusw.c: Likewise.
* gcc.target/i386/sse2-mmx-paddw.c: Likewise.
* gcc.target/i386/sse2-mmx-pand.c: Likewise.
* gcc.target/i386/sse2-mmx-pandn.c: Likewise.
* gcc.target/i386/sse2-mmx-pavgb.c: Likewise.
* gcc.target/i386/sse2-mmx-pavgw.c: Likewise.
* gcc.target/i386/sse2-mmx-pcmpeqb.c: Likewise.
* gcc.target/i386/sse2-mmx-pcmpeqd.c: Likewise.
* gcc.target/i386/sse2-mmx-pcmpeqw.c: Likewise.
* gcc.target/i386/sse2-mmx-pcmpgtb.c: Likewise.
* gcc.target/i386/sse2-mmx-pcmpgtd.c: Likewise.
* gcc.target/i386/sse2-mmx-pcmpgtw.c: Likewise.
* gcc.target/i386/sse2-mmx-pextrw.c: Likewise.
* gcc.target/i386/sse2-mmx-pinsrw.c: Likewise.
* gcc.target/i386/sse2-mmx-pmaddwd.c: Likewise.
* gcc.target/i386/sse2-mmx-pmaxsw.c: Likewise.
* gcc.target/i386/sse2-mmx-pmaxub.c: Likewise.
* gcc.target/i386/sse2-mmx-pminsw.c: Likewise.
* gcc.target/i386/sse2-mmx-pminub.c: Likewise.
* gcc.target/i386/sse2-mmx-pmovmskb.c: Likewise.
* gcc.target/i386/sse2-mmx-pmulhuw.c: Likewise.
* gcc.target/i386/sse2-mmx-pmulhw.c: Likewise.
* gcc.target/i386/sse2-mmx-pmullw.c: Likewise.
* gcc.target/i386/sse2-mmx-pmuludq.c: Likewise.
* gcc.target/i386/sse2-mmx-por.c: Likewise.
* gcc.target/i386/sse2-mmx-psadbw.c: Likewise.
* gcc.target/i386/sse2-mmx-pshufw.c: Likewise.
* gcc.target/i386/sse2-mmx-pslld.c: Likewise.
* gcc.target/i386/sse2-mmx-pslldi.c: Likewise.
* gcc.target/i386/sse2-mmx-psllq.c: Likewise.
* gcc.target/i386/sse2-mmx-psllqi.c: Likewise.
* gcc.target/i386/sse2-mmx-psllw.c: Likewise.
* gcc.target/i386/sse2-mmx-psllwi.c: Likewise.
* gcc.target/i386/sse2-mmx-psrad.c: Likewise.
* gcc.target/i386/sse2-mmx-psradi.c: Likewise.
* gcc.target/i386/sse2-mmx-psraw.c: Likewise.
* gcc.target/i386/sse2-mmx-psrawi.c: Likewise.
* gcc.target/i386/sse2-mmx-psrld.c: Likewise.
* gcc.target/i386/sse2-mmx-psrldi.c: Likewise.
* gcc.target/i386/sse2-mmx-psrlq.c: Likewise.
* gcc.target/i386/sse2-mmx-psrlqi.c: Likewise.
* gcc.target/i386/sse2-mmx-psrlw.c: Likewise.
* gcc.target/i386/sse2-mmx-psrlwi.c: Likewise.
* gcc.target/i386/sse2-mmx-psubb.c: Likewise.
* gcc.target/i386/sse2-mmx-psubd.c: Likewise.
* gcc.target/i386/sse2-mmx-psubq.c: Likewise.
* gcc.target/i386/sse2-mmx-psubusb.c: Likewise.
* gcc.target/i386/sse2-mmx-psubusw.c: Likewise.
* gcc.target/i386/sse2-mmx-psubw.c: Likewise.
* gcc.target/i386/sse2-mmx-punpckhbw.c: Likewise.
* gcc.target/i386/sse2-mmx-punpckhdq.c: Likewise.
* gcc.target/i386/sse2-mmx-punpckhwd.c: Likewise.
* gcc.target/i386/sse2-mmx-punpcklbw.c: Likewise.
* gcc.target/i386/sse2-mmx-punpckldq.c: Likewise.
* gcc.target/i386/sse2-mmx-punpcklwd.c: Likewise.
* gcc.target/i386/sse2-mmx-pxor.c: Likewise.
---
gcc/testsuite/gcc.target/i386/mmx-vals.h | 77 ++++++
gcc/testsuite/gcc.target/i386/sse2-mmx-10.c | 43 +++
gcc/testsuite/gcc.target/i386/sse2-mmx-11.c | 39 +++
gcc/testsuite/gcc.target/i386/sse2-mmx-12.c | 42 +++
gcc/testsuite/gcc.target/i386/sse2-mmx-13.c | 40 +++
gcc/testsuite/gcc.target/i386/sse2-mmx-14.c | 31 +++
gcc/testsuite/gcc.target/i386/sse2-mmx-15.c | 36 +++
gcc/testsuite/gcc.target/i386/sse2-mmx-16.c | 40 +++
gcc/testsuite/gcc.target/i386/sse2-mmx-17.c | 51 ++++
gcc/testsuite/gcc.target/i386/sse2-mmx-18a.c | 14 +
gcc/testsuite/gcc.target/i386/sse2-mmx-18b.c | 7 +
gcc/testsuite/gcc.target/i386/sse2-mmx-18c.c | 7 +
gcc/testsuite/gcc.target/i386/sse2-mmx-19a.c | 14 +
gcc/testsuite/gcc.target/i386/sse2-mmx-19b.c | 7 +
gcc/testsuite/gcc.target/i386/sse2-mmx-19c.c | 7 +
gcc/testsuite/gcc.target/i386/sse2-mmx-19d.c | 7 +
gcc/testsuite/gcc.target/i386/sse2-mmx-19e.c | 7 +
gcc/testsuite/gcc.target/i386/sse2-mmx-2.c | 12 +
gcc/testsuite/gcc.target/i386/sse2-mmx-20.c | 12 +
gcc/testsuite/gcc.target/i386/sse2-mmx-21.c | 13 +
gcc/testsuite/gcc.target/i386/sse2-mmx-3.c | 13 +
gcc/testsuite/gcc.target/i386/sse2-mmx-4.c | 4 +
gcc/testsuite/gcc.target/i386/sse2-mmx-5.c | 11 +
gcc/testsuite/gcc.target/i386/sse2-mmx-6.c | 11 +
gcc/testsuite/gcc.target/i386/sse2-mmx-7.c | 13 +
gcc/testsuite/gcc.target/i386/sse2-mmx-8.c | 4 +
gcc/testsuite/gcc.target/i386/sse2-mmx-9.c | 79 ++++++
.../gcc.target/i386/sse2-mmx-cvtpi2ps.c | 43 +++
.../gcc.target/i386/sse2-mmx-cvtps2pi.c | 36 +++
.../gcc.target/i386/sse2-mmx-cvttps2pi.c | 36 +++
.../gcc.target/i386/sse2-mmx-maskmovq.c | 99 +++++++
.../gcc.target/i386/sse2-mmx-packssdw.c | 52 ++++
.../gcc.target/i386/sse2-mmx-packsswb.c | 52 ++++
.../gcc.target/i386/sse2-mmx-packuswb.c | 52 ++++
.../gcc.target/i386/sse2-mmx-paddb.c | 48 ++++
.../gcc.target/i386/sse2-mmx-paddd.c | 48 ++++
.../gcc.target/i386/sse2-mmx-paddq.c | 43 +++
.../gcc.target/i386/sse2-mmx-paddsb.c | 48 ++++
.../gcc.target/i386/sse2-mmx-paddsw.c | 48 ++++
.../gcc.target/i386/sse2-mmx-paddusb.c | 48 ++++
.../gcc.target/i386/sse2-mmx-paddusw.c | 48 ++++
.../gcc.target/i386/sse2-mmx-paddw.c | 48 ++++
gcc/testsuite/gcc.target/i386/sse2-mmx-pand.c | 44 ++++
.../gcc.target/i386/sse2-mmx-pandn.c | 44 ++++
.../gcc.target/i386/sse2-mmx-pavgb.c | 52 ++++
.../gcc.target/i386/sse2-mmx-pavgw.c | 52 ++++
.../gcc.target/i386/sse2-mmx-pcmpeqb.c | 48 ++++
.../gcc.target/i386/sse2-mmx-pcmpeqd.c | 48 ++++
.../gcc.target/i386/sse2-mmx-pcmpeqw.c | 48 ++++
.../gcc.target/i386/sse2-mmx-pcmpgtb.c | 48 ++++
.../gcc.target/i386/sse2-mmx-pcmpgtd.c | 48 ++++
.../gcc.target/i386/sse2-mmx-pcmpgtw.c | 48 ++++
.../gcc.target/i386/sse2-mmx-pextrw.c | 59 +++++
.../gcc.target/i386/sse2-mmx-pinsrw.c | 61 +++++
.../gcc.target/i386/sse2-mmx-pmaddwd.c | 47 ++++
.../gcc.target/i386/sse2-mmx-pmaxsw.c | 48 ++++
.../gcc.target/i386/sse2-mmx-pmaxub.c | 48 ++++
.../gcc.target/i386/sse2-mmx-pminsw.c | 48 ++++
.../gcc.target/i386/sse2-mmx-pminub.c | 48 ++++
.../gcc.target/i386/sse2-mmx-pmovmskb.c | 46 ++++
.../gcc.target/i386/sse2-mmx-pmulhuw.c | 51 ++++
.../gcc.target/i386/sse2-mmx-pmulhw.c | 53 ++++
.../gcc.target/i386/sse2-mmx-pmullw.c | 52 ++++
.../gcc.target/i386/sse2-mmx-pmuludq.c | 47 ++++
gcc/testsuite/gcc.target/i386/sse2-mmx-por.c | 44 ++++
.../gcc.target/i386/sse2-mmx-psadbw.c | 58 ++++
.../gcc.target/i386/sse2-mmx-pshufw.c | 248 ++++++++++++++++++
.../gcc.target/i386/sse2-mmx-pslld.c | 52 ++++
.../gcc.target/i386/sse2-mmx-pslldi.c | 153 +++++++++++
.../gcc.target/i386/sse2-mmx-psllq.c | 47 ++++
.../gcc.target/i386/sse2-mmx-psllqi.c | 245 +++++++++++++++++
.../gcc.target/i386/sse2-mmx-psllw.c | 52 ++++
.../gcc.target/i386/sse2-mmx-psllwi.c | 105 ++++++++
.../gcc.target/i386/sse2-mmx-psrad.c | 52 ++++
.../gcc.target/i386/sse2-mmx-psradi.c | 153 +++++++++++
.../gcc.target/i386/sse2-mmx-psraw.c | 52 ++++
.../gcc.target/i386/sse2-mmx-psrawi.c | 105 ++++++++
.../gcc.target/i386/sse2-mmx-psrld.c | 52 ++++
.../gcc.target/i386/sse2-mmx-psrldi.c | 153 +++++++++++
.../gcc.target/i386/sse2-mmx-psrlq.c | 47 ++++
.../gcc.target/i386/sse2-mmx-psrlqi.c | 245 +++++++++++++++++
.../gcc.target/i386/sse2-mmx-psrlw.c | 52 ++++
.../gcc.target/i386/sse2-mmx-psrlwi.c | 105 ++++++++
.../gcc.target/i386/sse2-mmx-psubb.c | 48 ++++
.../gcc.target/i386/sse2-mmx-psubd.c | 48 ++++
.../gcc.target/i386/sse2-mmx-psubq.c | 43 +++
.../gcc.target/i386/sse2-mmx-psubusb.c | 48 ++++
.../gcc.target/i386/sse2-mmx-psubusw.c | 48 ++++
.../gcc.target/i386/sse2-mmx-psubw.c | 48 ++++
.../gcc.target/i386/sse2-mmx-punpckhbw.c | 53 ++++
.../gcc.target/i386/sse2-mmx-punpckhdq.c | 47 ++++
.../gcc.target/i386/sse2-mmx-punpckhwd.c | 49 ++++
.../gcc.target/i386/sse2-mmx-punpcklbw.c | 53 ++++
.../gcc.target/i386/sse2-mmx-punpckldq.c | 47 ++++
.../gcc.target/i386/sse2-mmx-punpcklwd.c | 49 ++++
gcc/testsuite/gcc.target/i386/sse2-mmx-pxor.c | 44 ++++
gcc/testsuite/gcc.target/i386/sse2-mmx.c | 1 -
97 files changed, 5123 insertions(+), 1 deletion(-)
create mode 100644 gcc/testsuite/gcc.target/i386/mmx-vals.h
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-10.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-11.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-12.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-13.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-14.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-15.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-16.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-17.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-18a.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-18b.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-18c.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-19a.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-19b.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-19c.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-19d.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-19e.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-2.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-20.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-21.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-3.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-4.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-5.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-6.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-7.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-8.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-9.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-cvtpi2ps.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-cvtps2pi.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-cvttps2pi.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-maskmovq.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-packssdw.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-packsswb.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-packuswb.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-paddb.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-paddd.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-paddq.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-paddsb.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-paddsw.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-paddusb.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-paddusw.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-paddw.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pand.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pandn.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pavgb.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pavgw.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pcmpeqb.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pcmpeqd.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pcmpeqw.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pcmpgtb.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pcmpgtd.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pcmpgtw.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pextrw.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pinsrw.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pmaddwd.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pmaxsw.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pmaxub.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pminsw.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pminub.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pmovmskb.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pmulhuw.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pmulhw.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pmullw.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pmuludq.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-por.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psadbw.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pshufw.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pslld.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pslldi.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psllq.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psllqi.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psllw.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psllwi.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psrad.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psradi.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psraw.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psrawi.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psrld.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psrldi.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psrlq.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psrlqi.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psrlw.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psrlwi.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psubb.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psubd.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psubq.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psubusb.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psubusw.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-psubw.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-punpckhbw.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-punpckhdq.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-punpckhwd.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-punpcklbw.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-punpckldq.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-punpcklwd.c
create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-pxor.c
diff --git a/gcc/testsuite/gcc.target/i386/mmx-vals.h b/gcc/testsuite/gcc.target/i386/mmx-vals.h
new file mode 100644
index 00000000000..62d0c1cb514
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/mmx-vals.h
@@ -0,0 +1,77 @@
+/* Saturation helpers and operand values used to check the results */
+
+__attribute__((unused))
+static int
+saturate_b (int i)
+{
+ if (i > 127)
+ i = 127;
+ else if (i < -128)
+ i = -128;
+ return i;
+}
+
+__attribute__((unused))
+static int
+saturate_w (int i)
+{
+ if (i > 32767)
+ i = 32767;
+ else if (i < -32768)
+ i = -32768;
+ return i;
+}
+
+__attribute__((unused))
+static int
+saturate_ub (int i)
+{
+ if (i > 255)
+ i = 255;
+ else if (i < 0)
+ i = 0;
+ return i;
+}
+
+__attribute__((unused))
+static int
+saturate_uw (int i)
+{
+ if (i > 65535)
+ i = 65535;
+ else if (i < 0)
+ i = 0;
+ return i;
+}
+
+static long long MMXops[] =
+{
+ 0x3467512347612976LL, 0x000000000000000eLL,
+ 0x3467512347612976LL, 0x0000000000000014LL,
+ 0x3467512347612976LL, 0x000000000000003cLL,
+ 0x0000000000000000LL, 0xFFFFFFFFFFFFFFFFLL,
+ 0xFFFFFFFFFFFFFFFFLL, 0x0000000000000000LL,
+ 0x0000000000000001LL, 0x1000000000000000LL,
+ 0x1000000000000000LL, 0x0000000000000001LL,
+ 0xFF00FF00FF00FF00LL, 0x00FF00FF00FF00FFLL,
+ 0xFFFFFFFFFFFFFFFFLL, 0x0101010101010101LL,
+ 0x0101010101010101LL, 0xFFFFFFFFFFFFFFFFLL,
+ 0x0123456789ABCDEFLL, 0x0123456789ABCDEFLL,
+ 0x3467512347612976LL, 0x1839876340879234LL,
+ 0x0000000000000000LL, 0x0000000000000000LL,
+ 0xFFFFFFFFFFFFFFFFLL, 0xFFFFFFFFFFFFFFFFLL,
+ 0x7F7F7F7F7F7F7F7FLL, 0x7F7F7F7F7F7F7F7FLL,
+ 0x7F7F7F7F7F7F7F7FLL, 0x0101010101010101LL,
+ 0x7F7F7F7F7F7F7F7FLL, 0x4782082349761237LL,
+ 0x0000000000000000LL, 0x7F7F7F7F7F7F7F7FLL,
+ 0x8080808080808080LL, 0x8080808080808080LL,
+ 0x0101010101010101LL, 0x8080808080808080LL,
+ 0x8080808080808080LL, 0x0000000000000000LL,
+ 0x2372347120982458LL, 0x8080808080808080LL,
+ 0xFFFFFFFFFFFFFFFFLL, 0x8080808080808080LL,
+ 0x7F7F7F7F7F7F7F7FLL, 0xFFFFFFFFFFFFFFFFLL,
+ 0x8080808080808080LL, 0x7F7F7F7F7F7F7F7FLL,
+ 0xFFFFFFFFFFFFFFFFLL, 0x7F7F7F7F7F7F7F7FLL
+};
+
+#define MMX_num_ops (sizeof (MMXops) / sizeof (MMXops[0]))
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-10.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-10.c
new file mode 100644
index 00000000000..4da7ed3a3e9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-10.c
@@ -0,0 +1,43 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_from_int (long long *ll1, long long *r)
+{
+ int i1 = *(int *) ll1;
+ *(__m64 *) r = _m_from_int (i1);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (long long *ll1, long long *r)
+{
+ int *res = (int *) r;
+ res[0] = *(int *) ll1;
+ res[1] = 0;
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ long long r, ck;
+ int fail = 0;
+
+ /* Run the MMX tests */
+ for (i = 0; i < MMX_num_ops; i++)
+ {
+ test_from_int (&MMXops[i], &r);
+ compute_correct_result (&MMXops[i], &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0)
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-11.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-11.c
new file mode 100644
index 00000000000..6737ec5f2d4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-11.c
@@ -0,0 +1,39 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_from_long_long (long long *ll1, long long *r)
+{
+ *(__m64 *) r = _mm_cvtsi64_m64 (*ll1);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (long long *ll1, long long *r)
+{
+ *r = *ll1;
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ long long r, ck;
+ int fail = 0;
+
+ /* Run the MMX tests */
+ for (i = 0; i < MMX_num_ops; i++)
+ {
+ test_from_long_long (&MMXops[i], &r);
+ compute_correct_result (&MMXops[i], &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0)
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-12.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-12.c
new file mode 100644
index 00000000000..b626daa0aab
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-12.c
@@ -0,0 +1,42 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_to_int (long long *ll1, long long *r)
+{
+ __m64 m = *(__m64 *) ll1;
+ *(int *) r = _m_to_int (m);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (long long *ll1, long long *r)
+{
+ int *i1 = (int *) ll1;
+ *(int *) r = *i1;
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ long long r = 0, ck = 0;
+ int fail = 0;
+
+ /* Run the MMX tests */
+ for (i = 0; i < MMX_num_ops; i++)
+ {
+ test_to_int (&MMXops[i], &r);
+ compute_correct_result (&MMXops[i], &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0)
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-13.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-13.c
new file mode 100644
index 00000000000..fd1eed66daa
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-13.c
@@ -0,0 +1,40 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_to_long_long (long long *ll1, long long *r)
+{
+ __m64 m = *(__m64 *) ll1;
+ *r = _mm_cvtm64_si64 (m);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (long long *ll1, long long *r)
+{
+ *r = *ll1;
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ long long r, ck;
+ int fail = 0;
+
+ /* Run the MMX tests */
+ for (i = 0; i < MMX_num_ops; i++)
+ {
+ test_to_long_long (&MMXops[i], &r);
+ compute_correct_result (&MMXops[i], &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0)
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-14.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-14.c
new file mode 100644
index 00000000000..bc21ba711da
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-14.c
@@ -0,0 +1,31 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+
+__attribute__((noinline, noclone))
+static void
+test_setzero (long long *r)
+{
+ *(__m64 *) r = _mm_setzero_si64 ();
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (long long *r)
+{
+ *r = 0x0LL;
+}
+
+static void
+sse2_test (void)
+{
+ long long r, ck;
+
+ /* Run the MMX tests */
+ test_setzero (&r);
+ compute_correct_result (&ck);
+ if (ck != r)
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-15.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-15.c
new file mode 100644
index 00000000000..ab8451b591d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-15.c
@@ -0,0 +1,36 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+
+__attribute__((noinline, noclone))
+static void
+test_set (int x, int y, long long *r)
+{
+ *(__m64 *) r = _mm_set_pi32 (x, y);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (int x, int y, long long *res_p)
+{
+ int *res = (int *) res_p;
+ res[0] = y;
+ res[1] = x;
+}
+
+static void
+sse2_test (void)
+{
+ int x, y;
+ long long r, ck;
+
+ /* Run the MMX tests */
+ x = 0x0badbeef;
+ y = 0x0badfeed;
+ test_set (x, y, &r);
+ compute_correct_result (x, y, &ck);
+ if (ck != r)
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-16.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-16.c
new file mode 100644
index 00000000000..b348b9f1c64
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-16.c
@@ -0,0 +1,40 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+
+__attribute__((noinline, noclone))
+static void
+test_set (int i0, int i1, int i2, int i3, long long *r)
+{
+ *(__m64 *) r = _mm_set_pi16 (i0, i1, i2, i3);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (int i0, int i1, int i2, int i3, long long *res_p)
+{
+ short *res = (short *) res_p;
+ res[0] = i3;
+ res[1] = i2;
+ res[2] = i1;
+ res[3] = i0;
+}
+
+static void
+sse2_test (void)
+{
+ short i0, i1, i2, i3;
+ long long r, ck;
+
+ /* Run the MMX tests */
+ i0 = 0x0bad;
+ i1 = 0xbeef;
+ i2 = 0x0bad;
+ i3 = 0xfeed;
+ test_set (i0, i1, i2, i3, &r);
+ compute_correct_result (i0, i1, i2, i3, &ck);
+ if (ck != r)
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-17.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-17.c
new file mode 100644
index 00000000000..756cbeb33da
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-17.c
@@ -0,0 +1,51 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+
+__attribute__((noinline, noclone))
+static void
+test_set (char i0, char i1, char i2, char i3,
+ char i4, char i5, char i6, char i7, long long *r)
+{
+ *(__m64 *) r = _mm_set_pi8 (i0, i1, i2, i3, i4, i5, i6, i7);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (char i0, char i1, char i2, char i3,
+ char i4, char i5, char i6, char i7,
+ long long *res_p)
+{
+ char *res = (char *) res_p;
+ res[0] = i7;
+ res[1] = i6;
+ res[2] = i5;
+ res[3] = i4;
+ res[4] = i3;
+ res[5] = i2;
+ res[6] = i1;
+ res[7] = i0;
+}
+
+static void
+sse2_test (void)
+{
+ char i0, i1, i2, i3, i4, i5, i6, i7;
+ long long r, ck;
+
+ /* Run the MMX tests */
+ i0 = 0x12;
+ i1 = 0x34;
+ i2 = 0x56;
+ i3 = 0x78;
+ i4 = 0x90;
+ i5 = 0xab;
+ i6 = 0xcd;
+ i7 = 0xef;
+ test_set (i0, i1, i2, i3, i4, i5, i6, i7, &r);
+ compute_correct_result (i0, i1, i2, i3, i4, i5, i6, i7, &ck);
+ if (ck != r)
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-18a.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-18a.c
new file mode 100644
index 00000000000..3505a5c0cf4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-18a.c
@@ -0,0 +1,14 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -msse2 -mno-mmx -mno-avx512vl" } */
+/* { dg-final { scan-assembler-times "pshufd" 1 } } */
+/* { dg-final { scan-assembler-times "movd" 1 } } */
+/* { dg-final { scan-assembler-not "movl" } } */
+
+#include <mmintrin.h>
+
+__m64
+foo (int i)
+{
+ __v2si x = { i, i };
+ return (__m64) x;
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-18b.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-18b.c
new file mode 100644
index 00000000000..9b267b17346
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-18b.c
@@ -0,0 +1,7 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mno-mmx -mavx512bw -mavx512vl" } */
+/* { dg-final { scan-assembler-times "pbroadcastd" 1 } } */
+/* { dg-final { scan-assembler-not "movd" } } */
+/* { dg-final { scan-assembler-not "movl" } } */
+
+#include "sse2-mmx-18a.c"
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-18c.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-18c.c
new file mode 100644
index 00000000000..394f05b6b49
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-18c.c
@@ -0,0 +1,7 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mno-mmx -mavx512bw -mno-avx512vl" } */
+/* { dg-final { scan-assembler-times "pshufd" 1 } } */
+/* { dg-final { scan-assembler-times "movd" 1 } } */
+/* { dg-final { scan-assembler-not "movl" } } */
+
+#include "sse2-mmx-18a.c"
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-19a.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-19a.c
new file mode 100644
index 00000000000..9715ace241f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-19a.c
@@ -0,0 +1,14 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -msse2 -mno-mmx -mno-avx -mtune=intel" } */
+/* { dg-final { scan-assembler-times "pshuflw" 1 } } */
+/* { dg-final { scan-assembler-times "movd" 1 } } */
+/* { dg-final { scan-assembler-not "movl" } } */
+
+#include <mmintrin.h>
+
+__m64
+foo (short i)
+{
+ __v4hi x = { i, i, i, i };
+ return (__m64) x;
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-19b.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-19b.c
new file mode 100644
index 00000000000..a6d42313336
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-19b.c
@@ -0,0 +1,7 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mno-mmx -mavx512bw -mavx512vl" } */
+/* { dg-final { scan-assembler-times "pbroadcastw" 1 } } */
+/* { dg-final { scan-assembler-not "movd" } } */
+/* { dg-final { scan-assembler-not "movl" } } */
+
+#include "sse2-mmx-19a.c"
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-19c.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-19c.c
new file mode 100644
index 00000000000..b02dc8c2ffd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-19c.c
@@ -0,0 +1,7 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mno-mmx -mavx -mno-avx2 -mtune=intel" } */
+/* { dg-final { scan-assembler-times "pshuflw" 1 } } */
+/* { dg-final { scan-assembler-times "movd" 1 } } */
+/* { dg-final { scan-assembler-not "movl" } } */
+
+#include "sse2-mmx-19a.c"
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-19d.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-19d.c
new file mode 100644
index 00000000000..54691883c9c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-19d.c
@@ -0,0 +1,7 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mno-mmx -mavx512bw -mno-avx512vl -mtune=intel" } */
+/* { dg-final { scan-assembler-times "pbroadcastw" 1 } } */
+/* { dg-final { scan-assembler-times "movd" 1 } } */
+/* { dg-final { scan-assembler-not "movl" } } */
+
+#include "sse2-mmx-19a.c"
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-19e.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-19e.c
new file mode 100644
index 00000000000..8be973cc4fc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-19e.c
@@ -0,0 +1,7 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mno-mmx -mavx2 -mno-avx512f -mtune=intel" } */
+/* { dg-final { scan-assembler-times "pbroadcastw" 1 } } */
+/* { dg-final { scan-assembler-times "movd" 1 } } */
+/* { dg-final { scan-assembler-not "movl" } } */
+
+#include "sse2-mmx-19a.c"
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-2.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-2.c
new file mode 100644
index 00000000000..e4cee2da83e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-2.c
@@ -0,0 +1,12 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -msse2 -mno-mmx" } */
+/* { dg-final { scan-assembler-not "%xmm" } } */
+/* { dg-final { scan-assembler-not "%mm" } } */
+
+#include <mmintrin.h>
+
+float
+foo (__m64 x)
+{
+ return ((__v2sf) x)[0];
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-20.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-20.c
new file mode 100644
index 00000000000..173fa154d40
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-20.c
@@ -0,0 +1,12 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -msse2 -mno-mmx" } */
+/* { dg-final { scan-assembler-times "movd" 1 } } */
+/* { dg-final { scan-assembler-not "%mm" } } */
+
+#include <mmintrin.h>
+
+int
+foo (__m64 x)
+{
+ return ((__v2si) x)[0];
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-21.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-21.c
new file mode 100644
index 00000000000..8f5341e2de6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-21.c
@@ -0,0 +1,13 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -msse2 -mno-mmx" } */
+/* { dg-final { scan-assembler-times "pshufd" 1 } } */
+/* { dg-final { scan-assembler-times "movd" 1 } } */
+/* { dg-final { scan-assembler-not "%mm" } } */
+
+#include <mmintrin.h>
+
+int
+foo (__m64 x)
+{
+ return ((__v2si) x)[1]; /* Element 1 of x viewed as two ints.  */
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-3.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-3.c
new file mode 100644
index 00000000000..77f518b6c5f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-3.c
@@ -0,0 +1,13 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -msse2 -mno-mmx" } */
+/* { dg-final { scan-assembler "cvtdq2ps" } } */
+/* { dg-final { scan-assembler-not "cvtpi2ps" } } */
+/* { dg-final { scan-assembler-not "%mm" } } */
+
+#include <xmmintrin.h>
+
+__m128
+foo (__m128 i1, __m64 i2)
+{
+ return _mm_cvtpi32_ps (i1, i2); /* Scans expect cvtdq2ps, not cvtpi2ps.  */
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-4.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-4.c
new file mode 100644
index 00000000000..d923724fc1c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-4.c
@@ -0,0 +1,4 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -msse2 -mno-mmx" } */
+
+#include "mmx-4.c"
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-5.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-5.c
new file mode 100644
index 00000000000..1953dc89bb1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-5.c
@@ -0,0 +1,11 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -msse2 -mno-mmx" } */
+/* { dg-final { scan-assembler-not "%mm" } } */
+
+#include <xmmintrin.h>
+
+int
+foo (__m64 i)
+{
+ return _m_pextrw (i, 2); /* Selector 2; asm must not mention %mm.  */
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-6.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-6.c
new file mode 100644
index 00000000000..f73444f493b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-6.c
@@ -0,0 +1,11 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -msse2 -mno-mmx" } */
+/* { dg-final { scan-assembler-not "%mm" } } */
+
+#include <xmmintrin.h>
+
+__m64
+foo (__m64 i, int w)
+{
+ return _m_pinsrw (i, w, 2); /* Selector 2; asm must not mention %mm.  */
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-7.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-7.c
new file mode 100644
index 00000000000..6ea491d2715
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-7.c
@@ -0,0 +1,13 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -msse2 -mno-mmx" } */
+/* { dg-final { scan-assembler "movnti" } } */
+/* { dg-final { scan-assembler-not "movntq" } } */
+/* { dg-final { scan-assembler-not "%mm" } } */
+
+#include <xmmintrin.h>
+
+void
+foo (__m64 *p, __m64 i)
+{
+ _mm_stream_pi (p, i); /* Scans expect movnti and forbid movntq.  */
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-8.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-8.c
new file mode 100644
index 00000000000..342c2fa4f25
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-8.c
@@ -0,0 +1,4 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -msse2 -mno-mmx" } */
+
+#include "mmx-8.c"
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-9.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-9.c
new file mode 100644
index 00000000000..f0bf7256c0e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-9.c
@@ -0,0 +1,79 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -msse2 -mno-mmx" } */
+/* { dg-require-effective-target sse2 } */
+
+#include "sse2-check.h"
+
+#include <string.h>
+
+#define FLOAT_X 2.3456
+#define FLOAT_Y -4.5987
+
+static float expected_x = FLOAT_X;
+static float expected_y = FLOAT_Y;
+static __v2sf expected1 = { FLOAT_X, FLOAT_Y };
+static __v2sf expected2 = { FLOAT_X, 0 };
+static __v2sf expected3 = { FLOAT_X, FLOAT_X };
+
+float
+__attribute__((noinline, noclone))
+foo1 (__m64 x)
+{
+ return ((__v2sf) x)[0]; /* Element 0 of x viewed as two floats.  */
+}
+
+float
+__attribute__((noinline, noclone))
+foo2 (__m64 x)
+{
+ return ((__v2sf) x)[1]; /* Element 1 of x viewed as two floats.  */
+}
+
+__m64
+__attribute__((noinline, noclone))
+foo3 (float x)
+{
+ return __extension__ (__m64) (__v2sf) { x, 0 }; /* Build { x, 0 }.  */
+}
+
+__m64
+__attribute__((noinline, noclone))
+foo4 (float x)
+{
+ return __extension__ (__m64) (__v2sf) { x, x }; /* Build { x, x }.  */
+}
+
+__m64
+__attribute__((noinline, noclone))
+foo5 (float x, float y)
+{
+ return __extension__ (__m64) (__v2sf) { x, y }; /* Build { x, y }.  */
+}
+
+void
+__attribute__((noinline))
+sse2_test (void)
+{
+ __m64 res;
+ float x;
+
+ x = foo1 ((__m64) expected1); /* Element 0 of { FLOAT_X, FLOAT_Y }.  */
+ if (x != expected_x)
+ abort ();
+
+ x = foo2 ((__m64) expected1); /* Element 1 of { FLOAT_X, FLOAT_Y }.  */
+ if (x != expected_y)
+ abort ();
+
+ res = foo3 (FLOAT_X); /* Must equal expected2 = { FLOAT_X, 0 } bytewise.  */
+ if (memcmp (&res, &expected2, sizeof (res)))
+ abort ();
+
+ res = foo4 (FLOAT_X); /* Must equal expected3 = { FLOAT_X, FLOAT_X }.  */
+ if (memcmp (&res, &expected3, sizeof (res)))
+ abort ();
+
+ res = foo5 (FLOAT_X, FLOAT_Y); /* Must equal expected1 bytewise.  */
+ if (memcmp (&res, &expected1, sizeof (res)))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-cvtpi2ps.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-cvtpi2ps.c
new file mode 100644
index 00000000000..4ab38d6f015
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-cvtpi2ps.c
@@ -0,0 +1,43 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include <string.h>
+#include "sse2-check.h"
+
+__attribute__((noinline, noclone)) /* Not inlined or cloned into callers.  */
+static void
+test_cvtpi32_ps (__m128 *i1, __m64 *i2, __m128 *r)
+{
+ *(__m128 *) r = _mm_cvtpi32_ps (*i1, *i2); /* Intrinsic under test.  */
+}
+
+/* Reference: copy *dst_p, then overwrite elements 0 and 1 from src.  */
+static void
+compute_correct_result (__m128 *dst_p, __m64 *src_p, __m128 *res_p)
+{
+ int *src = (int *) src_p; /* The two ints to convert.  */
+ float *res = (float *) res_p; /* Element-wise view of the result.  */
+ *res_p = *dst_p; /* Elements 2 and 3 stay as in *dst_p.  */
+ int i;
+ __m128 r;
+ for (i = 0; i < 2; i++)
+ {
+ r = _mm_cvt_si2ss (*dst_p, src[i]); /* Scalar int-to-float conversion.  */
+ res[i] = ((__v4sf) r)[0]; /* Keep only the converted low element.  */
+ }
+}
+
+static void
+sse2_test (void)
+{
+ __m128 r, ck; /* Intrinsic result and reference result.  */
+ __v4sf x = { 1.99f, -3.9f, -4.9f, 3.8f };
+ __v2si y = { 30, -39 };
+
+ /* Run the intrinsic and the reference model; all 16 bytes must match.  */
+ test_cvtpi32_ps ((__m128 *) &x, (__m64 *) &y, &r);
+ compute_correct_result ((__m128 *) &x, (__m64 *) &y, &ck);
+ if (memcmp (&ck, &r, sizeof (r)))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-cvtps2pi.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-cvtps2pi.c
new file mode 100644
index 00000000000..6084c9ff3d5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-cvtps2pi.c
@@ -0,0 +1,36 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+
+__attribute__((noinline, noclone)) /* Not inlined or cloned into callers.  */
+static void
+test_cvtps_pi32 (__m128 *src_p, long long *r)
+{
+ *(__m64 *) r = _mm_cvtps_pi32 (*src_p); /* Intrinsic under test.  */
+}
+
+/* Reference: convert source elements 0 and 1 with _mm_cvt_ss2si.  */
+static void
+compute_correct_result (__m128 *src_p, long long *res_p)
+{
+ __v4sf *src = (__v4sf *) src_p;
+ int *res = (int *) res_p; /* Result is written as two ints.  */
+ int i;
+ for (i = 0; i < 2; i++)
+ res[i] = _mm_cvt_ss2si (_mm_set_ss ((*src)[i]));
+}
+
+static void
+sse2_test (void)
+{
+ long long r, ck; /* Intrinsic result and reference result.  */
+ __v4sf x = { 1.99f, -3.9f, -4.9f, 3.8f };
+
+ /* Run the intrinsic and the reference model; results must be equal.  */
+ test_cvtps_pi32 ((__m128 *) &x, &r);
+ compute_correct_result ((__m128 *) &x, &ck);
+ if (ck != r)
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-cvttps2pi.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-cvttps2pi.c
new file mode 100644
index 00000000000..6f12b2f064f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-cvttps2pi.c
@@ -0,0 +1,36 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+
+__attribute__((noinline, noclone)) /* Not inlined or cloned into callers.  */
+static void
+test_cvttps_pi32 (__m128 *src_p, long long *r)
+{
+ *(__m64 *) r = _mm_cvttps_pi32 (*src_p); /* Intrinsic under test.  */
+}
+
+/* Reference: convert source elements 0 and 1 with _mm_cvtt_ss2si.  */
+static void
+compute_correct_result (__m128 *src_p, long long *res_p)
+{
+ __v4sf *src = (__v4sf *) src_p;
+ int *res = (int *) res_p; /* Result is written as two ints.  */
+ int i;
+ for (i = 0; i < 2; i++)
+ res[i] = _mm_cvtt_ss2si (_mm_set_ss ((*src)[i]));
+}
+
+static void
+sse2_test (void)
+{
+ long long r, ck; /* Intrinsic result and reference result.  */
+ __v4sf x = { 1.99f, -3.9f, -4.9f, 3.8f };
+
+ /* Run the intrinsic and the reference model; results must be equal.  */
+ test_cvttps_pi32 ((__m128 *) &x, &r);
+ compute_correct_result ((__m128 *) &x, &ck);
+ if (ck != r)
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-maskmovq.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-maskmovq.c
new file mode 100644
index 00000000000..29fab1914f6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-maskmovq.c
@@ -0,0 +1,99 @@
+/* { dg-do run { target { *-*-linux* } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include <unistd.h>
+#include <sys/mman.h>
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone)) /* Not inlined or cloned into callers.  */
+static void
+test_maskmovq (long long *ll1, long long *ll2, long long *r)
+{
+ __m64 t1 = *(__m64 *) ll1; /* First intrinsic argument.  */
+ __m64 t2 = *(__m64 *) ll2; /* Second intrinsic argument.  */
+ _m_maskmovq (t1, t2, (char *) r); /* Intrinsic under test; stores to r.  */
+}
+
+/* Reference: copy dst byte i to res where bit 7 of src byte i is set.  */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+ long long *res_p)
+{
+ char *dst = (char *) dst_p;
+ char *src = (char *) src_p;
+ char *res = (char *) res_p;
+ int i;
+ for (i = 0; i < 8; i++)
+ if ((src[i] & 0x80) != 0) /* Bytes that fail this test keep res[i].  */
+ res[i] = dst[i];
+}
+
+static void
+do_maskmovq_test (long long *r)
+{
+ int i;
+ long long ck;
+ int fail = 0;
+
+ /* For each MMXops entry, masked-store it to r using itself as mask.  */
+ for (i = 0; i < MMX_num_ops; i++)
+ {
+ r[0] = -1LL; /* Same fill in both, so unwritten bytes compare equal.  */
+ ck = -1LL;
+ test_maskmovq (&MMXops[i], &MMXops[i], r);
+ compute_correct_result (&MMXops[i], &MMXops[i], &ck);
+ if (*r != ck)
+ fail++;
+ }
+
+ if (fail != 0) /* Any mismatch fails the whole test.  */
+ abort ();
+}
+
+static void
+sse2_test (void)
+{
+ char *buf;
+ long long *r;
+ size_t page_size = sysconf(_SC_PAGESIZE);
+
+ buf = mmap (0, 3 * page_size, PROT_READ | PROT_WRITE, /* Three pages.  */
+ MAP_PRIVATE | MAP_ANON, -1, 0);
+ if (buf == MAP_FAILED)
+ {
+ perror ("mmap");
+ abort ();
+ }
+
+ if (mprotect (buf, page_size, PROT_NONE)) /* First page: no access.  */
+ {
+ perror ("mprotect");
+ abort ();
+ }
+
+ if (mprotect (buf + 2 * page_size, page_size, PROT_NONE)) /* Third too.  */
+ {
+ perror ("mprotect");
+ abort ();
+ }
+
+ r = (long long *) (buf + page_size); /* First byte of the usable page.  */
+ do_maskmovq_test (r);
+
+ r = (long long *) (buf + page_size + 3); /* 3 bytes into the page.  */
+ do_maskmovq_test (r);
+
+ r = (long long *) (buf + page_size + 11); /* 11 bytes into the page.  */
+ do_maskmovq_test (r);
+
+ r = (long long *) (buf + 2 * page_size - 16); /* 16 bytes before the end.  */
+ do_maskmovq_test (r);
+
+ r = (long long *) (buf + 2 * page_size - 16 + 3); /* 13 bytes before.  */
+ do_maskmovq_test (r);
+
+ r = (long long *) (buf + 2 * page_size - 16 + 8); /* Last 8 usable bytes.  */
+ do_maskmovq_test (r);
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-packssdw.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-packssdw.c
new file mode 100644
index 00000000000..ef5fded4568
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-packssdw.c
@@ -0,0 +1,52 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone)) /* Not inlined or cloned into callers.  */
+static void
+test_packssdw (long long *ll1, long long *ll2, long long *r)
+{
+ __m64 t1 = *(__m64 *) ll1; /* Reinterpret each 64-bit input as __m64.  */
+ __m64 t2 = *(__m64 *) ll2;
+ *(__m64 *) r = _m_packssdw (t1, t2); /* Intrinsic under test.  */
+}
+
+/* Reference: saturate_w of each dst int, then of each src int.  */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+ long long *res_p)
+{
+ int *dst = (int *) dst_p;
+ int *src = (int *) src_p;
+ short *res = (short *) res_p;
+ int i;
+
+ for (i = 0; i < 2; i++)
+ {
+ res[i] = saturate_w (dst[i]); /* res[0..1] come from dst.  */
+ res[i + 2] = saturate_w (src[i]); /* res[2..3] come from src.  */
+ }
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ long long r, ck; /* Intrinsic result and reference result.  */
+ int fail = 0;
+
+ /* Compare intrinsic and reference on MMXops pairs (i, i + 1).  */
+ for (i = 0; i < MMX_num_ops; i += 2)
+ {
+ test_packssdw (&MMXops[i], &MMXops[i + 1], &r);
+ compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0) /* Any mismatch fails the whole test.  */
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-packsswb.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-packsswb.c
new file mode 100644
index 00000000000..bcc34040b2b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-packsswb.c
@@ -0,0 +1,52 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone)) /* Not inlined or cloned into callers.  */
+static void
+test_packsswb (long long *ll1, long long *ll2, long long *r)
+{
+ __m64 t1 = *(__m64 *) ll1; /* Reinterpret each 64-bit input as __m64.  */
+ __m64 t2 = *(__m64 *) ll2;
+ *(__m64 *) r = _m_packsswb (t1, t2); /* Intrinsic under test.  */
+}
+
+/* Reference: saturate_b of each dst short, then of each src short.  */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+ long long *res_p)
+{
+ short *dst = (short *) dst_p;
+ short *src = (short *) src_p;
+ char *res = (char *) res_p;
+ int i;
+
+ for (i = 0; i < 4; i++)
+ {
+ res[i] = saturate_b (dst[i]); /* res[0..3] come from dst.  */
+ res[i + 4] = saturate_b (src[i]); /* res[4..7] come from src.  */
+ }
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ long long r, ck; /* Intrinsic result and reference result.  */
+ int fail = 0;
+
+ /* Compare intrinsic and reference on MMXops pairs (i, i + 1).  */
+ for (i = 0; i < MMX_num_ops; i += 2)
+ {
+ test_packsswb (&MMXops[i], &MMXops[i + 1], &r);
+ compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0) /* Any mismatch fails the whole test.  */
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-packuswb.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-packuswb.c
new file mode 100644
index 00000000000..ac2da068d64
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-packuswb.c
@@ -0,0 +1,52 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone)) /* Not inlined or cloned into callers.  */
+static void
+test_packuswb (long long *ll1, long long *ll2, long long *r)
+{
+ __m64 t1 = *(__m64 *) ll1; /* Reinterpret each 64-bit input as __m64.  */
+ __m64 t2 = *(__m64 *) ll2;
+ *(__m64 *) r = _m_packuswb (t1, t2); /* Intrinsic under test.  */
+}
+
+/* Reference: saturate_ub of each dst short, then of each src short.  */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+ long long *res_p)
+{
+ short *dst = (short *) dst_p;
+ short *src = (short *) src_p;
+ unsigned char *res = (unsigned char *) res_p;
+ int i;
+
+ for (i = 0; i < 4; i++)
+ {
+ res[i] = saturate_ub (dst[i]); /* res[0..3] come from dst.  */
+ res[i + 4] = saturate_ub (src[i]); /* res[4..7] come from src.  */
+ }
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ long long r, ck; /* Intrinsic result and reference result.  */
+ int fail = 0;
+
+ /* Compare intrinsic and reference on MMXops pairs (i, i + 1).  */
+ for (i = 0; i < MMX_num_ops; i += 2)
+ {
+ test_packuswb (&MMXops[i], &MMXops[i + 1], &r);
+ compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0) /* Any mismatch fails the whole test.  */
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-paddb.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-paddb.c
new file mode 100644
index 00000000000..b12534908ec
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-paddb.c
@@ -0,0 +1,48 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone)) /* Not inlined or cloned into callers.  */
+static void
+test_paddb (long long *ll1, long long *ll2, long long *r)
+{
+ __m64 t1 = *(__m64 *) ll1; /* Reinterpret each 64-bit input as __m64.  */
+ __m64 t2 = *(__m64 *) ll2;
+ *(__m64 *) r = _m_paddb (t1, t2); /* Intrinsic under test.  */
+}
+
+/* Reference: res[i] = dst[i] + src[i] for each of the 8 chars.  */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+ long long *res_p)
+{
+ char *dst = (char *) dst_p;
+ char *src = (char *) src_p;
+ char *res = (char *) res_p;
+ int i;
+ for (i = 0; i < 8; i++)
+ res[i] = dst[i] + src[i]; /* Sum is converted back to char on store.  */
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ long long r, ck; /* Intrinsic result and reference result.  */
+ int fail = 0;
+
+ /* Compare intrinsic and reference on MMXops pairs (i, i + 1).  */
+ for (i = 0; i < MMX_num_ops; i += 2)
+ {
+ test_paddb (&MMXops[i], &MMXops[i + 1], &r);
+ compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0) /* Any mismatch fails the whole test.  */
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-paddd.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-paddd.c
new file mode 100644
index 00000000000..d71b21713a6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-paddd.c
@@ -0,0 +1,48 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone)) /* Not inlined or cloned into callers.  */
+static void
+test_paddd (long long *ll1, long long *ll2, long long *r)
+{
+ __m64 t1 = *(__m64 *) ll1; /* Reinterpret each 64-bit input as __m64.  */
+ __m64 t2 = *(__m64 *) ll2;
+ *(__m64 *) r = _m_paddd (t1, t2); /* Intrinsic under test.  */
+}
+
+/* Reference: res[i] = dst[i] + src[i] for each of the 2 ints.  */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+ long long *res_p)
+{
+ int *dst = (int *) dst_p;
+ int *src = (int *) src_p;
+ int *res = (int *) res_p;
+ int i;
+ for (i = 0; i < 2; i++)
+ res[i] = dst[i] + src[i]; /* NOTE(review): signed add; overflow is UB.  */
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ long long r, ck; /* Intrinsic result and reference result.  */
+ int fail = 0;
+
+ /* Compare intrinsic and reference on MMXops pairs (i, i + 1).  */
+ for (i = 0; i < MMX_num_ops; i += 2)
+ {
+ test_paddd (&MMXops[i], &MMXops[i + 1], &r);
+ compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0) /* Any mismatch fails the whole test.  */
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-paddq.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-paddq.c
new file mode 100644
index 00000000000..b015393cf03
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-paddq.c
@@ -0,0 +1,43 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone)) /* Not inlined or cloned into callers.  */
+static void
+test_paddq (long long *ll1, long long *ll2, long long *r)
+{
+ __m64 t1 = *(__m64 *) ll1; /* Reinterpret each 64-bit input as __m64.  */
+ __m64 t2 = *(__m64 *) ll2;
+ *(__m64 *) r = _mm_add_si64 (t1, t2); /* Intrinsic under test.  */
+}
+
+/* Reference: one long long addition of *dst_p and *src_p.  */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+ long long *res_p)
+{
+ res_p[0] = dst_p[0] + src_p[0]; /* NOTE(review): signed; overflow is UB.  */
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ long long r, ck; /* Intrinsic result and reference result.  */
+ int fail = 0;
+
+ /* Compare intrinsic and reference on MMXops pairs (i, i + 1).  */
+ for (i = 0; i < MMX_num_ops; i += 2)
+ {
+ test_paddq (&MMXops[i], &MMXops[i + 1], &r);
+ compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0) /* Any mismatch fails the whole test.  */
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-paddsb.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-paddsb.c
new file mode 100644
index 00000000000..f8236beef32
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-paddsb.c
@@ -0,0 +1,48 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone)) /* Not inlined or cloned into callers.  */
+static void
+test_paddsb (long long *ll1, long long *ll2, long long *r)
+{
+ __m64 t1 = *(__m64 *) ll1; /* Reinterpret each 64-bit input as __m64.  */
+ __m64 t2 = *(__m64 *) ll2;
+ *(__m64 *) r = _m_paddsb (t1, t2); /* Intrinsic under test.  */
+}
+
+/* Reference: res[i] = saturate_b (dst[i] + src[i]) for 8 chars.  */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+ long long *res_p)
+{
+ char *dst = (char *) dst_p;
+ char *src = (char *) src_p;
+ char *res = (char *) res_p;
+ int i;
+ for (i = 0; i < 8; i++)
+ res[i] = saturate_b (dst[i] + src[i]); /* Sum is formed in int first.  */
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ long long r, ck; /* Intrinsic result and reference result.  */
+ int fail = 0;
+
+ /* Compare intrinsic and reference on MMXops pairs (i, i + 1).  */
+ for (i = 0; i < MMX_num_ops; i += 2)
+ {
+ test_paddsb (&MMXops[i], &MMXops[i + 1], &r);
+ compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0) /* Any mismatch fails the whole test.  */
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-paddsw.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-paddsw.c
new file mode 100644
index 00000000000..cc52fe65e66
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-paddsw.c
@@ -0,0 +1,48 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone)) /* Not inlined or cloned into callers.  */
+static void
+test_paddsw (long long *ll1, long long *ll2, long long *r)
+{
+ __m64 t1 = *(__m64 *) ll1; /* Reinterpret each 64-bit input as __m64.  */
+ __m64 t2 = *(__m64 *) ll2;
+ *(__m64 *) r = _m_paddsw (t1, t2); /* Intrinsic under test.  */
+}
+
+/* Reference: res[i] = saturate_w (dst[i] + src[i]) for 4 shorts.  */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+ long long *res_p)
+{
+ short *dst = (short *) dst_p;
+ short *src = (short *) src_p;
+ short *res = (short *) res_p;
+ int i;
+ for (i = 0; i < 4; i++)
+ res[i] = saturate_w (dst[i] + src[i]); /* Sum is formed in int first.  */
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ long long r, ck; /* Intrinsic result and reference result.  */
+ int fail = 0;
+
+ /* Compare intrinsic and reference on MMXops pairs (i, i + 1).  */
+ for (i = 0; i < MMX_num_ops; i += 2)
+ {
+ test_paddsw (&MMXops[i], &MMXops[i + 1], &r);
+ compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0) /* Any mismatch fails the whole test.  */
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-paddusb.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-paddusb.c
new file mode 100644
index 00000000000..c802bcf4d10
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-paddusb.c
@@ -0,0 +1,48 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone)) /* Not inlined or cloned into callers.  */
+static void
+test_paddusb (long long *ll1, long long *ll2, long long *r)
+{
+ __m64 t1 = *(__m64 *) ll1; /* Reinterpret each 64-bit input as __m64.  */
+ __m64 t2 = *(__m64 *) ll2;
+ *(__m64 *) r = _m_paddusb (t1, t2); /* Intrinsic under test.  */
+}
+
+/* Reference: res[i] = saturate_ub (dst[i] + src[i]), 8 unsigned chars.  */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+ long long *res_p)
+{
+ unsigned char *dst = (unsigned char *) dst_p;
+ unsigned char *src = (unsigned char *) src_p;
+ unsigned char *res = (unsigned char *) res_p;
+ int i;
+ for (i = 0; i < 8; i++)
+ res[i] = saturate_ub (dst[i] + src[i]); /* Sum is formed in int first.  */
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ long long r, ck; /* Intrinsic result and reference result.  */
+ int fail = 0;
+
+ /* Compare intrinsic and reference on MMXops pairs (i, i + 1).  */
+ for (i = 0; i < MMX_num_ops; i += 2)
+ {
+ test_paddusb (&MMXops[i], &MMXops[i + 1], &r);
+ compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0) /* Any mismatch fails the whole test.  */
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-paddusw.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-paddusw.c
new file mode 100644
index 00000000000..c0f3c8601fb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-paddusw.c
@@ -0,0 +1,48 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone)) /* Not inlined or cloned into callers.  */
+static void
+test_paddusw (long long *ll1, long long *ll2, long long *r)
+{
+ __m64 t1 = *(__m64 *) ll1; /* Reinterpret each 64-bit input as __m64.  */
+ __m64 t2 = *(__m64 *) ll2;
+ *(__m64 *) r = _m_paddusw (t1, t2); /* Intrinsic under test.  */
+}
+
+/* Reference: res[i] = saturate_uw (dst[i] + src[i]), 4 unsigned shorts.  */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+ long long *res_p)
+{
+ unsigned short *dst = (unsigned short *) dst_p;
+ unsigned short *src = (unsigned short *) src_p;
+ unsigned short *res = (unsigned short *) res_p;
+ int i;
+ for (i = 0; i < 4; i++)
+ res[i] = saturate_uw (dst[i] + src[i]); /* Sum is formed in int first.  */
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ long long r, ck; /* Intrinsic result and reference result.  */
+ int fail = 0;
+
+ /* Compare intrinsic and reference on MMXops pairs (i, i + 1).  */
+ for (i = 0; i < MMX_num_ops; i += 2)
+ {
+ test_paddusw (&MMXops[i], &MMXops[i + 1], &r);
+ compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0) /* Any mismatch fails the whole test.  */
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-paddw.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-paddw.c
new file mode 100644
index 00000000000..6921ae8da3c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-paddw.c
@@ -0,0 +1,48 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone)) /* Not inlined or cloned into callers.  */
+static void
+test_paddw (long long *ll1, long long *ll2, long long *r)
+{
+ __m64 t1 = *(__m64 *) ll1; /* Reinterpret each 64-bit input as __m64.  */
+ __m64 t2 = *(__m64 *) ll2;
+ *(__m64 *) r = _m_paddw (t1, t2); /* Intrinsic under test.  */
+}
+
+/* Reference: res[i] = dst[i] + src[i] for each of the 4 shorts.  */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+ long long *res_p)
+{
+ short *dst = (short *) dst_p;
+ short *src = (short *) src_p;
+ short *res = (short *) res_p;
+ int i;
+ for (i = 0; i < 4; i++)
+ res[i] = dst[i] + src[i]; /* Sum is converted back to short on store.  */
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ long long r, ck; /* Intrinsic result and reference result.  */
+ int fail = 0;
+
+ /* Compare intrinsic and reference on MMXops pairs (i, i + 1).  */
+ for (i = 0; i < MMX_num_ops; i += 2)
+ {
+ test_paddw (&MMXops[i], &MMXops[i + 1], &r);
+ compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0) /* Any mismatch fails the whole test.  */
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-pand.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-pand.c
new file mode 100644
index 00000000000..0047a7e4d93
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-pand.c
@@ -0,0 +1,44 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone)) /* Not inlined or cloned into callers.  */
+static void
+test_pand (long long *ll1, long long *ll2, long long *r)
+{
+ __m64 t1 = *(__m64 *) ll1; /* Reinterpret each 64-bit input as __m64.  */
+ __m64 t2 = *(__m64 *) ll2;
+ *(__m64 *) r = _m_pand (t1, t2); /* Intrinsic under test.  */
+}
+
+/* Reference: bitwise AND of the two unsigned long long operands.  */
+static void
+compute_correct_result (unsigned long long *dst,
+ unsigned long long *src,
+ unsigned long long *res)
+{
+ res[0] = dst[0] & src[0];
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ long long r, ck; /* Intrinsic result and reference result.  */
+ int fail = 0;
+
+ /* Compare intrinsic and reference on MMXops pairs (i, i + 1).  */
+ for (i = 0; i < MMX_num_ops; i += 2)
+ {
+ test_pand (&MMXops[i], &MMXops[i + 1], &r);
+ compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck); /* &ck: signed.  */
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0) /* Any mismatch fails the whole test.  */
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-pandn.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-pandn.c
new file mode 100644
index 00000000000..a0cce55a4ea
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-pandn.c
@@ -0,0 +1,44 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone)) /* Not inlined or cloned into callers.  */
+static void
+test_pandn (long long *ll1, long long *ll2, long long *r)
+{
+ __m64 t1 = *(__m64 *) ll1; /* Reinterpret each 64-bit input as __m64.  */
+ __m64 t2 = *(__m64 *) ll2;
+ *(__m64 *) r = _m_pandn (t1, t2); /* Intrinsic under test.  */
+}
+
+/* Reference: bitwise AND of the complement of *dst with *src.  */
+static void
+compute_correct_result (unsigned long long *dst,
+ unsigned long long *src,
+ unsigned long long *res)
+{
+ res[0] = ~dst[0] & src[0]; /* Only the first operand is inverted.  */
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ long long r, ck; /* Intrinsic result and reference result.  */
+ int fail = 0;
+
+ /* Compare intrinsic and reference on MMXops pairs (i, i + 1).  */
+ for (i = 0; i < MMX_num_ops; i += 2)
+ {
+ test_pandn (&MMXops[i], &MMXops[i + 1], &r);
+ compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck); /* &ck: signed.  */
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0) /* Any mismatch fails the whole test.  */
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-pavgb.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-pavgb.c
new file mode 100644
index 00000000000..fbb586e1d21
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-pavgb.c
@@ -0,0 +1,52 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone)) /* Not inlined or cloned into callers.  */
+static void
+test_pavgb (long long *ll1, long long *ll2, long long *r)
+{
+ __m64 t1 = *(__m64 *) ll1; /* Reinterpret each 64-bit input as __m64.  */
+ __m64 t2 = *(__m64 *) ll2;
+ *(__m64 *) r = _m_pavgb (t1, t2); /* Intrinsic under test.  */
+}
+
+/* Reference: res[i] = (dst[i] + src[i] + 1) >> 1 for 8 unsigned chars.  */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+ long long *res_p)
+{
+ unsigned char *dst = (unsigned char *) dst_p;
+ unsigned char *src = (unsigned char *) src_p;
+ unsigned char *res = (unsigned char *) res_p;
+ int i;
+ unsigned int tmp; /* Wider than the operands, so the sum keeps its carry.  */
+ for (i = 0; i < 8; i++)
+ {
+ tmp = dst[i] + src[i] + 1; /* The + 1 rounds the average upward.  */
+ res[i] = tmp >> 1;
+ }
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ long long r, ck; /* Intrinsic result and reference result.  */
+ int fail = 0;
+
+ /* Compare intrinsic and reference on MMXops pairs (i, i + 1).  */
+ for (i = 0; i < MMX_num_ops; i += 2)
+ {
+ test_pavgb (&MMXops[i], &MMXops[i + 1], &r);
+ compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0) /* Any mismatch fails the whole test.  */
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-pavgw.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-pavgw.c
new file mode 100644
index 00000000000..81cdc213541
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-pavgw.c
@@ -0,0 +1,52 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone)) /* Not inlined or cloned into callers.  */
+static void
+test_pavgw (long long *ll1, long long *ll2, long long *r)
+{
+ __m64 t1 = *(__m64 *) ll1; /* Reinterpret each 64-bit input as __m64.  */
+ __m64 t2 = *(__m64 *) ll2;
+ *(__m64 *) r = _m_pavgw (t1, t2); /* Intrinsic under test.  */
+}
+
+/* Reference: res[i] = (dst[i] + src[i] + 1) >> 1 for 4 unsigned shorts.  */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+ long long *res_p)
+{
+ unsigned short *dst = (unsigned short *) dst_p;
+ unsigned short *src = (unsigned short *) src_p;
+ unsigned short *res = (unsigned short *) res_p;
+ int i;
+ unsigned int tmp; /* Wider than the operands, so the sum keeps its carry.  */
+ for (i = 0; i < 4; i++)
+ {
+ tmp = dst[i] + src[i] + 1; /* The + 1 rounds the average upward.  */
+ res[i] = tmp >> 1;
+ }
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ long long r, ck; /* Intrinsic result and reference result.  */
+ int fail = 0;
+
+ /* Compare intrinsic and reference on MMXops pairs (i, i + 1).  */
+ for (i = 0; i < MMX_num_ops; i += 2)
+ {
+ test_pavgw (&MMXops[i], &MMXops[i + 1], &r);
+ compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0) /* Any mismatch fails the whole test.  */
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-pcmpeqb.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-pcmpeqb.c
new file mode 100644
index 00000000000..d355ba0a757
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-pcmpeqb.c
@@ -0,0 +1,48 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone)) /* Not inlined or cloned into callers.  */
+static void
+test_pcmpeqb (long long *ll1, long long *ll2, long long *r)
+{
+ __m64 t1 = *(__m64 *) ll1; /* Reinterpret each 64-bit input as __m64.  */
+ __m64 t2 = *(__m64 *) ll2;
+ *(__m64 *) r = _m_pcmpeqb (t1, t2); /* Intrinsic under test.  */
+}
+
+/* Reference: res[i] = -1 if dst[i] == src[i], else 0, for 8 chars.  */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+ long long *res_p)
+{
+ char *dst = (char *) dst_p;
+ char *src = (char *) src_p;
+ char *res = (char *) res_p;
+ int i;
+ for (i = 0; i < 8; i++)
+ res[i] = dst[i] == src[i] ? -1 : 0; /* -1 sets every bit of the lane.  */
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ long long r, ck; /* Intrinsic result and reference result.  */
+ int fail = 0;
+
+ /* Compare intrinsic and reference on MMXops pairs (i, i + 1).  */
+ for (i = 0; i < MMX_num_ops; i += 2)
+ {
+ test_pcmpeqb (&MMXops[i], &MMXops[i + 1], &r);
+ compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0) /* Any mismatch fails the whole test.  */
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-pcmpeqd.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-pcmpeqd.c
new file mode 100644
index 00000000000..bd896249212
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-pcmpeqd.c
@@ -0,0 +1,48 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_pcmpeqd (long long *ll1, long long *ll2, long long *r)
+{
+ __m64 t1 = *(__m64 *) ll1;
+ __m64 t2 = *(__m64 *) ll2;
+ *(__m64 *) r = _m_pcmpeqd (t1, t2);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+ long long *res_p)
+{
+ int *dst = (int *) dst_p;
+ int *src = (int *) src_p;
+ int *res = (int *) res_p;
+ int i;
+ for (i = 0; i < 2; i++)
+ res[i] = dst[i] == src[i] ? -1 : 0;
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ long long r, ck;
+ int fail = 0;
+
+ /* Run the MMX tests */
+ for (i = 0; i < MMX_num_ops; i += 2)
+ {
+ test_pcmpeqd (&MMXops[i], &MMXops[i + 1], &r);
+ compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0)
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-pcmpeqw.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-pcmpeqw.c
new file mode 100644
index 00000000000..ae15437f1cb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-pcmpeqw.c
@@ -0,0 +1,48 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_pcmpeqw (long long *ll1, long long *ll2, long long *r)
+{
+ __m64 t1 = *(__m64 *) ll1;
+ __m64 t2 = *(__m64 *) ll2;
+ *(__m64 *) r = _m_pcmpeqw (t1, t2);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+ long long *res_p)
+{
+ short *dst = (short *) dst_p;
+ short *src = (short *) src_p;
+ short *res = (short *) res_p;
+ int i;
+ for (i = 0; i < 4; i++)
+ res[i] = dst[i] == src[i] ? -1 : 0;
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ long long r, ck;
+ int fail = 0;
+
+ /* Run the MMX tests */
+ for (i = 0; i < MMX_num_ops; i += 2)
+ {
+ test_pcmpeqw (&MMXops[i], &MMXops[i + 1], &r);
+ compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0)
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-pcmpgtb.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-pcmpgtb.c
new file mode 100644
index 00000000000..f175c698803
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-pcmpgtb.c
@@ -0,0 +1,48 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_pcmpgtb (long long *ll1, long long *ll2, long long *r)
+{
+ __m64 t1 = *(__m64 *) ll1;
+ __m64 t2 = *(__m64 *) ll2;
+ *(__m64 *) r = _m_pcmpgtb (t1, t2);
+}
+
+/* Reference PCMPGTB: result byte is -1 where the signed DST byte exceeds the SRC byte, else 0.  */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+ long long *res_p)
+{
+ signed char *dst = (signed char *) dst_p; /* explicit signedness: plain char may be unsigned */
+ signed char *src = (signed char *) src_p;
+ signed char *res = (signed char *) res_p;
+ int i;
+ for (i = 0; i < 8; i++)
+ res[i] = dst[i] > src[i] ? -1 : 0;
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ long long r, ck;
+ int fail = 0;
+
+ /* Run the MMX tests */
+ for (i = 0; i < MMX_num_ops; i += 2)
+ {
+ test_pcmpgtb (&MMXops[i], &MMXops[i + 1], &r);
+ compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0)
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-pcmpgtd.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-pcmpgtd.c
new file mode 100644
index 00000000000..451afb54c3d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-pcmpgtd.c
@@ -0,0 +1,48 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_pcmpgtd (long long *ll1, long long *ll2, long long *r)
+{
+ __m64 t1 = *(__m64 *) ll1;
+ __m64 t2 = *(__m64 *) ll2;
+ *(__m64 *) r = _m_pcmpgtd (t1, t2);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+ long long *res_p)
+{
+ int *dst = (int *) dst_p;
+ int *src = (int *) src_p;
+ int *res = (int *) res_p;
+ int i;
+ for (i = 0; i < 2; i++)
+ res[i] = dst[i] > src[i] ? -1 : 0;
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ long long r, ck;
+ int fail = 0;
+
+ /* Run the MMX tests */
+ for (i = 0; i < MMX_num_ops; i += 2)
+ {
+ test_pcmpgtd (&MMXops[i], &MMXops[i + 1], &r);
+ compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0)
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-pcmpgtw.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-pcmpgtw.c
new file mode 100644
index 00000000000..f3ebe8eedb9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-pcmpgtw.c
@@ -0,0 +1,48 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_pcmpgtw (long long *ll1, long long *ll2, long long *r)
+{
+ __m64 t1 = *(__m64 *) ll1;
+ __m64 t2 = *(__m64 *) ll2;
+ *(__m64 *) r = _m_pcmpgtw (t1, t2);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+ long long *res_p)
+{
+ short *dst = (short *) dst_p;
+ short *src = (short *) src_p;
+ short *res = (short *) res_p;
+ int i;
+ for (i = 0; i < 4; i++)
+ res[i] = dst[i] > src[i] ? -1 : 0;
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ long long r, ck;
+ int fail = 0;
+
+ /* Run the MMX tests */
+ for (i = 0; i < MMX_num_ops; i += 2)
+ {
+ test_pcmpgtw (&MMXops[i], &MMXops[i + 1], &r);
+ compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0)
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-pextrw.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-pextrw.c
new file mode 100644
index 00000000000..bb48740a7ca
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-pextrw.c
@@ -0,0 +1,59 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include <string.h>
+#include "sse2-check.h"
+
+__attribute__((noinline, noclone))
+static void
+test_pextrw (__m64 *i, unsigned int imm, int *r)
+{
+ switch (imm)
+ {
+ case 0:
+ *r = _m_pextrw (*i, 0);
+ break;
+ case 1:
+ *r = _m_pextrw (*i, 1);
+ break;
+ case 2:
+ *r = _m_pextrw (*i, 2);
+ break;
+ case 3:
+ *r = _m_pextrw (*i, 3);
+ break;
+ default:
+ break;
+ }
+}
+
+/* Reference PEXTRW: word IMM of the source, zero-extended to int as the instruction does.  */
+static void
+compute_correct_result (__m64 *src_p, unsigned int imm, int *res_p)
+{
+ unsigned short *src = (unsigned short *) src_p;
+ if (imm < 4)
+ *res_p = src[imm];
+}
+
+static void
+sse2_test (void)
+{
+ int r, ck;
+ int i;
+ int failed = 0;
+ __v4hi y = { 3320, -3339, 48, 4392 };
+
+ /* Run the MMX tests */
+ for (i = 0; i < 4; i++)
+ {
+ test_pextrw ((__m64 *) &y, i, &r);
+ compute_correct_result ((__m64 *) &y, i, &ck);
+ if (r != ck)
+ failed++;
+ }
+
+ if (failed)
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-pinsrw.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-pinsrw.c
new file mode 100644
index 00000000000..c25ddd96daa
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-pinsrw.c
@@ -0,0 +1,61 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include <string.h>
+#include "sse2-check.h"
+
+__attribute__((noinline, noclone))
+static void
+test_pinsrw (__m64 *i, int val, unsigned int imm, int *r)
+{
+ switch (imm)
+ {
+ case 0:
+ *(__m64 *) r = _m_pinsrw (*i, val, 0);
+ break;
+ case 1:
+ *(__m64 *) r = _m_pinsrw (*i, val, 1);
+ break;
+ case 2:
+ *(__m64 *) r = _m_pinsrw (*i, val, 2);
+ break;
+ case 3:
+ *(__m64 *) r = _m_pinsrw (*i, val, 3);
+ break;
+ default:
+ break;
+ }
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (__m64 *src_p, int val, unsigned int imm,
+ int *res_p)
+{
+ short *res = (short *) res_p;
+ *(__m64 *) res_p = *src_p;
+ if (imm < 4)
+ res[imm] = val;
+}
+
+static void
+sse2_test (void)
+{
+ int r[2], ck[2]; /* 64-bit buffers: both helpers store a whole __m64 through these */
+ int i;
+ int failed = 0;
+ __v4hi y = { 3320, -3339, 48, 4392 };
+
+ /* Insert 0x1234 into each of the four word slots and compare all 64 result bits */
+ for (i = 0; i < 4; i++)
+ {
+ test_pinsrw ((__m64 *) &y, 0x1234, i, r);
+ compute_correct_result ((__m64 *) &y, 0x1234, i, ck);
+ if (r[0] != ck[0] || r[1] != ck[1])
+ failed++;
+ }
+
+ if (failed)
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-pmaddwd.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-pmaddwd.c
new file mode 100644
index 00000000000..1eace62088a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-pmaddwd.c
@@ -0,0 +1,47 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_pmaddwd (long long *ll1, long long *ll2, long long *r)
+{
+ __m64 t1 = *(__m64 *) ll1;
+ __m64 t2 = *(__m64 *) ll2;
+ *(__m64 *) r = _m_pmaddwd (t1, t2);
+}
+
+/* Reference PMADDWD: each result dword is the sum of two adjacent signed word products.  */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+ long long *res_p)
+{
+ short *dst = (short *) dst_p;
+ short *src = (short *) src_p;
+ int *res = (int *) res_p;
+ res[0] = (unsigned int) (dst[0] * src[0]) + (unsigned int) (dst[1] * src[1]); /* unsigned add wraps like the insn when both products are 0x40000000 */
+ res[1] = (unsigned int) (dst[2] * src[2]) + (unsigned int) (dst[3] * src[3]);
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ long long r, ck;
+ int fail = 0;
+
+ /* Run the MMX tests */
+ for (i = 0; i < MMX_num_ops; i += 2)
+ {
+ test_pmaddwd (&MMXops[i], &MMXops[i + 1], &r);
+ compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0)
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-pmaxsw.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-pmaxsw.c
new file mode 100644
index 00000000000..ec3b68f09d5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-pmaxsw.c
@@ -0,0 +1,48 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_pmaxsw (long long *ll1, long long *ll2, long long *r)
+{
+ __m64 t1 = *(__m64 *) ll1;
+ __m64 t2 = *(__m64 *) ll2;
+ *(__m64 *) r = _m_pmaxsw (t1, t2);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+ long long *res_p)
+{
+ short *dst = (short *) dst_p;
+ short *src = (short *) src_p;
+ short *res = (short *) res_p;
+ int i;
+ for (i = 0; i < 4; i++)
+ res[i] = dst[i] > src[i] ? dst[i] : src[i];
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ long long r, ck;
+ int fail = 0;
+
+ /* Run the MMX tests */
+ for (i = 0; i < MMX_num_ops; i += 2)
+ {
+ test_pmaxsw (&MMXops[i], &MMXops[i + 1], &r);
+ compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0)
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-pmaxub.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-pmaxub.c
new file mode 100644
index 00000000000..085d6a0eaf5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-pmaxub.c
@@ -0,0 +1,48 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_pmaxub (long long *ll1, long long *ll2, long long *r)
+{
+ __m64 t1 = *(__m64 *) ll1;
+ __m64 t2 = *(__m64 *) ll2;
+ *(__m64 *) r = _m_pmaxub (t1, t2);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+ long long *res_p)
+{
+ unsigned char *dst = (unsigned char *) dst_p;
+ unsigned char *src = (unsigned char *) src_p;
+ unsigned char *res = (unsigned char *) res_p;
+ int i;
+ for (i = 0; i < 8; i++)
+ res[i] = dst[i] > src[i] ? dst[i] : src[i];
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ long long r, ck;
+ int fail = 0;
+
+ /* Run the MMX tests */
+ for (i = 0; i < MMX_num_ops; i += 2)
+ {
+ test_pmaxub (&MMXops[i], &MMXops[i + 1], &r);
+ compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0)
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-pminsw.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-pminsw.c
new file mode 100644
index 00000000000..f64d9b2e2b7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-pminsw.c
@@ -0,0 +1,48 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_pminsw (long long *ll1, long long *ll2, long long *r)
+{
+ __m64 t1 = *(__m64 *) ll1;
+ __m64 t2 = *(__m64 *) ll2;
+ *(__m64 *) r = _m_pminsw (t1, t2);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+ long long *res_p)
+{
+ short *dst = (short *) dst_p;
+ short *src = (short *) src_p;
+ short *res = (short *) res_p;
+ int i;
+ for (i = 0; i < 4; i++)
+ res[i] = dst[i] < src[i] ? dst[i] : src[i];
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ long long r, ck;
+ int fail = 0;
+
+ /* Run the MMX tests */
+ for (i = 0; i < MMX_num_ops; i += 2)
+ {
+ test_pminsw (&MMXops[i], &MMXops[i + 1], &r);
+ compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0)
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-pminub.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-pminub.c
new file mode 100644
index 00000000000..372e5ef8764
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-pminub.c
@@ -0,0 +1,48 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_pminub (long long *ll1, long long *ll2, long long *r)
+{
+ __m64 t1 = *(__m64 *) ll1;
+ __m64 t2 = *(__m64 *) ll2;
+ *(__m64 *) r = _m_pminub (t1, t2);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+ long long *res_p)
+{
+ unsigned char *dst = (unsigned char *) dst_p;
+ unsigned char *src = (unsigned char *) src_p;
+ unsigned char *res = (unsigned char *) res_p;
+ int i;
+ for (i = 0; i < 8; i++)
+ res[i] = dst[i] < src[i] ? dst[i] : src[i];
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ long long r, ck;
+ int fail = 0;
+
+ /* Run the MMX tests */
+ for (i = 0; i < MMX_num_ops; i += 2)
+ {
+ test_pminub (&MMXops[i], &MMXops[i + 1], &r);
+ compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0)
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-pmovmskb.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-pmovmskb.c
new file mode 100644
index 00000000000..46b18d20558
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-pmovmskb.c
@@ -0,0 +1,46 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_pmovmskb (long long *ll1, int *r)
+{
+ __m64 t1 = *(__m64 *) ll1;
+ *r = _m_pmovmskb (t1);
+}
+
+/* Reference PMOVMSKB: bit i of the result is the top bit of source byte i (i = 0..7).  */
+static void
+compute_correct_result (long long *src_p, int *res_p)
+{
+ char *src = (char *) src_p;
+ int i;
+ int res = 0;
+ for (i = 0; i < 8; i++)
+ res |= ((src[i] & 0x80) >> 7) << i;
+ *res_p = res;
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ int r, ck;
+ int fail = 0;
+
+ /* Run the MMX tests */
+ for (i = 0; i < MMX_num_ops; i++)
+ {
+ test_pmovmskb (&MMXops[i], &r);
+ compute_correct_result (&MMXops[i], &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0)
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-pmulhuw.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-pmulhuw.c
new file mode 100644
index 00000000000..0f8fcbe02b8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-pmulhuw.c
@@ -0,0 +1,51 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_pmulhuw (long long *ll1, long long *ll2, long long *r)
+{
+ __m64 t1 = *(__m64 *) ll1;
+ __m64 t2 = *(__m64 *) ll2;
+ *(__m64 *) r = _m_pmulhuw (t1, t2);
+}
+
+/* Reference PMULHUW: high 16 bits of each unsigned 16x16-bit word product.  */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+ long long *res_p)
+{
+ unsigned short *dst = (unsigned short *) dst_p;
+ unsigned short *src = (unsigned short *) src_p;
+ unsigned short *res = (unsigned short *) res_p;
+ int i;
+ for (i = 0; i < 4; i++)
+ {
+ unsigned int t = (unsigned int) dst[i] * src[i]; /* multiply as unsigned: promotion to int would overflow for large words */
+ res[i] = t >> 16;
+ }
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ long long r, ck;
+ int fail = 0;
+
+ /* Run the MMX tests */
+ for (i = 0; i < MMX_num_ops; i += 2)
+ {
+ test_pmulhuw (&MMXops[i], &MMXops[i + 1], &r);
+ compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0)
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-pmulhw.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-pmulhw.c
new file mode 100644
index 00000000000..b89c58f3b6d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-pmulhw.c
@@ -0,0 +1,53 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_pmulhw (long long *ll1, long long *ll2, long long *r)
+{
+ __m64 t1 = *(__m64 *) ll1;
+ __m64 t2 = *(__m64 *) ll2;
+ *(__m64 *) r = _m_pmulhw (t1, t2);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+ long long *res_p)
+{
+ short *dst = (short *) dst_p;
+ short *src = (short *) src_p;
+ short *res = (short *) res_p;
+ int tmp;
+ int i;
+ for (i = 0; i < 4; i++)
+ {
+ tmp = dst[i] * src[i];
+ tmp >>= 16;
+ res[i] = tmp;
+ }
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ long long r, ck;
+ int fail = 0;
+
+ /* Run the MMX tests */
+ for (i = 0; i < MMX_num_ops; i += 2)
+ {
+ test_pmulhw (&MMXops[i], &MMXops[i + 1], &r);
+ compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0)
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-pmullw.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-pmullw.c
new file mode 100644
index 00000000000..13b89622560
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-pmullw.c
@@ -0,0 +1,52 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_pmullw (long long *ll1, long long *ll2, long long *r)
+{
+ __m64 t1 = *(__m64 *) ll1;
+ __m64 t2 = *(__m64 *) ll2;
+ *(__m64 *) r = _m_pmullw (t1, t2);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+ long long *res_p)
+{
+ short *dst = (short *) dst_p;
+ short *src = (short *) src_p;
+ short *res = (short *) res_p;
+ int tmp;
+ int i;
+ for (i = 0; i < 4; i++)
+ {
+ tmp = dst[i] * src[i];
+ res[i] = tmp;
+ }
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ long long r, ck;
+ int fail = 0;
+
+ /* Run the MMX tests */
+ for (i = 0; i < MMX_num_ops; i += 2)
+ {
+ test_pmullw (&MMXops[i], &MMXops[i + 1], &r);
+ compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0)
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-pmuludq.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-pmuludq.c
new file mode 100644
index 00000000000..819e0e13c03
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-pmuludq.c
@@ -0,0 +1,47 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_pmuludq (long long *ll1, long long *ll2, long long *r)
+{
+ __m64 t1 = *(__m64 *) ll1;
+ __m64 t2 = *(__m64 *) ll2;
+ *(__m64 *) r = _mm_mul_su32 (t1, t2);
+}
+
+/* Reference PMULUDQ: 64-bit product of the first unsigned dwords of DST and SRC.  */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+ long long *res_p)
+{
+ unsigned int *dst = (unsigned int*) dst_p;
+ unsigned int *src = (unsigned int *) src_p;
+ unsigned long long *res = (unsigned long long *) res_p;
+ res[0] = dst[0]; /* widen to 64 bits before multiplying */
+ res[0] *= src[0];
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ long long r, ck;
+ int fail = 0;
+
+ /* Run the MMX tests */
+ for (i = 0; i < MMX_num_ops; i += 2)
+ {
+ test_pmuludq (&MMXops[i], &MMXops[i + 1], &r);
+ compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0)
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-por.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-por.c
new file mode 100644
index 00000000000..9dc6072fffa
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-por.c
@@ -0,0 +1,44 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_por (long long *ll1, long long *ll2, long long *r)
+{
+ __m64 t1 = *(__m64 *) ll1;
+ __m64 t2 = *(__m64 *) ll2;
+ *(__m64 *) r = _m_por (t1, t2);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (unsigned long long *dst,
+ unsigned long long *src,
+ unsigned long long *res)
+{
+ res[0] = dst[0] | src[0];
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ long long r, ck;
+ int fail = 0;
+
+ /* Run the MMX tests */
+ for (i = 0; i < MMX_num_ops; i += 2)
+ {
+ test_por (&MMXops[i], &MMXops[i + 1], &r);
+ compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0)
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-psadbw.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-psadbw.c
new file mode 100644
index 00000000000..223d3b280bd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-psadbw.c
@@ -0,0 +1,58 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_psadbw (long long *ll1, long long *ll2, long long *r)
+{
+ __m64 t1 = *(__m64 *) ll1;
+ __m64 t2 = *(__m64 *) ll2;
+ *(__m64 *) r = _m_psadbw (t1, t2);
+}
+
+/* Reference PSADBW: word 0 is the sum of absolute differences of the eight unsigned byte pairs; words 1..3 are zero.  */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+ long long *res_p)
+{
+ unsigned char *dst = (unsigned char *) dst_p;
+ unsigned char *src = (unsigned char *) src_p;
+ unsigned short *res = (unsigned short *) res_p;
+ int i;
+ int tmp;
+ unsigned int sum = 0;
+ for (i = 0; i < 8; i++)
+ {
+ tmp = dst[i] - src[i];
+ if (tmp < 0)
+ tmp = -tmp;
+ sum += tmp;
+ }
+ res[0] = sum;
+ for (i = 1; i < 4; i++)
+ res[i] = 0;
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ long long r, ck;
+ int fail = 0;
+
+ /* Run the MMX tests */
+ for (i = 0; i < MMX_num_ops; i += 2)
+ {
+ test_psadbw (&MMXops[i], &MMXops[i + 1], &r);
+ compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0)
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-pshufw.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-pshufw.c
new file mode 100644
index 00000000000..b7236586216
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-pshufw.c
@@ -0,0 +1,248 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_pshufw (long long *ll1, unsigned int imm, long long *r)
+{
+ __m64 t1 = *(__m64 *) ll1;
+ switch (imm)
+ {
+ case 0:
+ *(__m64 *) r = _m_pshufw (t1, 0);
+ break;
+ case 1:
+ *(__m64 *) r = _m_pshufw (t1, 1);
+ break;
+ case 2:
+ *(__m64 *) r = _m_pshufw (t1, 2);
+ break;
+ case 3:
+ *(__m64 *) r = _m_pshufw (t1, 3);
+ break;
+ case 4:
+ *(__m64 *) r = _m_pshufw (t1, 4);
+ break;
+ case 5:
+ *(__m64 *) r = _m_pshufw (t1, 5);
+ break;
+ case 6:
+ *(__m64 *) r = _m_pshufw (t1, 6);
+ break;
+ case 7:
+ *(__m64 *) r = _m_pshufw (t1, 7);
+ break;
+ case 8:
+ *(__m64 *) r = _m_pshufw (t1, 8);
+ break;
+ case 9:
+ *(__m64 *) r = _m_pshufw (t1, 9);
+ break;
+ case 10:
+ *(__m64 *) r = _m_pshufw (t1, 10);
+ break;
+ case 11:
+ *(__m64 *) r = _m_pshufw (t1, 11);
+ break;
+ case 12:
+ *(__m64 *) r = _m_pshufw (t1, 12);
+ break;
+ case 13:
+ *(__m64 *) r = _m_pshufw (t1, 13);
+ break;
+ case 14:
+ *(__m64 *) r = _m_pshufw (t1, 14);
+ break;
+ case 15:
+ *(__m64 *) r = _m_pshufw (t1, 15);
+ break;
+ case 16:
+ *(__m64 *) r = _m_pshufw (t1, 16);
+ break;
+ case 17:
+ *(__m64 *) r = _m_pshufw (t1, 17);
+ break;
+ case 18:
+ *(__m64 *) r = _m_pshufw (t1, 18);
+ break;
+ case 19:
+ *(__m64 *) r = _m_pshufw (t1, 19);
+ break;
+ case 20:
+ *(__m64 *) r = _m_pshufw (t1, 20);
+ break;
+ case 21:
+ *(__m64 *) r = _m_pshufw (t1, 21);
+ break;
+ case 22:
+ *(__m64 *) r = _m_pshufw (t1, 22);
+ break;
+ case 23:
+ *(__m64 *) r = _m_pshufw (t1, 23);
+ break;
+ case 24:
+ *(__m64 *) r = _m_pshufw (t1, 24);
+ break;
+ case 25:
+ *(__m64 *) r = _m_pshufw (t1, 25);
+ break;
+ case 26:
+ *(__m64 *) r = _m_pshufw (t1, 26);
+ break;
+ case 27:
+ *(__m64 *) r = _m_pshufw (t1, 27);
+ break;
+ case 28:
+ *(__m64 *) r = _m_pshufw (t1, 28);
+ break;
+ case 29:
+ *(__m64 *) r = _m_pshufw (t1, 29);
+ break;
+ case 30:
+ *(__m64 *) r = _m_pshufw (t1, 30);
+ break;
+ case 31:
+ *(__m64 *) r = _m_pshufw (t1, 31);
+ break;
+ case 32:
+ *(__m64 *) r = _m_pshufw (t1, 32);
+ break;
+ case 33:
+ *(__m64 *) r = _m_pshufw (t1, 33);
+ break;
+ case 34:
+ *(__m64 *) r = _m_pshufw (t1, 34);
+ break;
+ case 35:
+ *(__m64 *) r = _m_pshufw (t1, 35);
+ break;
+ case 36:
+ *(__m64 *) r = _m_pshufw (t1, 36);
+ break;
+ case 37:
+ *(__m64 *) r = _m_pshufw (t1, 37);
+ break;
+ case 38:
+ *(__m64 *) r = _m_pshufw (t1, 38);
+ break;
+ case 39:
+ *(__m64 *) r = _m_pshufw (t1, 39);
+ break;
+ case 40:
+ *(__m64 *) r = _m_pshufw (t1, 40);
+ break;
+ case 41:
+ *(__m64 *) r = _m_pshufw (t1, 41);
+ break;
+ case 42:
+ *(__m64 *) r = _m_pshufw (t1, 42);
+ break;
+ case 43:
+ *(__m64 *) r = _m_pshufw (t1, 43);
+ break;
+ case 44:
+ *(__m64 *) r = _m_pshufw (t1, 44);
+ break;
+ case 45:
+ *(__m64 *) r = _m_pshufw (t1, 45);
+ break;
+ case 46:
+ *(__m64 *) r = _m_pshufw (t1, 46);
+ break;
+ case 47:
+ *(__m64 *) r = _m_pshufw (t1, 47);
+ break;
+ case 48:
+ *(__m64 *) r = _m_pshufw (t1, 48);
+ break;
+ case 49:
+ *(__m64 *) r = _m_pshufw (t1, 49);
+ break;
+ case 50:
+ *(__m64 *) r = _m_pshufw (t1, 50);
+ break;
+ case 51:
+ *(__m64 *) r = _m_pshufw (t1, 51);
+ break;
+ case 52:
+ *(__m64 *) r = _m_pshufw (t1, 52);
+ break;
+ case 53:
+ *(__m64 *) r = _m_pshufw (t1, 53);
+ break;
+ case 54:
+ *(__m64 *) r = _m_pshufw (t1, 54);
+ break;
+ case 55:
+ *(__m64 *) r = _m_pshufw (t1, 55);
+ break;
+ case 56:
+ *(__m64 *) r = _m_pshufw (t1, 56);
+ break;
+ case 57:
+ *(__m64 *) r = _m_pshufw (t1, 57);
+ break;
+ case 58:
+ *(__m64 *) r = _m_pshufw (t1, 58);
+ break;
+ case 59:
+ *(__m64 *) r = _m_pshufw (t1, 59);
+ break;
+ case 60:
+ *(__m64 *) r = _m_pshufw (t1, 60);
+ break;
+ case 61:
+ *(__m64 *) r = _m_pshufw (t1, 61);
+ break;
+ case 62:
+ *(__m64 *) r = _m_pshufw (t1, 62);
+ break;
+ case 63:
+ *(__m64 *) r = _m_pshufw (t1, 63);
+ break;
+ default:
+ break;
+ }
+}
+
+/* Reference PSHUFW: result word i is the source word selected by bits [2i+1:2i] of IMM.  */
+static void
+compute_correct_result (long long *src_p, unsigned int imm,
+ long long *res_p)
+{
+ unsigned long long src = *(unsigned long long *) src_p;
+ unsigned short *res = (unsigned short *) res_p;
+ int i;
+ unsigned int shift;
+ for (i = 0; i < 4; i++)
+ {
+ shift = ((imm >> (2 * i)) & 0x3) * 16; /* bit offset of the selected source word */
+ res[i] = (src >> shift) & 0xffff;
+ }
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ long long r, ck;
+ int fail = 0;
+
+ /* Run the MMX tests, using the operand index itself as the shuffle immediate */
+ for (i = 0; i < MMX_num_ops; i++)
+ {
+ if (i > 63) /* test_pshufw only has cases for immediates 0..63 */
+ break;
+ test_pshufw (&MMXops[i], i, &r);
+ compute_correct_result (&MMXops[i], i, &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0)
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-pslld.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-pslld.c
new file mode 100644
index 00000000000..86983c0b848
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-pslld.c
@@ -0,0 +1,52 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_pslld (long long *ll1, long long *ll2, long long *r)
+{
+ __m64 t1 = *(__m64 *) ll1;
+ __m64 t2 = *(__m64 *) ll2;
+ *(__m64 *) r = _m_pslld (t1, t2);
+}
+
+/* Reference PSLLD: shift both dwords of DST left by the 64-bit count in SRC; a count above 31 gives zero.  */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+ long long *res_p)
+{
+ unsigned int *dst = (unsigned int *) dst_p;
+ unsigned int *src = (unsigned int *) src_p;
+ unsigned int *res = (unsigned int *) res_p;
+ int i;
+ if (src[1] || src[0] > 31) /* any high-dword bit or a low dword above 31 means count > 31 */
+ for (i = 0; i < 2; i++)
+ res[i] = 0;
+ else
+ for (i = 0; i < 2; i++)
+ res[i] = dst[i] << src[0];
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ long long r, ck;
+ int fail = 0;
+
+ /* Run the MMX tests */
+ for (i = 0; i < MMX_num_ops; i += 2)
+ {
+ test_pslld (&MMXops[i], &MMXops[i + 1], &r);
+ compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0)
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-pslldi.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-pslldi.c
new file mode 100644
index 00000000000..07f0ae9d83c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-pslldi.c
@@ -0,0 +1,153 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_psllwi (long long *ll1, unsigned int imm, long long *r)
+{
+  __m64 val = *(__m64 *) ll1;
+  __m64 *out = (__m64 *) r;
+  switch (imm)  /* NOTE: despite the function name, this tests _m_pslldi */
+    {
+    case 0:
+      *out = _m_pslldi (val, 0);
+      break;
+    case 1:
+      *out = _m_pslldi (val, 1);
+      break;
+    case 2:
+      *out = _m_pslldi (val, 2);
+      break;
+    case 3:
+      *out = _m_pslldi (val, 3);
+      break;
+    case 4:
+      *out = _m_pslldi (val, 4);
+      break;
+    case 5:
+      *out = _m_pslldi (val, 5);
+      break;
+    case 6:
+      *out = _m_pslldi (val, 6);
+      break;
+    case 7:
+      *out = _m_pslldi (val, 7);
+      break;
+    case 8:
+      *out = _m_pslldi (val, 8);
+      break;
+    case 9:
+      *out = _m_pslldi (val, 9);
+      break;
+    case 10:
+      *out = _m_pslldi (val, 10);
+      break;
+    case 11:
+      *out = _m_pslldi (val, 11);
+      break;
+    case 12:
+      *out = _m_pslldi (val, 12);
+      break;
+    case 13:
+      *out = _m_pslldi (val, 13);
+      break;
+    case 14:
+      *out = _m_pslldi (val, 14);
+      break;
+    case 15:
+      *out = _m_pslldi (val, 15);
+      break;
+    case 16:
+      *out = _m_pslldi (val, 16);
+      break;
+    case 17:
+      *out = _m_pslldi (val, 17);
+      break;
+    case 18:
+      *out = _m_pslldi (val, 18);
+      break;
+    case 19:
+      *out = _m_pslldi (val, 19);
+      break;
+    case 20:
+      *out = _m_pslldi (val, 20);
+      break;
+    case 21:
+      *out = _m_pslldi (val, 21);
+      break;
+    case 22:
+      *out = _m_pslldi (val, 22);
+      break;
+    case 23:
+      *out = _m_pslldi (val, 23);
+      break;
+    case 24:
+      *out = _m_pslldi (val, 24);
+      break;
+    case 25:
+      *out = _m_pslldi (val, 25);
+      break;
+    case 26:
+      *out = _m_pslldi (val, 26);
+      break;
+    case 27:
+      *out = _m_pslldi (val, 27);
+      break;
+    case 28:
+      *out = _m_pslldi (val, 28);
+      break;
+    case 29:
+      *out = _m_pslldi (val, 29);
+      break;
+    case 30:
+      *out = _m_pslldi (val, 30);
+      break;
+    case 31:
+      *out = _m_pslldi (val, 31);
+      break;
+    default:  /* every count above 31 is represented by 32 */
+      *out = _m_pslldi (val, 32);
+    }
+}
+
+/* Reference PSLLD (immediate): shift both 32-bit lanes left by IMM.  */
+static void
+compute_correct_result (long long *src_p, unsigned int imm,
+                        long long *res_p)
+{
+  const unsigned int *lanes = (const unsigned int *) src_p;
+  unsigned int *out = (unsigned int *) res_p;
+  int lane;
+
+  for (lane = 0; lane < 2; lane++)
+    {
+      /* Counts above 31 shift every bit out.  */
+      out[lane] = imm > 31 ? 0 : lanes[lane] << imm;
+    }
+}
+
+static void
+sse2_test (void)
+{
+  long long got, want;
+  unsigned int amount;
+  int mismatches = 0;
+  int idx;
+
+  /* Each MMXops entry is the operand and, converted, the shift count.  */
+  for (idx = 0; idx < MMX_num_ops; idx++)
+    {
+      amount = MMXops[idx];
+      test_psllwi (&MMXops[idx], amount, &got);
+      compute_correct_result (&MMXops[idx], amount, &want);
+      if (got != want)
+        mismatches += 1;
+    }
+
+  if (mismatches)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-psllq.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-psllq.c
new file mode 100644
index 00000000000..37d2e0b22cc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-psllq.c
@@ -0,0 +1,47 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_psllq (long long *ll1, long long *ll2, long long *r)
+{
+  const __m64 *value = (const __m64 *) ll1;  /* operand to shift */
+  const __m64 *count = (const __m64 *) ll2;  /* 64-bit shift count */
+  *(__m64 *) r = _m_psllq (*value, *count);
+}
+
+/* Reference PSLLQ: *res = *dst << *src, or 0 once the count exceeds 63.  */
+static void
+compute_correct_result (unsigned long long *dst,
+                        unsigned long long *src,
+                        unsigned long long *res)
+{
+  const unsigned long long count = *src;
+
+  /* Shifting a 64-bit value by 64 or more is undefined in C, so guard it.  */
+  *res = count <= 63 ? *dst << count : 0;
+}
+
+static void
+sse2_test (void)
+{
+  long long got, want;
+  int mismatches = 0;
+  int idx;
+
+  /* Consecutive MMXops entries form (operand, shift count) pairs.  */
+  for (idx = 0; idx < MMX_num_ops; idx += 2)
+    {
+      test_psllq (&MMXops[idx], &MMXops[idx + 1], &got);
+      compute_correct_result (&MMXops[idx], &MMXops[idx + 1], &want);
+      if (got != want)
+        mismatches += 1;
+    }
+
+  if (mismatches)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-psllqi.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-psllqi.c
new file mode 100644
index 00000000000..655b369e04f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-psllqi.c
@@ -0,0 +1,245 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_psllwi (long long *ll1, unsigned int imm, long long *r)
+{
+  __m64 val = *(__m64 *) ll1;
+  __m64 *out = (__m64 *) r;
+  switch (imm)  /* NOTE: despite the function name, this tests _m_psllqi */
+    {
+    case 0:
+      *out = _m_psllqi (val, 0);
+      break;
+    case 1:
+      *out = _m_psllqi (val, 1);
+      break;
+    case 2:
+      *out = _m_psllqi (val, 2);
+      break;
+    case 3:
+      *out = _m_psllqi (val, 3);
+      break;
+    case 4:
+      *out = _m_psllqi (val, 4);
+      break;
+    case 5:
+      *out = _m_psllqi (val, 5);
+      break;
+    case 6:
+      *out = _m_psllqi (val, 6);
+      break;
+    case 7:
+      *out = _m_psllqi (val, 7);
+      break;
+    case 8:
+      *out = _m_psllqi (val, 8);
+      break;
+    case 9:
+      *out = _m_psllqi (val, 9);
+      break;
+    case 10:
+      *out = _m_psllqi (val, 10);
+      break;
+    case 11:
+      *out = _m_psllqi (val, 11);
+      break;
+    case 12:
+      *out = _m_psllqi (val, 12);
+      break;
+    case 13:
+      *out = _m_psllqi (val, 13);
+      break;
+    case 14:
+      *out = _m_psllqi (val, 14);
+      break;
+    case 15:
+      *out = _m_psllqi (val, 15);
+      break;
+    case 16:
+      *out = _m_psllqi (val, 16);
+      break;
+    case 17:
+      *out = _m_psllqi (val, 17);
+      break;
+    case 18:
+      *out = _m_psllqi (val, 18);
+      break;
+    case 19:
+      *out = _m_psllqi (val, 19);
+      break;
+    case 20:
+      *out = _m_psllqi (val, 20);
+      break;
+    case 21:
+      *out = _m_psllqi (val, 21);
+      break;
+    case 22:
+      *out = _m_psllqi (val, 22);
+      break;
+    case 23:
+      *out = _m_psllqi (val, 23);
+      break;
+    case 24:
+      *out = _m_psllqi (val, 24);
+      break;
+    case 25:
+      *out = _m_psllqi (val, 25);
+      break;
+    case 26:
+      *out = _m_psllqi (val, 26);
+      break;
+    case 27:
+      *out = _m_psllqi (val, 27);
+      break;
+    case 28:
+      *out = _m_psllqi (val, 28);
+      break;
+    case 29:
+      *out = _m_psllqi (val, 29);
+      break;
+    case 30:
+      *out = _m_psllqi (val, 30);
+      break;
+    case 31:
+      *out = _m_psllqi (val, 31);
+      break;
+    case 32:
+      *out = _m_psllqi (val, 32);
+      break;
+    case 33:
+      *out = _m_psllqi (val, 33);
+      break;
+    case 34:
+      *out = _m_psllqi (val, 34);
+      break;
+    case 35:
+      *out = _m_psllqi (val, 35);
+      break;
+    case 36:
+      *out = _m_psllqi (val, 36);
+      break;
+    case 37:
+      *out = _m_psllqi (val, 37);
+      break;
+    case 38:
+      *out = _m_psllqi (val, 38);
+      break;
+    case 39:
+      *out = _m_psllqi (val, 39);
+      break;
+    case 40:
+      *out = _m_psllqi (val, 40);
+      break;
+    case 41:
+      *out = _m_psllqi (val, 41);
+      break;
+    case 42:
+      *out = _m_psllqi (val, 42);
+      break;
+    case 43:
+      *out = _m_psllqi (val, 43);
+      break;
+    case 44:
+      *out = _m_psllqi (val, 44);
+      break;
+    case 45:
+      *out = _m_psllqi (val, 45);
+      break;
+    case 46:
+      *out = _m_psllqi (val, 46);
+      break;
+    case 47:
+      *out = _m_psllqi (val, 47);
+      break;
+    case 48:
+      *out = _m_psllqi (val, 48);
+      break;
+    case 49:
+      *out = _m_psllqi (val, 49);
+      break;
+    case 50:
+      *out = _m_psllqi (val, 50);
+      break;
+    case 51:
+      *out = _m_psllqi (val, 51);
+      break;
+    case 52:
+      *out = _m_psllqi (val, 52);
+      break;
+    case 53:
+      *out = _m_psllqi (val, 53);
+      break;
+    case 54:
+      *out = _m_psllqi (val, 54);
+      break;
+    case 55:
+      *out = _m_psllqi (val, 55);
+      break;
+    case 56:
+      *out = _m_psllqi (val, 56);
+      break;
+    case 57:
+      *out = _m_psllqi (val, 57);
+      break;
+    case 58:
+      *out = _m_psllqi (val, 58);
+      break;
+    case 59:
+      *out = _m_psllqi (val, 59);
+      break;
+    case 60:
+      *out = _m_psllqi (val, 60);
+      break;
+    case 61:
+      *out = _m_psllqi (val, 61);
+      break;
+    case 62:
+      *out = _m_psllqi (val, 62);
+      break;
+    case 63:
+      *out = _m_psllqi (val, 63);
+      break;
+    default:  /* every count above 63 is represented by 64 */
+      *out = _m_psllqi (val, 64);
+    }
+}
+
+/* Reference PSLLQ (immediate): *res = *src << imm, or 0 for imm > 63.  */
+static void
+compute_correct_result (unsigned long long *src, unsigned int imm,
+                        unsigned long long *res)
+{
+  /* Shifting by 64 or more is undefined in C; such counts yield zero.  */
+  if (imm > 63)
+    res[0] = 0;
+  else
+    res[0] = src[0] << imm;
+}
+
+static void
+sse2_test (void)
+{
+  long long got, want;
+  unsigned int amount;
+  int mismatches = 0;
+  int idx;
+
+  /* Each MMXops entry is the operand and, converted, the shift count.  */
+  for (idx = 0; idx < MMX_num_ops; idx++)
+    {
+      amount = MMXops[idx];
+      test_psllwi (&MMXops[idx], amount, &got);
+      compute_correct_result (&MMXops[idx], amount, &want);
+      if (got != want)
+        mismatches += 1;
+    }
+
+  if (mismatches)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-psllw.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-psllw.c
new file mode 100644
index 00000000000..7c27d144185
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-psllw.c
@@ -0,0 +1,52 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_psllw (long long *ll1, long long *ll2, long long *r)
+{
+  const __m64 *value = (const __m64 *) ll1;  /* operand to shift */
+  const __m64 *count = (const __m64 *) ll2;  /* 64-bit shift count */
+  *(__m64 *) r = _m_psllw (*value, *count);
+}
+
+/* Reference PSLLW: shift each 16-bit lane of *dst_p left by the count.  */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+                        long long *res_p)
+{
+  const unsigned short *lanes = (const unsigned short *) dst_p;
+  const unsigned int *count = (const unsigned int *) src_p;
+  unsigned short *out = (unsigned short *) res_p;
+  /* The whole 64-bit count matters: a nonzero high half or a low half
+     above 15 clears every lane.  */
+  const int too_big = count[1] != 0 || count[0] > 15;
+  int lane;
+
+  for (lane = 0; lane < 4; lane++)
+    out[lane] = too_big ? 0 : lanes[lane] << count[0];
+}
+
+static void
+sse2_test (void)
+{
+  long long got, want;
+  int mismatches = 0;
+  int idx;
+
+  /* Consecutive MMXops entries form (operand, shift count) pairs.  */
+  for (idx = 0; idx < MMX_num_ops; idx += 2)
+    {
+      test_psllw (&MMXops[idx], &MMXops[idx + 1], &got);
+      compute_correct_result (&MMXops[idx], &MMXops[idx + 1], &want);
+      if (got != want)
+        mismatches += 1;
+    }
+
+  if (mismatches)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-psllwi.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-psllwi.c
new file mode 100644
index 00000000000..458463b2073
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-psllwi.c
@@ -0,0 +1,105 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_psllwi (long long *ll1, unsigned int imm, long long *r)
+{
+  __m64 val = *(__m64 *) ll1;
+  __m64 *out = (__m64 *) r;
+  switch (imm)  /* one case per count so every call passes a literal */
+    {
+    case 0:
+      *out = _m_psllwi (val, 0);
+      break;
+    case 1:
+      *out = _m_psllwi (val, 1);
+      break;
+    case 2:
+      *out = _m_psllwi (val, 2);
+      break;
+    case 3:
+      *out = _m_psllwi (val, 3);
+      break;
+    case 4:
+      *out = _m_psllwi (val, 4);
+      break;
+    case 5:
+      *out = _m_psllwi (val, 5);
+      break;
+    case 6:
+      *out = _m_psllwi (val, 6);
+      break;
+    case 7:
+      *out = _m_psllwi (val, 7);
+      break;
+    case 8:
+      *out = _m_psllwi (val, 8);
+      break;
+    case 9:
+      *out = _m_psllwi (val, 9);
+      break;
+    case 10:
+      *out = _m_psllwi (val, 10);
+      break;
+    case 11:
+      *out = _m_psllwi (val, 11);
+      break;
+    case 12:
+      *out = _m_psllwi (val, 12);
+      break;
+    case 13:
+      *out = _m_psllwi (val, 13);
+      break;
+    case 14:
+      *out = _m_psllwi (val, 14);
+      break;
+    case 15:
+      *out = _m_psllwi (val, 15);
+      break;
+    default:  /* every count above 15 is represented by 16 */
+      *out = _m_psllwi (val, 16);
+    }
+}
+
+/* Reference PSLLW (immediate): shift all four 16-bit lanes left by IMM.  */
+static void
+compute_correct_result (long long *src_p, unsigned int imm,
+                        long long *res_p)
+{
+  const unsigned short *lanes = (const unsigned short *) src_p;
+  unsigned short *out = (unsigned short *) res_p;
+  int lane;
+
+  for (lane = 0; lane < 4; lane++)
+    {
+      /* Counts above 15 shift every bit out.  */
+      out[lane] = imm > 15 ? 0 : lanes[lane] << imm;
+    }
+}
+
+static void
+sse2_test (void)
+{
+  long long got, want;
+  unsigned int amount;
+  int mismatches = 0;
+  int idx;
+
+  /* Each MMXops entry is the operand and, converted, the shift count.  */
+  for (idx = 0; idx < MMX_num_ops; idx++)
+    {
+      amount = MMXops[idx];
+      test_psllwi (&MMXops[idx], amount, &got);
+      compute_correct_result (&MMXops[idx], amount, &want);
+      if (got != want)
+        mismatches += 1;
+    }
+
+  if (mismatches)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-psrad.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-psrad.c
new file mode 100644
index 00000000000..494bcb4ccbd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-psrad.c
@@ -0,0 +1,52 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_psrad (long long *ll1, long long *ll2, long long *r)
+{
+  const __m64 *value = (const __m64 *) ll1;  /* operand to shift */
+  const __m64 *count = (const __m64 *) ll2;  /* 64-bit shift count */
+  *(__m64 *) r = _m_psrad (*value, *count);
+}
+
+/* Reference PSRAD: arithmetic right shift of each 32-bit lane.  */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+                        long long *res_p)
+{
+  const int *lanes = (const int *) dst_p;
+  const unsigned int *count = (const unsigned int *) src_p;
+  int *out = (int *) res_p;
+  /* An oversized count (nonzero high half, or low half above 31) leaves
+     only copies of the sign bit.  */
+  const int too_big = count[1] != 0 || count[0] > 31;
+  int lane;
+
+  for (lane = 0; lane < 2; lane++)
+    out[lane] = too_big ? -(lanes[lane] < 0) : lanes[lane] >> count[0];
+}
+
+static void
+sse2_test (void)
+{
+  long long got, want;
+  int mismatches = 0;
+  int idx;
+
+  /* Consecutive MMXops entries form (operand, shift count) pairs.  */
+  for (idx = 0; idx < MMX_num_ops; idx += 2)
+    {
+      test_psrad (&MMXops[idx], &MMXops[idx + 1], &got);
+      compute_correct_result (&MMXops[idx], &MMXops[idx + 1], &want);
+      if (got != want)
+        mismatches += 1;
+    }
+
+  if (mismatches)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-psradi.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-psradi.c
new file mode 100644
index 00000000000..4ae2ac848bc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-psradi.c
@@ -0,0 +1,153 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_psradi (long long *ll1, unsigned int imm, long long *r)
+{
+  __m64 val = *(__m64 *) ll1;
+  __m64 *out = (__m64 *) r;
+  switch (imm)  /* one case per count so every call passes a literal */
+    {
+    case 0:
+      *out = _m_psradi (val, 0);
+      break;
+    case 1:
+      *out = _m_psradi (val, 1);
+      break;
+    case 2:
+      *out = _m_psradi (val, 2);
+      break;
+    case 3:
+      *out = _m_psradi (val, 3);
+      break;
+    case 4:
+      *out = _m_psradi (val, 4);
+      break;
+    case 5:
+      *out = _m_psradi (val, 5);
+      break;
+    case 6:
+      *out = _m_psradi (val, 6);
+      break;
+    case 7:
+      *out = _m_psradi (val, 7);
+      break;
+    case 8:
+      *out = _m_psradi (val, 8);
+      break;
+    case 9:
+      *out = _m_psradi (val, 9);
+      break;
+    case 10:
+      *out = _m_psradi (val, 10);
+      break;
+    case 11:
+      *out = _m_psradi (val, 11);
+      break;
+    case 12:
+      *out = _m_psradi (val, 12);
+      break;
+    case 13:
+      *out = _m_psradi (val, 13);
+      break;
+    case 14:
+      *out = _m_psradi (val, 14);
+      break;
+    case 15:
+      *out = _m_psradi (val, 15);
+      break;
+    case 16:
+      *out = _m_psradi (val, 16);
+      break;
+    case 17:
+      *out = _m_psradi (val, 17);
+      break;
+    case 18:
+      *out = _m_psradi (val, 18);
+      break;
+    case 19:
+      *out = _m_psradi (val, 19);
+      break;
+    case 20:
+      *out = _m_psradi (val, 20);
+      break;
+    case 21:
+      *out = _m_psradi (val, 21);
+      break;
+    case 22:
+      *out = _m_psradi (val, 22);
+      break;
+    case 23:
+      *out = _m_psradi (val, 23);
+      break;
+    case 24:
+      *out = _m_psradi (val, 24);
+      break;
+    case 25:
+      *out = _m_psradi (val, 25);
+      break;
+    case 26:
+      *out = _m_psradi (val, 26);
+      break;
+    case 27:
+      *out = _m_psradi (val, 27);
+      break;
+    case 28:
+      *out = _m_psradi (val, 28);
+      break;
+    case 29:
+      *out = _m_psradi (val, 29);
+      break;
+    case 30:
+      *out = _m_psradi (val, 30);
+      break;
+    case 31:
+      *out = _m_psradi (val, 31);
+      break;
+    default:  /* every count above 31 is represented by 32 */
+      *out = _m_psradi (val, 32);
+    }
+}
+
+/* Reference PSRAD (immediate): arithmetic right shift of both lanes.  */
+static void
+compute_correct_result (long long *src_p, unsigned int imm,
+                        long long *res_p)
+{
+  const int *lanes = (const int *) src_p;
+  int *out = (int *) res_p;
+  int lane;
+
+  for (lane = 0; lane < 2; lane++)
+    {
+      /* Counts above 31 leave only copies of the sign bit.  */
+      out[lane] = imm > 31 ? -(lanes[lane] < 0) : lanes[lane] >> imm;
+    }
+}
+
+static void
+sse2_test (void)
+{
+  long long got, want;
+  unsigned int amount;
+  int mismatches = 0;
+  int idx;
+
+  /* Each MMXops entry is the operand and, converted, the shift count.  */
+  for (idx = 0; idx < MMX_num_ops; idx++)
+    {
+      amount = MMXops[idx];
+      test_psradi (&MMXops[idx], amount, &got);
+      compute_correct_result (&MMXops[idx], amount, &want);
+      if (got != want)
+        mismatches += 1;
+    }
+
+  if (mismatches)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-psraw.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-psraw.c
new file mode 100644
index 00000000000..dd097f2e16b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-psraw.c
@@ -0,0 +1,52 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_psraw (long long *ll1, long long *ll2, long long *r)
+{
+  const __m64 *value = (const __m64 *) ll1;  /* operand to shift */
+  const __m64 *count = (const __m64 *) ll2;  /* 64-bit shift count */
+  *(__m64 *) r = _m_psraw (*value, *count);
+}
+
+/* Reference PSRAW: arithmetic right shift of each 16-bit lane.  */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+                        long long *res_p)
+{
+  const short *lanes = (const short *) dst_p;
+  const unsigned int *count = (const unsigned int *) src_p;
+  short *out = (short *) res_p;
+  /* An oversized count (nonzero high half, or low half above 15) leaves
+     only copies of the sign bit.  */
+  const int too_big = count[1] != 0 || count[0] > 15;
+  int lane;
+
+  for (lane = 0; lane < 4; lane++)
+    out[lane] = too_big ? -(lanes[lane] < 0) : lanes[lane] >> count[0];
+}
+
+static void
+sse2_test (void)
+{
+  long long got, want;
+  int mismatches = 0;
+  int idx;
+
+  /* Consecutive MMXops entries form (operand, shift count) pairs.  */
+  for (idx = 0; idx < MMX_num_ops; idx += 2)
+    {
+      test_psraw (&MMXops[idx], &MMXops[idx + 1], &got);
+      compute_correct_result (&MMXops[idx], &MMXops[idx + 1], &want);
+      if (got != want)
+        mismatches += 1;
+    }
+
+  if (mismatches)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-psrawi.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-psrawi.c
new file mode 100644
index 00000000000..30a2d8bd08b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-psrawi.c
@@ -0,0 +1,105 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_psrawi (long long *ll1, unsigned int imm, long long *r)
+{
+  __m64 val = *(__m64 *) ll1;
+  __m64 *out = (__m64 *) r;
+  switch (imm)  /* one case per count so every call passes a literal */
+    {
+    case 0:
+      *out = _m_psrawi (val, 0);
+      break;
+    case 1:
+      *out = _m_psrawi (val, 1);
+      break;
+    case 2:
+      *out = _m_psrawi (val, 2);
+      break;
+    case 3:
+      *out = _m_psrawi (val, 3);
+      break;
+    case 4:
+      *out = _m_psrawi (val, 4);
+      break;
+    case 5:
+      *out = _m_psrawi (val, 5);
+      break;
+    case 6:
+      *out = _m_psrawi (val, 6);
+      break;
+    case 7:
+      *out = _m_psrawi (val, 7);
+      break;
+    case 8:
+      *out = _m_psrawi (val, 8);
+      break;
+    case 9:
+      *out = _m_psrawi (val, 9);
+      break;
+    case 10:
+      *out = _m_psrawi (val, 10);
+      break;
+    case 11:
+      *out = _m_psrawi (val, 11);
+      break;
+    case 12:
+      *out = _m_psrawi (val, 12);
+      break;
+    case 13:
+      *out = _m_psrawi (val, 13);
+      break;
+    case 14:
+      *out = _m_psrawi (val, 14);
+      break;
+    case 15:
+      *out = _m_psrawi (val, 15);
+      break;
+    default:  /* every count above 15 is represented by 16 */
+      *out = _m_psrawi (val, 16);
+    }
+}
+
+/* Reference PSRAW (immediate): arithmetic right shift of all four lanes.  */
+static void
+compute_correct_result (long long *src_p, unsigned int imm,
+                        long long *res_p)
+{
+  const short *lanes = (const short *) src_p;
+  short *out = (short *) res_p;
+  int lane;
+
+  for (lane = 0; lane < 4; lane++)
+    {
+      /* Counts above 15 leave only copies of the sign bit.  */
+      out[lane] = imm > 15 ? -(lanes[lane] < 0) : lanes[lane] >> imm;
+    }
+}
+
+static void
+sse2_test (void)
+{
+  long long got, want;
+  unsigned int amount;
+  int mismatches = 0;
+  int idx;
+
+  /* Each MMXops entry is the operand and, converted, the shift count.  */
+  for (idx = 0; idx < MMX_num_ops; idx++)
+    {
+      amount = MMXops[idx];
+      test_psrawi (&MMXops[idx], amount, &got);
+      compute_correct_result (&MMXops[idx], amount, &want);
+      if (got != want)
+        mismatches += 1;
+    }
+
+  if (mismatches)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-psrld.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-psrld.c
new file mode 100644
index 00000000000..3858be180a8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-psrld.c
@@ -0,0 +1,52 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_psrld (long long *ll1, long long *ll2, long long *r)
+{
+  const __m64 *value = (const __m64 *) ll1;  /* operand to shift */
+  const __m64 *count = (const __m64 *) ll2;  /* 64-bit shift count */
+  *(__m64 *) r = _m_psrld (*value, *count);
+}
+
+/* Reference PSRLD: logical (zero-filling) right shift of each 32-bit lane.  */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+                        long long *res_p)
+{
+  /* Lanes must be unsigned: with GCC, ">>" on a negative int sign-extends,
+     which would model PSRAD rather than PSRLD.  */
+  unsigned int *dst = (unsigned int *) dst_p;
+  unsigned int *src = (unsigned int *) src_p;
+  unsigned int *res = (unsigned int *) res_p;
+  /* A nonzero high half or a count above 31 shifts every bit out.  */
+  int too_big = src[1] || src[0] > 31;
+  int i;
+  for (i = 0; i < 2; i++)
+    res[i] = too_big ? 0 : dst[i] >> src[0];
+}
+
+static void
+sse2_test (void)
+{
+  long long got, want;
+  int mismatches = 0;
+  int idx;
+
+  /* Consecutive MMXops entries form (operand, shift count) pairs.  */
+  for (idx = 0; idx < MMX_num_ops; idx += 2)
+    {
+      test_psrld (&MMXops[idx], &MMXops[idx + 1], &got);
+      compute_correct_result (&MMXops[idx], &MMXops[idx + 1], &want);
+      if (got != want)
+        mismatches += 1;
+    }
+
+  if (mismatches)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-psrldi.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-psrldi.c
new file mode 100644
index 00000000000..9315b6f0137
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-psrldi.c
@@ -0,0 +1,153 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_psrldi (long long *ll1, unsigned int imm, long long *r)
+{
+  __m64 val = *(__m64 *) ll1;
+  __m64 *out = (__m64 *) r;
+  switch (imm)  /* one case per count so every call passes a literal */
+    {
+    case 0:
+      *out = _m_psrldi (val, 0);
+      break;
+    case 1:
+      *out = _m_psrldi (val, 1);
+      break;
+    case 2:
+      *out = _m_psrldi (val, 2);
+      break;
+    case 3:
+      *out = _m_psrldi (val, 3);
+      break;
+    case 4:
+      *out = _m_psrldi (val, 4);
+      break;
+    case 5:
+      *out = _m_psrldi (val, 5);
+      break;
+    case 6:
+      *out = _m_psrldi (val, 6);
+      break;
+    case 7:
+      *out = _m_psrldi (val, 7);
+      break;
+    case 8:
+      *out = _m_psrldi (val, 8);
+      break;
+    case 9:
+      *out = _m_psrldi (val, 9);
+      break;
+    case 10:
+      *out = _m_psrldi (val, 10);
+      break;
+    case 11:
+      *out = _m_psrldi (val, 11);
+      break;
+    case 12:
+      *out = _m_psrldi (val, 12);
+      break;
+    case 13:
+      *out = _m_psrldi (val, 13);
+      break;
+    case 14:
+      *out = _m_psrldi (val, 14);
+      break;
+    case 15:
+      *out = _m_psrldi (val, 15);
+      break;
+    case 16:
+      *out = _m_psrldi (val, 16);
+      break;
+    case 17:
+      *out = _m_psrldi (val, 17);
+      break;
+    case 18:
+      *out = _m_psrldi (val, 18);
+      break;
+    case 19:
+      *out = _m_psrldi (val, 19);
+      break;
+    case 20:
+      *out = _m_psrldi (val, 20);
+      break;
+    case 21:
+      *out = _m_psrldi (val, 21);
+      break;
+    case 22:
+      *out = _m_psrldi (val, 22);
+      break;
+    case 23:
+      *out = _m_psrldi (val, 23);
+      break;
+    case 24:
+      *out = _m_psrldi (val, 24);
+      break;
+    case 25:
+      *out = _m_psrldi (val, 25);
+      break;
+    case 26:
+      *out = _m_psrldi (val, 26);
+      break;
+    case 27:
+      *out = _m_psrldi (val, 27);
+      break;
+    case 28:
+      *out = _m_psrldi (val, 28);
+      break;
+    case 29:
+      *out = _m_psrldi (val, 29);
+      break;
+    case 30:
+      *out = _m_psrldi (val, 30);
+      break;
+    case 31:
+      *out = _m_psrldi (val, 31);
+      break;
+    default:  /* every count above 31 is represented by 32 */
+      *out = _m_psrldi (val, 32);
+    }
+}
+
+/* Reference PSRLD (immediate): logical right shift of both 32-bit lanes.  */
+static void
+compute_correct_result (long long *src_p, unsigned int imm,
+                        long long *res_p)
+{
+  /* Lanes must be unsigned: with GCC, ">>" on a negative int sign-extends,
+     which would model PSRAD rather than the zero-filling PSRLD.  */
+  unsigned int *src = (unsigned int *) src_p;
+  unsigned int *res = (unsigned int *) res_p;
+  int i;
+
+  /* Counts above 31 shift every bit out.  */
+  for (i = 0; i < 2; i++)
+    res[i] = imm > 31 ? 0 : src[i] >> imm;
+}
+
+static void
+sse2_test (void)
+{
+  long long got, want;
+  unsigned int amount;
+  int mismatches = 0;
+  int idx;
+
+  /* Each MMXops entry is the operand and, converted, the shift count.  */
+  for (idx = 0; idx < MMX_num_ops; idx++)
+    {
+      amount = MMXops[idx];
+      test_psrldi (&MMXops[idx], amount, &got);
+      compute_correct_result (&MMXops[idx], amount, &want);
+      if (got != want)
+        mismatches += 1;
+    }
+
+  if (mismatches)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-psrlq.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-psrlq.c
new file mode 100644
index 00000000000..064fb4aab7e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-psrlq.c
@@ -0,0 +1,47 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_psrlq (long long *ll1, long long *ll2, long long *r)
+{
+  const __m64 *value = (const __m64 *) ll1;  /* operand to shift */
+  const __m64 *count = (const __m64 *) ll2;  /* 64-bit shift count */
+  *(__m64 *) r = _m_psrlq (*value, *count);
+}
+
+/* Reference PSRLQ: *res = *dst >> *src, or 0 once the count exceeds 63.  */
+static void
+compute_correct_result (unsigned long long *dst,
+                        unsigned long long *src,
+                        unsigned long long *res)
+{
+  const unsigned long long count = *src;
+
+  /* Shifting a 64-bit value by 64 or more is undefined in C, so guard it.  */
+  *res = count <= 63 ? *dst >> count : 0;
+}
+
+static void
+sse2_test (void)
+{
+  long long got, want;
+  int mismatches = 0;
+  int idx;
+
+  /* Consecutive MMXops entries form (operand, shift count) pairs.  */
+  for (idx = 0; idx < MMX_num_ops; idx += 2)
+    {
+      test_psrlq (&MMXops[idx], &MMXops[idx + 1], &got);
+      compute_correct_result (&MMXops[idx], &MMXops[idx + 1], &want);
+      if (got != want)
+        mismatches += 1;
+    }
+
+  if (mismatches)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-psrlqi.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-psrlqi.c
new file mode 100644
index 00000000000..35de178efea
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-psrlqi.c
@@ -0,0 +1,245 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_psllwi (long long *ll1, unsigned int imm, long long *r)
+{
+  __m64 val = *(__m64 *) ll1;
+  __m64 *out = (__m64 *) r;
+  switch (imm)  /* NOTE: despite the function name, this tests _m_psrlqi */
+    {
+    case 0:
+      *out = _m_psrlqi (val, 0);
+      break;
+    case 1:
+      *out = _m_psrlqi (val, 1);
+      break;
+    case 2:
+      *out = _m_psrlqi (val, 2);
+      break;
+    case 3:
+      *out = _m_psrlqi (val, 3);
+      break;
+    case 4:
+      *out = _m_psrlqi (val, 4);
+      break;
+    case 5:
+      *out = _m_psrlqi (val, 5);
+      break;
+    case 6:
+      *out = _m_psrlqi (val, 6);
+      break;
+    case 7:
+      *out = _m_psrlqi (val, 7);
+      break;
+    case 8:
+      *out = _m_psrlqi (val, 8);
+      break;
+    case 9:
+      *out = _m_psrlqi (val, 9);
+      break;
+    case 10:
+      *out = _m_psrlqi (val, 10);
+      break;
+    case 11:
+      *out = _m_psrlqi (val, 11);
+      break;
+    case 12:
+      *out = _m_psrlqi (val, 12);
+      break;
+    case 13:
+      *out = _m_psrlqi (val, 13);
+      break;
+    case 14:
+      *out = _m_psrlqi (val, 14);
+      break;
+    case 15:
+      *out = _m_psrlqi (val, 15);
+      break;
+    case 16:
+      *out = _m_psrlqi (val, 16);
+      break;
+    case 17:
+      *out = _m_psrlqi (val, 17);
+      break;
+    case 18:
+      *out = _m_psrlqi (val, 18);
+      break;
+    case 19:
+      *out = _m_psrlqi (val, 19);
+      break;
+    case 20:
+      *out = _m_psrlqi (val, 20);
+      break;
+    case 21:
+      *out = _m_psrlqi (val, 21);
+      break;
+    case 22:
+      *out = _m_psrlqi (val, 22);
+      break;
+    case 23:
+      *out = _m_psrlqi (val, 23);
+      break;
+    case 24:
+      *out = _m_psrlqi (val, 24);
+      break;
+    case 25:
+      *out = _m_psrlqi (val, 25);
+      break;
+    case 26:
+      *out = _m_psrlqi (val, 26);
+      break;
+    case 27:
+      *out = _m_psrlqi (val, 27);
+      break;
+    case 28:
+      *out = _m_psrlqi (val, 28);
+      break;
+    case 29:
+      *out = _m_psrlqi (val, 29);
+      break;
+    case 30:
+      *out = _m_psrlqi (val, 30);
+      break;
+    case 31:
+      *out = _m_psrlqi (val, 31);
+      break;
+    case 32:
+      *out = _m_psrlqi (val, 32);
+      break;
+    case 33:
+      *out = _m_psrlqi (val, 33);
+      break;
+    case 34:
+      *out = _m_psrlqi (val, 34);
+      break;
+    case 35:
+      *out = _m_psrlqi (val, 35);
+      break;
+    case 36:
+      *out = _m_psrlqi (val, 36);
+      break;
+    case 37:
+      *out = _m_psrlqi (val, 37);
+      break;
+    case 38:
+      *out = _m_psrlqi (val, 38);
+      break;
+    case 39:
+      *out = _m_psrlqi (val, 39);
+      break;
+    case 40:
+      *out = _m_psrlqi (val, 40);
+      break;
+    case 41:
+      *out = _m_psrlqi (val, 41);
+      break;
+    case 42:
+      *out = _m_psrlqi (val, 42);
+      break;
+    case 43:
+      *out = _m_psrlqi (val, 43);
+      break;
+    case 44:
+      *out = _m_psrlqi (val, 44);
+      break;
+    case 45:
+      *out = _m_psrlqi (val, 45);
+      break;
+    case 46:
+      *out = _m_psrlqi (val, 46);
+      break;
+    case 47:
+      *out = _m_psrlqi (val, 47);
+      break;
+    case 48:
+      *out = _m_psrlqi (val, 48);
+      break;
+    case 49:
+      *out = _m_psrlqi (val, 49);
+      break;
+    case 50:
+      *out = _m_psrlqi (val, 50);
+      break;
+    case 51:
+      *out = _m_psrlqi (val, 51);
+      break;
+    case 52:
+      *out = _m_psrlqi (val, 52);
+      break;
+    case 53:
+      *out = _m_psrlqi (val, 53);
+      break;
+    case 54:
+      *out = _m_psrlqi (val, 54);
+      break;
+    case 55:
+      *out = _m_psrlqi (val, 55);
+      break;
+    case 56:
+      *out = _m_psrlqi (val, 56);
+      break;
+    case 57:
+      *out = _m_psrlqi (val, 57);
+      break;
+    case 58:
+      *out = _m_psrlqi (val, 58);
+      break;
+    case 59:
+      *out = _m_psrlqi (val, 59);
+      break;
+    case 60:
+      *out = _m_psrlqi (val, 60);
+      break;
+    case 61:
+      *out = _m_psrlqi (val, 61);
+      break;
+    case 62:
+      *out = _m_psrlqi (val, 62);
+      break;
+    case 63:
+      *out = _m_psrlqi (val, 63);
+      break;
+    default:  /* every count above 63 is represented by 64 */
+      *out = _m_psrlqi (val, 64);
+    }
+}
+
+/* Reference PSRLQ (immediate): *res = *src >> imm, or 0 for imm > 63.  */
+static void
+compute_correct_result (unsigned long long *src, unsigned int imm,
+                        unsigned long long *res)
+{
+  /* Shifting by 64 or more is undefined in C; such counts yield zero.  */
+  if (imm > 63)
+    res[0] = 0;
+  else
+    res[0] = src[0] >> imm;
+}
+
+static void
+sse2_test (void)
+{
+  long long got, want;
+  unsigned int amount;
+  int mismatches = 0;
+  int idx;
+
+  /* Each MMXops entry is the operand and, converted, the shift count.  */
+  for (idx = 0; idx < MMX_num_ops; idx++)
+    {
+      amount = MMXops[idx];
+      test_psllwi (&MMXops[idx], amount, &got);
+      compute_correct_result (&MMXops[idx], amount, &want);
+      if (got != want)
+        mismatches += 1;
+    }
+
+  if (mismatches)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-psrlw.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-psrlw.c
new file mode 100644
index 00000000000..d2c1680ba5d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-psrlw.c
@@ -0,0 +1,52 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_psrlw (long long *ll1, long long *ll2, long long *r)
+{
+  /* Treat the three 64-bit slots as __m64 for the intrinsic call.  */
+  __m64 *out = (__m64 *) r;
+  *out = _m_psrlw (*(__m64 *) ll1, *(__m64 *) ll2);
+}
+
+/* Reference PSRLW: logical right shift of four 16-bit lanes by a 64-bit count.  */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+                        long long *res_p)
+{
+  unsigned short *dst = (unsigned short *) dst_p; /* Unsigned: no sign fill.  */
+  unsigned int *src = (unsigned int *) src_p;
+  unsigned short *res = (unsigned short *) res_p;
+  int i;
+  if (src[1] || src[0] > 15)
+    for (i = 0; i < 4; i++)
+      res[i] = 0;
+  else
+    for (i = 0; i < 4; i++)
+      res[i] = dst[i] >> src[0];
+}
+
+static void
+sse2_test (void)
+{
+  long long actual, expected;
+  int mismatches = 0;
+  int idx;
+
+  /* Consume MMXops two entries at a time as (first, second) operands.  */
+  for (idx = 0; idx < MMX_num_ops; idx += 2)
+    {
+      test_psrlw (&MMXops[idx], &MMXops[idx + 1], &actual);
+      compute_correct_result (&MMXops[idx], &MMXops[idx + 1], &expected);
+      if (expected != actual)
+        mismatches++;
+    }
+
+  if (mismatches)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-psrlwi.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-psrlwi.c
new file mode 100644
index 00000000000..0dbffa56299
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-psrlwi.c
@@ -0,0 +1,105 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_psrlwi (long long *ll1, unsigned int imm, long long *r)
+{
+ __m64 t1 = *(__m64 *) ll1;
+ switch (imm) /* One case per count so every call passes a literal.  */
+ {
+ case 0:
+ *(__m64 *) r = _m_psrlwi (t1, 0);
+ break;
+ case 1:
+ *(__m64 *) r = _m_psrlwi (t1, 1);
+ break;
+ case 2:
+ *(__m64 *) r = _m_psrlwi (t1, 2);
+ break;
+ case 3:
+ *(__m64 *) r = _m_psrlwi (t1, 3);
+ break;
+ case 4:
+ *(__m64 *) r = _m_psrlwi (t1, 4);
+ break;
+ case 5:
+ *(__m64 *) r = _m_psrlwi (t1, 5);
+ break;
+ case 6:
+ *(__m64 *) r = _m_psrlwi (t1, 6);
+ break;
+ case 7:
+ *(__m64 *) r = _m_psrlwi (t1, 7);
+ break;
+ case 8:
+ *(__m64 *) r = _m_psrlwi (t1, 8);
+ break;
+ case 9:
+ *(__m64 *) r = _m_psrlwi (t1, 9);
+ break;
+ case 10:
+ *(__m64 *) r = _m_psrlwi (t1, 10);
+ break;
+ case 11:
+ *(__m64 *) r = _m_psrlwi (t1, 11);
+ break;
+ case 12:
+ *(__m64 *) r = _m_psrlwi (t1, 12);
+ break;
+ case 13:
+ *(__m64 *) r = _m_psrlwi (t1, 13);
+ break;
+ case 14:
+ *(__m64 *) r = _m_psrlwi (t1, 14);
+ break;
+ case 15:
+ *(__m64 *) r = _m_psrlwi (t1, 15);
+ break;
+ default: /* Every count above 15 is exercised as a shift by 16.  */
+ *(__m64 *) r = _m_psrlwi (t1, 16);
+ break;
+ }
+}
+
+/* Reference PSRLW (immediate form): logical right shift of four 16-bit lanes.  */
+static void
+compute_correct_result (long long *src_p, unsigned int imm,
+                        long long *res_p)
+{
+  unsigned short *src = (unsigned short *) src_p; /* Unsigned: no sign fill.  */
+  unsigned short *res = (unsigned short *) res_p;
+  int i;
+  if (imm > 15)
+    for (i = 0; i < 4; i++)
+      res[i] = 0;
+  else
+    for (i = 0; i < 4; i++)
+      res[i] = src[i] >> imm;
+}
+
+static void
+sse2_test (void)
+{
+  long long actual, expected;
+  unsigned int shift;
+  int mismatches = 0;
+  int idx;
+
+  /* Each MMXops entry is the operand and, converted to unsigned int, the count.  */
+  for (idx = 0; idx < MMX_num_ops; idx++)
+    {
+      shift = MMXops[idx];
+      test_psrlwi (&MMXops[idx], shift, &actual);
+      compute_correct_result (&MMXops[idx], shift, &expected);
+      if (expected != actual)
+        mismatches++;
+    }
+
+  if (mismatches)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-psubb.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-psubb.c
new file mode 100644
index 00000000000..de0076a1654
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-psubb.c
@@ -0,0 +1,48 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_psubb (long long *ll1, long long *ll2, long long *r)
+{
+  /* Treat the three 64-bit slots as __m64 for the intrinsic call.  */
+  __m64 *out = (__m64 *) r;
+  *out = _m_psubb (*(__m64 *) ll1, *(__m64 *) ll2);
+}
+
+/* Reference model: byte-wise DST - SRC over all eight lanes.  */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+                        long long *res_p)
+{
+  char *minuend = (char *) dst_p;
+  char *subtrahend = (char *) src_p;
+  char *diff = (char *) res_p;
+  int lane = 0;
+  for (; lane < 8; lane++)
+    diff[lane] = minuend[lane] - subtrahend[lane];
+}
+
+static void
+sse2_test (void)
+{
+  long long actual, expected;
+  int mismatches = 0;
+  int idx;
+
+  /* Consume MMXops two entries at a time as (first, second) operands.  */
+  for (idx = 0; idx < MMX_num_ops; idx += 2)
+    {
+      test_psubb (&MMXops[idx], &MMXops[idx + 1], &actual);
+      compute_correct_result (&MMXops[idx], &MMXops[idx + 1], &expected);
+      if (expected != actual)
+        mismatches++;
+    }
+
+  if (mismatches)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-psubd.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-psubd.c
new file mode 100644
index 00000000000..344f632b6d6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-psubd.c
@@ -0,0 +1,48 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_psubd (long long *ll1, long long *ll2, long long *r)
+{
+  /* Treat the three 64-bit slots as __m64 for the intrinsic call.  */
+  __m64 *out = (__m64 *) r;
+  *out = _m_psubd (*(__m64 *) ll1, *(__m64 *) ll2);
+}
+
+/* Reference model: wrapping 32-bit DST - SRC in both lanes.  */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+                        long long *res_p)
+{
+  unsigned int *dst = (unsigned int *) dst_p; /* Unsigned: overflow wraps.  */
+  unsigned int *src = (unsigned int *) src_p;
+  unsigned int *res = (unsigned int *) res_p;
+  int i;
+  for (i = 0; i < 2; i++)
+    res[i] = dst[i] - src[i];
+}
+
+static void
+sse2_test (void)
+{
+  long long actual, expected;
+  int mismatches = 0;
+  int idx;
+
+  /* Consume MMXops two entries at a time as (first, second) operands.  */
+  for (idx = 0; idx < MMX_num_ops; idx += 2)
+    {
+      test_psubd (&MMXops[idx], &MMXops[idx + 1], &actual);
+      compute_correct_result (&MMXops[idx], &MMXops[idx + 1], &expected);
+      if (expected != actual)
+        mismatches++;
+    }
+
+  if (mismatches)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-psubq.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-psubq.c
new file mode 100644
index 00000000000..613f302bf6a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-psubq.c
@@ -0,0 +1,43 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_psubq (long long *ll1, long long *ll2, long long *r)
+{
+  /* Treat the three 64-bit slots as __m64 for the intrinsic call.  */
+  __m64 *out = (__m64 *) r;
+  *out = _mm_sub_si64 (*(__m64 *) ll1, *(__m64 *) ll2);
+}
+
+/* Reference model: wrapping 64-bit DST - SRC (unsigned avoids overflow UB).  */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+                        long long *res_p)
+{
+  res_p[0] = (unsigned long long) dst_p[0] - (unsigned long long) src_p[0];
+}
+
+static void
+sse2_test (void)
+{
+  long long actual, expected;
+  int mismatches = 0;
+  int idx;
+
+  /* Consume MMXops two entries at a time as (first, second) operands.  */
+  for (idx = 0; idx < MMX_num_ops; idx += 2)
+    {
+      test_psubq (&MMXops[idx], &MMXops[idx + 1], &actual);
+      compute_correct_result (&MMXops[idx], &MMXops[idx + 1], &expected);
+      if (expected != actual)
+        mismatches++;
+    }
+
+  if (mismatches)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-psubusb.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-psubusb.c
new file mode 100644
index 00000000000..ad6112a5d33
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-psubusb.c
@@ -0,0 +1,48 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_psubusb (long long *ll1, long long *ll2, long long *r)
+{
+  /* Treat the three 64-bit slots as __m64 for the intrinsic call.  */
+  __m64 *out = (__m64 *) r;
+  *out = _m_psubusb (*(__m64 *) ll1, *(__m64 *) ll2);
+}
+
+/* Reference model: per-byte DST - SRC passed through saturate_ub.  */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+                        long long *res_p)
+{
+  unsigned char *minuend = (unsigned char *) dst_p;
+  unsigned char *subtrahend = (unsigned char *) src_p;
+  unsigned char *out = (unsigned char *) res_p;
+  int lane = 0;
+  for (; lane < 8; lane++)
+    out[lane] = saturate_ub (minuend[lane] - subtrahend[lane]);
+}
+
+static void
+sse2_test (void)
+{
+  long long actual, expected;
+  int mismatches = 0;
+  int idx;
+
+  /* Consume MMXops two entries at a time as (first, second) operands.  */
+  for (idx = 0; idx < MMX_num_ops; idx += 2)
+    {
+      test_psubusb (&MMXops[idx], &MMXops[idx + 1], &actual);
+      compute_correct_result (&MMXops[idx], &MMXops[idx + 1], &expected);
+      if (expected != actual)
+        mismatches++;
+    }
+
+  if (mismatches)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-psubusw.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-psubusw.c
new file mode 100644
index 00000000000..8a8a9c0fb4d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-psubusw.c
@@ -0,0 +1,48 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_psubusw (long long *ll1, long long *ll2, long long *r)
+{
+  /* Treat the three 64-bit slots as __m64 for the intrinsic call.  */
+  __m64 *out = (__m64 *) r;
+  *out = _m_psubusw (*(__m64 *) ll1, *(__m64 *) ll2);
+}
+
+/* Reference model: per-16-bit-lane DST - SRC passed through saturate_uw.  */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+                        long long *res_p)
+{
+  unsigned short *minuend = (unsigned short *) dst_p;
+  unsigned short *subtrahend = (unsigned short *) src_p;
+  unsigned short *out = (unsigned short *) res_p;
+  int lane = 0;
+  for (; lane < 4; lane++)
+    out[lane] = saturate_uw (minuend[lane] - subtrahend[lane]);
+}
+
+static void
+sse2_test (void)
+{
+  long long actual, expected;
+  int mismatches = 0;
+  int idx;
+
+  /* Consume MMXops two entries at a time as (first, second) operands.  */
+  for (idx = 0; idx < MMX_num_ops; idx += 2)
+    {
+      test_psubusw (&MMXops[idx], &MMXops[idx + 1], &actual);
+      compute_correct_result (&MMXops[idx], &MMXops[idx + 1], &expected);
+      if (expected != actual)
+        mismatches++;
+    }
+
+  if (mismatches)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-psubw.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-psubw.c
new file mode 100644
index 00000000000..4d53c8c419a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-psubw.c
@@ -0,0 +1,48 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_psubw (long long *ll1, long long *ll2, long long *r)
+{
+  /* Treat the three 64-bit slots as __m64 for the intrinsic call.  */
+  __m64 *out = (__m64 *) r;
+  *out = _m_psubw (*(__m64 *) ll1, *(__m64 *) ll2);
+}
+
+/* Reference model: 16-bit lane-wise DST - SRC over all four lanes.  */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+                        long long *res_p)
+{
+  short *minuend = (short *) dst_p;
+  short *subtrahend = (short *) src_p;
+  short *diff = (short *) res_p;
+  int lane = 0;
+  for (; lane < 4; lane++)
+    diff[lane] = minuend[lane] - subtrahend[lane];
+}
+
+static void
+sse2_test (void)
+{
+  long long actual, expected;
+  int mismatches = 0;
+  int idx;
+
+  /* Consume MMXops two entries at a time as (first, second) operands.  */
+  for (idx = 0; idx < MMX_num_ops; idx += 2)
+    {
+      test_psubw (&MMXops[idx], &MMXops[idx + 1], &actual);
+      compute_correct_result (&MMXops[idx], &MMXops[idx + 1], &expected);
+      if (expected != actual)
+        mismatches++;
+    }
+
+  if (mismatches)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-punpckhbw.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-punpckhbw.c
new file mode 100644
index 00000000000..07281f2c3a3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-punpckhbw.c
@@ -0,0 +1,53 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_punpckhbw (long long *ll1, long long *ll2, long long *r)
+{
+  /* Treat the three 64-bit slots as __m64 for the intrinsic call.  */
+  __m64 *out = (__m64 *) r;
+  *out = _m_punpckhbw (*(__m64 *) ll1, *(__m64 *) ll2);
+}
+
+/* Reference model: interleave bytes 4..7 of DST with bytes 4..7 of SRC.  */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+                        long long *res_p)
+{
+  char *first = (char *) dst_p;
+  char *second = (char *) src_p;
+  char *out = (char *) res_p;
+  int k;
+
+  /* Output pair K takes byte 4 + K of DST, then byte 4 + K of SRC.  */
+  for (k = 0; k < 4; k++)
+    {
+      out[2 * k] = first[4 + k];
+      out[2 * k + 1] = second[4 + k];
+    }
+}
+
+static void
+sse2_test (void)
+{
+  long long actual, expected;
+  int mismatches = 0;
+  int idx;
+
+  /* Consume MMXops two entries at a time as (first, second) operands.  */
+  for (idx = 0; idx < MMX_num_ops; idx += 2)
+    {
+      test_punpckhbw (&MMXops[idx], &MMXops[idx + 1], &actual);
+      compute_correct_result (&MMXops[idx], &MMXops[idx + 1], &expected);
+      if (expected != actual)
+        mismatches++;
+    }
+
+  if (mismatches)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-punpckhdq.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-punpckhdq.c
new file mode 100644
index 00000000000..6c67af92b19
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-punpckhdq.c
@@ -0,0 +1,47 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_punpckhdq (long long *ll1, long long *ll2, long long *r)
+{
+  /* Treat the three 64-bit slots as __m64 for the intrinsic call.  */
+  __m64 *out = (__m64 *) r;
+  *out = _m_punpckhdq (*(__m64 *) ll1, *(__m64 *) ll2);
+}
+
+/* Reference model: pair 32-bit element 1 of DST with element 1 of SRC.  */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+                        long long *res_p)
+{
+  int *first_hi = (int *) dst_p + 1;
+  int *second_hi = (int *) src_p + 1;
+  int *out = (int *) res_p;
+  out[0] = *first_hi;
+  out[1] = *second_hi;
+}
+
+static void
+sse2_test (void)
+{
+  long long actual, expected;
+  int mismatches = 0;
+  int idx;
+
+  /* Consume MMXops two entries at a time as (first, second) operands.  */
+  for (idx = 0; idx < MMX_num_ops; idx += 2)
+    {
+      test_punpckhdq (&MMXops[idx], &MMXops[idx + 1], &actual);
+      compute_correct_result (&MMXops[idx], &MMXops[idx + 1], &expected);
+      if (expected != actual)
+        mismatches++;
+    }
+
+  if (mismatches)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-punpckhwd.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-punpckhwd.c
new file mode 100644
index 00000000000..b6c348323ea
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-punpckhwd.c
@@ -0,0 +1,49 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_punpckhwd (long long *ll1, long long *ll2, long long *r)
+{
+  /* Treat the three 64-bit slots as __m64 for the intrinsic call.  */
+  __m64 *out = (__m64 *) r;
+  *out = _m_punpckhwd (*(__m64 *) ll1, *(__m64 *) ll2);
+}
+
+/* Reference model: interleave 16-bit elements 2 and 3 of DST and SRC.  */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+                        long long *res_p)
+{
+  short *first_hi = (short *) dst_p + 2;
+  short *second_hi = (short *) src_p + 2;
+  short *out = (short *) res_p;
+  out[0] = first_hi[0];
+  out[1] = second_hi[0];
+  out[2] = first_hi[1];
+  out[3] = second_hi[1];
+}
+
+static void
+sse2_test (void)
+{
+  long long actual, expected;
+  int mismatches = 0;
+  int idx;
+
+  /* Consume MMXops two entries at a time as (first, second) operands.  */
+  for (idx = 0; idx < MMX_num_ops; idx += 2)
+    {
+      test_punpckhwd (&MMXops[idx], &MMXops[idx + 1], &actual);
+      compute_correct_result (&MMXops[idx], &MMXops[idx + 1], &expected);
+      if (expected != actual)
+        mismatches++;
+    }
+
+  if (mismatches)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-punpcklbw.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-punpcklbw.c
new file mode 100644
index 00000000000..dcf6d3f25bb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-punpcklbw.c
@@ -0,0 +1,53 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_punpcklbw (long long *ll1, long long *ll2, long long *r)
+{
+  /* Treat the three 64-bit slots as __m64 for the intrinsic call.  */
+  __m64 *out = (__m64 *) r;
+  *out = _m_punpcklbw (*(__m64 *) ll1, *(__m64 *) ll2);
+}
+
+/* Reference model: interleave bytes 0..3 of DST with bytes 0..3 of SRC.  */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+                        long long *res_p)
+{
+  char *first = (char *) dst_p;
+  char *second = (char *) src_p;
+  char *out = (char *) res_p;
+  int k;
+
+  /* Output pair K takes byte K of DST, then byte K of SRC.  */
+  for (k = 0; k < 4; k++)
+    {
+      out[2 * k] = first[k];
+      out[2 * k + 1] = second[k];
+    }
+}
+
+static void
+sse2_test (void)
+{
+  long long actual, expected;
+  int mismatches = 0;
+  int idx;
+
+  /* Consume MMXops two entries at a time as (first, second) operands.  */
+  for (idx = 0; idx < MMX_num_ops; idx += 2)
+    {
+      test_punpcklbw (&MMXops[idx], &MMXops[idx + 1], &actual);
+      compute_correct_result (&MMXops[idx], &MMXops[idx + 1], &expected);
+      if (expected != actual)
+        mismatches++;
+    }
+
+  if (mismatches)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-punpckldq.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-punpckldq.c
new file mode 100644
index 00000000000..463c6d2c989
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-punpckldq.c
@@ -0,0 +1,47 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_punpckldq (long long *ll1, long long *ll2, long long *r)
+{
+  /* Treat the three 64-bit slots as __m64 for the intrinsic call.  */
+  __m64 *out = (__m64 *) r;
+  *out = _m_punpckldq (*(__m64 *) ll1, *(__m64 *) ll2);
+}
+
+/* Reference model: pair 32-bit element 0 of DST with element 0 of SRC.  */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+                        long long *res_p)
+{
+  int *first = (int *) dst_p;
+  int *second = (int *) src_p;
+  int *out = (int *) res_p;
+  *out = *first;
+  *(out + 1) = *second;
+}
+
+static void
+sse2_test (void)
+{
+  long long actual, expected;
+  int mismatches = 0;
+  int idx;
+
+  /* Consume MMXops two entries at a time as (first, second) operands.  */
+  for (idx = 0; idx < MMX_num_ops; idx += 2)
+    {
+      test_punpckldq (&MMXops[idx], &MMXops[idx + 1], &actual);
+      compute_correct_result (&MMXops[idx], &MMXops[idx + 1], &expected);
+      if (expected != actual)
+        mismatches++;
+    }
+
+  if (mismatches)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-punpcklwd.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-punpcklwd.c
new file mode 100644
index 00000000000..acd62f5bd74
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-punpcklwd.c
@@ -0,0 +1,49 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_punpcklwd (long long *ll1, long long *ll2, long long *r)
+{
+  /* Treat the three 64-bit slots as __m64 for the intrinsic call.  */
+  __m64 *out = (__m64 *) r;
+  *out = _m_punpcklwd (*(__m64 *) ll1, *(__m64 *) ll2);
+}
+
+/* Reference model: interleave 16-bit elements 0 and 1 of DST and SRC.  */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+                        long long *res_p)
+{
+  short *first = (short *) dst_p;
+  short *second = (short *) src_p;
+  short *out = (short *) res_p;
+  *out++ = first[0];
+  *out++ = second[0];
+  *out++ = first[1];
+  *out = second[1];
+}
+
+static void
+sse2_test (void)
+{
+  long long actual, expected;
+  int mismatches = 0;
+  int idx;
+
+  /* Consume MMXops two entries at a time as (first, second) operands.  */
+  for (idx = 0; idx < MMX_num_ops; idx += 2)
+    {
+      test_punpcklwd (&MMXops[idx], &MMXops[idx + 1], &actual);
+      compute_correct_result (&MMXops[idx], &MMXops[idx + 1], &expected);
+      if (expected != actual)
+        mismatches++;
+    }
+
+  if (mismatches)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-pxor.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-pxor.c
new file mode 100644
index 00000000000..be632989c35
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-pxor.c
@@ -0,0 +1,44 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_pxor (long long *ll1, long long *ll2, long long *r)
+{
+  /* Treat the three 64-bit slots as __m64 for the intrinsic call.  */
+  __m64 *out = (__m64 *) r;
+  *out = _m_pxor (*(__m64 *) ll1, *(__m64 *) ll2);
+}
+
+/* Reference model: 64-bit bitwise exclusive OR of DST and SRC.  */
+static void
+compute_correct_result (unsigned long long *dst,
+                        unsigned long long *src,
+                        unsigned long long *res)
+{
+  *res = *dst ^ *src;
+}
+
+static void
+sse2_test (void)
+{
+  long long actual, expected;
+  int mismatches = 0;
+  int idx;
+
+  /* Consume MMXops two entries at a time as (first, second) operands.  */
+  for (idx = 0; idx < MMX_num_ops; idx += 2)
+    {
+      test_pxor (&MMXops[idx], &MMXops[idx + 1], &actual);
+      compute_correct_result (&MMXops[idx], &MMXops[idx + 1], &expected);
+      if (expected != actual)
+        mismatches++;
+    }
+
+  if (mismatches)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx.c b/gcc/testsuite/gcc.target/i386/sse2-mmx.c
index fb226a8e8f3..338cb9da289 100644
--- a/gcc/testsuite/gcc.target/i386/sse2-mmx.c
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx.c
@@ -4,7 +4,6 @@
#include "sse2-check.h"
-#include <mmintrin.h>
#define N 4
--
2.20.1
^ permalink raw reply [flat|nested] 50+ messages in thread
* [PATCH 30/42] i386: Emulate MMX ssse3_pmaddubsw with SSE
2019-02-16 0:34 [PATCH 00/42] V7: Emulate MMX intrinsics with SSE H.J. Lu
` (21 preceding siblings ...)
2019-02-16 0:35 ` [PATCH 42/42] i386: Add tests for MMX intrinsic emulations with SSE H.J. Lu
@ 2019-02-16 0:35 ` H.J. Lu
2019-02-16 0:35 ` [PATCH 05/42] i386: Emulate MMX mulv4hi3 " H.J. Lu
` (18 subsequent siblings)
41 siblings, 0 replies; 50+ messages in thread
From: H.J. Lu @ 2019-02-16 0:35 UTC (permalink / raw)
To: gcc-patches; +Cc: Uros Bizjak
Emulate MMX ssse3_pmaddubsw with SSE. Only SSE register source operand
is allowed.
PR target/89021
* config/i386/sse.md (ssse3_pmaddubsw): Add SSE emulation.
---
gcc/config/i386/sse.md | 18 +++++++++++-------
1 file changed, 11 insertions(+), 7 deletions(-)
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 0565ddc177f..dc07173cb1c 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -15544,17 +15544,17 @@
(set_attr "mode" "TI")])
(define_insn "ssse3_pmaddubsw"
- [(set (match_operand:V4HI 0 "register_operand" "=y")
+ [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
(ss_plus:V4HI
(mult:V4HI
(zero_extend:V4HI
(vec_select:V4QI
- (match_operand:V8QI 1 "register_operand" "0")
+ (match_operand:V8QI 1 "register_operand" "0,0,Yv")
(parallel [(const_int 0) (const_int 2)
(const_int 4) (const_int 6)])))
(sign_extend:V4HI
(vec_select:V4QI
- (match_operand:V8QI 2 "nonimmediate_operand" "ym")
+ (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yv")
(parallel [(const_int 0) (const_int 2)
(const_int 4) (const_int 6)]))))
(mult:V4HI
@@ -15566,13 +15566,17 @@
(vec_select:V4QI (match_dup 2)
(parallel [(const_int 1) (const_int 3)
(const_int 5) (const_int 7)]))))))]
- "TARGET_SSSE3"
- "pmaddubsw\t{%2, %0|%0, %2}"
- [(set_attr "type" "sseiadd")
+ "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
+ "@
+ pmaddubsw\t{%2, %0|%0, %2}
+ pmaddubsw\t{%2, %0|%0, %2}
+ vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+ (set_attr "type" "sseiadd")
(set_attr "atom_unit" "simul")
(set_attr "prefix_extra" "1")
(set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
- (set_attr "mode" "DI")])
+ (set_attr "mode" "DI,TI,TI")])
(define_mode_iterator PMULHRSW
[V4HI V8HI (V16HI "TARGET_AVX2")])
--
2.20.1
^ permalink raw reply [flat|nested] 50+ messages in thread
* [PATCH 05/42] i386: Emulate MMX mulv4hi3 with SSE
2019-02-16 0:34 [PATCH 00/42] V7: Emulate MMX intrinsics with SSE H.J. Lu
` (22 preceding siblings ...)
2019-02-16 0:35 ` [PATCH 30/42] i386: Emulate MMX ssse3_pmaddubsw " H.J. Lu
@ 2019-02-16 0:35 ` H.J. Lu
2019-02-16 0:35 ` [PATCH 19/42] i386: Emulate MMX mmx_pmovmskb " H.J. Lu
` (17 subsequent siblings)
41 siblings, 0 replies; 50+ messages in thread
From: H.J. Lu @ 2019-02-16 0:35 UTC (permalink / raw)
To: gcc-patches; +Cc: Uros Bizjak
Emulate MMX mulv4hi3 with SSE. Only SSE register source operand is
allowed.
PR target/89021
* config/i386/mmx.md (mmx_mulv4hi3): Also allow
TARGET_MMX_WITH_SSE.
(mulv4hi3): New.
(*mmx_mulv4hi3): Also allow TARGET_MMX_WITH_SSE. Add SSE
support.
---
gcc/config/i386/mmx.md | 32 ++++++++++++++++++++++----------
1 file changed, 22 insertions(+), 10 deletions(-)
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 517c3283963..cdb0f698001 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -716,19 +716,31 @@
(define_expand "mmx_mulv4hi3"
[(set (match_operand:V4HI 0 "register_operand")
- (mult:V4HI (match_operand:V4HI 1 "nonimmediate_operand")
- (match_operand:V4HI 2 "nonimmediate_operand")))]
- "TARGET_MMX"
+ (mult:V4HI (match_operand:V4HI 1 "register_mmxmem_operand")
+ (match_operand:V4HI 2 "register_mmxmem_operand")))]
+ "TARGET_MMX || TARGET_MMX_WITH_SSE"
+ "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
+
+(define_expand "mulv4hi3"
+ [(set (match_operand:V4HI 0 "register_operand")
+ (mult:V4HI (match_operand:V4HI 1 "register_operand")
+ (match_operand:V4HI 2 "register_operand")))]
+ "TARGET_MMX_WITH_SSE"
"ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
(define_insn "*mmx_mulv4hi3"
- [(set (match_operand:V4HI 0 "register_operand" "=y")
- (mult:V4HI (match_operand:V4HI 1 "nonimmediate_operand" "%0")
- (match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
- "TARGET_MMX && ix86_binary_operator_ok (MULT, V4HImode, operands)"
- "pmullw\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmxmul")
- (set_attr "mode" "DI")])
+ [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
+ (mult:V4HI (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yv")
+ (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv")))]
+ "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+ && ix86_binary_operator_ok (MULT, V4HImode, operands)"
+ "@
+ pmullw\t{%2, %0|%0, %2}
+ pmullw\t{%2, %0|%0, %2}
+ vpmullw\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+ (set_attr "type" "mmxmul,ssemul,ssemul")
+ (set_attr "mode" "DI,TI,TI")])
(define_expand "mmx_smulv4hi3_highpart"
[(set (match_operand:V4HI 0 "register_operand")
--
2.20.1
^ permalink raw reply [flat|nested] 50+ messages in thread
* [PATCH 19/42] i386: Emulate MMX mmx_pmovmskb with SSE
2019-02-16 0:34 [PATCH 00/42] V7: Emulate MMX intrinsics with SSE H.J. Lu
` (23 preceding siblings ...)
2019-02-16 0:35 ` [PATCH 05/42] i386: Emulate MMX mulv4hi3 " H.J. Lu
@ 2019-02-16 0:35 ` H.J. Lu
2019-02-16 0:35 ` [PATCH 07/42] i386: Emulate MMX mmx_pmaddwd " H.J. Lu
` (16 subsequent siblings)
41 siblings, 0 replies; 50+ messages in thread
From: H.J. Lu @ 2019-02-16 0:35 UTC (permalink / raw)
To: gcc-patches; +Cc: Uros Bizjak
Emulate MMX mmx_pmovmskb with SSE by zero-extending result of SSE pmovmskb
from QImode to SImode. Only SSE register source operand is allowed.
PR target/89021
* config/i386/mmx.md (mmx_pmovmskb): Changed to
define_insn_and_split to support SSE emulation.
---
gcc/config/i386/mmx.md | 30 +++++++++++++++++++++++-------
1 file changed, 23 insertions(+), 7 deletions(-)
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 058791e01e6..9c552f929f1 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -1762,14 +1762,30 @@
[(set_attr "type" "mmxshft")
(set_attr "mode" "DI")])
-(define_insn "mmx_pmovmskb"
- [(set (match_operand:SI 0 "register_operand" "=r")
- (unspec:SI [(match_operand:V8QI 1 "register_operand" "y")]
+(define_insn_and_split "mmx_pmovmskb"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (unspec:SI [(match_operand:V8QI 1 "register_operand" "y,x")]
UNSPEC_MOVMSK))]
- "TARGET_SSE || TARGET_3DNOW_A"
- "pmovmskb\t{%1, %0|%0, %1}"
- [(set_attr "type" "mmxcvt")
- (set_attr "mode" "DI")])
+ "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+ && (TARGET_SSE || TARGET_3DNOW_A)"
+ "@
+ pmovmskb\t{%1, %0|%0, %1}
+ #"
+ "TARGET_MMX_WITH_SSE && reload_completed"
+ [(set (match_dup 0)
+ (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK))
+ (set (match_dup 0)
+ (zero_extend:SI (match_dup 2)))]
+{
+ /* Generate SSE pmovmskb and zero-extend from QImode to SImode. */
+ operands[1] = lowpart_subreg (V16QImode, operands[1],
+ GET_MODE (operands[1]));
+ operands[2] = lowpart_subreg (QImode, operands[0],
+ GET_MODE (operands[0]));
+}
+ [(set_attr "mmx_isa" "native,x64")
+ (set_attr "type" "mmxcvt,ssemov")
+ (set_attr "mode" "DI,TI")])
(define_expand "mmx_maskmovq"
[(set (match_operand:V8QI 0 "memory_operand")
--
2.20.1
^ permalink raw reply [flat|nested] 50+ messages in thread
* [PATCH 07/42] i386: Emulate MMX mmx_pmaddwd with SSE
2019-02-16 0:34 [PATCH 00/42] V7: Emulate MMX intrinsics with SSE H.J. Lu
` (24 preceding siblings ...)
2019-02-16 0:35 ` [PATCH 19/42] i386: Emulate MMX mmx_pmovmskb " H.J. Lu
@ 2019-02-16 0:35 ` H.J. Lu
2019-02-16 0:43 ` [PATCH 25/42] i386: Emulate MMX movntq with SSE2 movntidi H.J. Lu
` (15 subsequent siblings)
41 siblings, 0 replies; 50+ messages in thread
From: H.J. Lu @ 2019-02-16 0:35 UTC (permalink / raw)
To: gcc-patches; +Cc: Uros Bizjak
Emulate MMX pmaddwd with SSE. Only SSE register source operand is
allowed.
PR target/89021
* config/i386/mmx.md (mmx_pmaddwd): Also allow TARGET_MMX_WITH_SSE.
(*mmx_pmaddwd): Also allow TARGET_MMX_WITH_SSE. Add SSE support.
---
gcc/config/i386/mmx.md | 25 +++++++++++++++----------
1 file changed, 15 insertions(+), 10 deletions(-)
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 3a7964d52bb..9f0311badca 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -810,11 +810,11 @@
(mult:V2SI
(sign_extend:V2SI
(vec_select:V2HI
- (match_operand:V4HI 1 "nonimmediate_operand")
+ (match_operand:V4HI 1 "register_mmxmem_operand")
(parallel [(const_int 0) (const_int 2)])))
(sign_extend:V2SI
(vec_select:V2HI
- (match_operand:V4HI 2 "nonimmediate_operand")
+ (match_operand:V4HI 2 "register_mmxmem_operand")
(parallel [(const_int 0) (const_int 2)]))))
(mult:V2SI
(sign_extend:V2SI
@@ -823,20 +823,20 @@
(sign_extend:V2SI
(vec_select:V2HI (match_dup 2)
(parallel [(const_int 1) (const_int 3)]))))))]
- "TARGET_MMX"
+ "TARGET_MMX || TARGET_MMX_WITH_SSE"
"ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
(define_insn "*mmx_pmaddwd"
- [(set (match_operand:V2SI 0 "register_operand" "=y")
+ [(set (match_operand:V2SI 0 "register_operand" "=y,x,Yv")
(plus:V2SI
(mult:V2SI
(sign_extend:V2SI
(vec_select:V2HI
- (match_operand:V4HI 1 "nonimmediate_operand" "%0")
+ (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yv")
(parallel [(const_int 0) (const_int 2)])))
(sign_extend:V2SI
(vec_select:V2HI
- (match_operand:V4HI 2 "nonimmediate_operand" "ym")
+ (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv")
(parallel [(const_int 0) (const_int 2)]))))
(mult:V2SI
(sign_extend:V2SI
@@ -845,10 +845,15 @@
(sign_extend:V2SI
(vec_select:V2HI (match_dup 2)
(parallel [(const_int 1) (const_int 3)]))))))]
- "TARGET_MMX && ix86_binary_operator_ok (MULT, V4HImode, operands)"
- "pmaddwd\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmxmul")
- (set_attr "mode" "DI")])
+ "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+ && ix86_binary_operator_ok (MULT, V4HImode, operands)"
+ "@
+ pmaddwd\t{%2, %0|%0, %2}
+ pmaddwd\t{%2, %0|%0, %2}
+ vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+ (set_attr "type" "mmxmul,sseiadd,sseiadd")
+ (set_attr "mode" "DI,TI,TI")])
(define_expand "mmx_pmulhrwv4hi3"
[(set (match_operand:V4HI 0 "register_operand")
--
2.20.1
^ permalink raw reply [flat|nested] 50+ messages in thread
* [PATCH 25/42] i386: Emulate MMX movntq with SSE2 movntidi
2019-02-16 0:34 [PATCH 00/42] V7: Emulate MMX intrinsics with SSE H.J. Lu
` (25 preceding siblings ...)
2019-02-16 0:35 ` [PATCH 07/42] i386: Emulate MMX mmx_pmaddwd " H.J. Lu
@ 2019-02-16 0:43 ` H.J. Lu
2019-02-16 0:43 ` [PATCH 27/42] i386: Make _mm_empty () as NOP when MMX is disabled H.J. Lu
` (14 subsequent siblings)
41 siblings, 0 replies; 50+ messages in thread
From: H.J. Lu @ 2019-02-16 0:43 UTC (permalink / raw)
To: gcc-patches; +Cc: Uros Bizjak
Emulate MMX movntq with SSE2 movntidi. Only register source operand is
allowed.
PR target/89021
* config/i386/mmx.md (sse_movntq): Add SSE2 emulation.
---
gcc/config/i386/mmx.md | 14 +++++++++-----
1 file changed, 9 insertions(+), 5 deletions(-)
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index bcce7c06c4f..993ad99a36e 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -214,12 +214,16 @@
})
(define_insn "sse_movntq"
- [(set (match_operand:DI 0 "memory_operand" "=m")
- (unspec:DI [(match_operand:DI 1 "register_operand" "y")]
+ [(set (match_operand:DI 0 "memory_operand" "=m,m")
+ (unspec:DI [(match_operand:DI 1 "register_operand" "y,r")]
UNSPEC_MOVNTQ))]
- "TARGET_SSE || TARGET_3DNOW_A"
- "movntq\t{%1, %0|%0, %1}"
- [(set_attr "type" "mmxmov")
+ "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+ && (TARGET_SSE || TARGET_3DNOW_A)"
+ "@
+ movntq\t{%1, %0|%0, %1}
+ movnti\t{%1, %0|%0, %1}"
+ [(set_attr "mmx_isa" "native,x64")
+ (set_attr "type" "mmxmov,ssemov")
(set_attr "mode" "DI")])
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
--
2.20.1
^ permalink raw reply [flat|nested] 50+ messages in thread
* [PATCH 27/42] i386: Make _mm_empty () as NOP when MMX is disabled
2019-02-16 0:34 [PATCH 00/42] V7: Emulate MMX intrinsics with SSE H.J. Lu
` (26 preceding siblings ...)
2019-02-16 0:43 ` [PATCH 25/42] i386: Emulate MMX movntq with SSE2 movntidi H.J. Lu
@ 2019-02-16 0:43 ` H.J. Lu
2019-02-16 8:58 ` Uros Bizjak
2019-02-16 0:43 ` [PATCH 28/42] i386: Emulate MMX ssse3_ph<plusminus_mnemonic>wv4hi3 with SSE H.J. Lu
` (13 subsequent siblings)
41 siblings, 1 reply; 50+ messages in thread
From: H.J. Lu @ 2019-02-16 0:43 UTC (permalink / raw)
To: gcc-patches; +Cc: Uros Bizjak
With SSE emulation of MMX intrinsics, we should make _mm_empty () a NOP
when MMX is disabled.
PR target/89021
* config/i386/mmx.md (mmx_<emms>): Renamed to ...
(mmx_<emms>_1): This.
(mmx_<emms>): New expander.
---
gcc/config/i386/mmx.md | 29 ++++++++++++++++++++++++++++-
1 file changed, 28 insertions(+), 1 deletion(-)
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 9cf0251293a..0f925c0b1ea 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -1848,7 +1848,34 @@
[(UNSPECV_EMMS "emms")
(UNSPECV_FEMMS "femms")])
-(define_insn "mmx_<emms>"
+(define_expand "mmx_<emms>"
+ [(unspec_volatile [(const_int 0)] EMMS)
+ (clobber (reg:XF ST0_REG))
+ (clobber (reg:XF ST1_REG))
+ (clobber (reg:XF ST2_REG))
+ (clobber (reg:XF ST3_REG))
+ (clobber (reg:XF ST4_REG))
+ (clobber (reg:XF ST5_REG))
+ (clobber (reg:XF ST6_REG))
+ (clobber (reg:XF ST7_REG))
+ (clobber (reg:DI MM0_REG))
+ (clobber (reg:DI MM1_REG))
+ (clobber (reg:DI MM2_REG))
+ (clobber (reg:DI MM3_REG))
+ (clobber (reg:DI MM4_REG))
+ (clobber (reg:DI MM5_REG))
+ (clobber (reg:DI MM6_REG))
+ (clobber (reg:DI MM7_REG))]
+ "TARGET_MMX || TARGET_MMX_WITH_SSE"
+{
+ if (TARGET_MMX)
+ emit_insn (gen_mmx_<emms>_1 ());
+ else
+ emit_insn (gen_nop ());
+ DONE;
+})
+
+(define_insn "mmx_<emms>_1"
[(unspec_volatile [(const_int 0)] EMMS)
(clobber (reg:XF ST0_REG))
(clobber (reg:XF ST1_REG))
--
2.20.1
^ permalink raw reply [flat|nested] 50+ messages in thread
* Re: [PATCH 27/42] i386: Make _mm_empty () as NOP when MMX is disabled
2019-02-16 0:43 ` [PATCH 27/42] i386: Make _mm_empty () as NOP when MMX is disabled H.J. Lu
@ 2019-02-16 8:58 ` Uros Bizjak
2019-02-16 14:56 ` H.J. Lu
0 siblings, 1 reply; 50+ messages in thread
From: Uros Bizjak @ 2019-02-16 8:58 UTC (permalink / raw)
To: H.J. Lu; +Cc: gcc-patches
On 2/16/19, H.J. Lu <hjl.tools@gmail.com> wrote:
> With SSE emulation of MMX intrinsics, we should make _mm_empty () as NOP
> when MMX is disabled.
>
> PR target/89021
> * config/i386/mmx.md (mmx_<emms>): Renamed to ...
> (mmx_<emms>_1): This.
> (mmx_<emms>): New expander.
> ---
> gcc/config/i386/mmx.md | 29 ++++++++++++++++++++++++++++-
> 1 file changed, 28 insertions(+), 1 deletion(-)
>
> diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
> index 9cf0251293a..0f925c0b1ea 100644
> --- a/gcc/config/i386/mmx.md
> +++ b/gcc/config/i386/mmx.md
> @@ -1848,7 +1848,34 @@
> [(UNSPECV_EMMS "emms")
> (UNSPECV_FEMMS "femms")])
>
> -(define_insn "mmx_<emms>"
> +(define_expand "mmx_<emms>"
> + [(unspec_volatile [(const_int 0)] EMMS)
> + (clobber (reg:XF ST0_REG))
> + (clobber (reg:XF ST1_REG))
> + (clobber (reg:XF ST2_REG))
> + (clobber (reg:XF ST3_REG))
> + (clobber (reg:XF ST4_REG))
> + (clobber (reg:XF ST5_REG))
> + (clobber (reg:XF ST6_REG))
> + (clobber (reg:XF ST7_REG))
> + (clobber (reg:DI MM0_REG))
> + (clobber (reg:DI MM1_REG))
> + (clobber (reg:DI MM2_REG))
> + (clobber (reg:DI MM3_REG))
> + (clobber (reg:DI MM4_REG))
> + (clobber (reg:DI MM5_REG))
> + (clobber (reg:DI MM6_REG))
> + (clobber (reg:DI MM7_REG))]
> + "TARGET_MMX || TARGET_MMX_WITH_SSE"
> +{
> + if (TARGET_MMX)
> + emit_insn (gen_mmx_<emms>_1 ());
> + else
> + emit_insn (gen_nop ());
> + DONE;
The above should be written as:
if (!TARGET_MMX)
{
emit_insn (gen_nop ());
DONE;
}
> +})
> +
> +(define_insn "mmx_<emms>_1"
The old insn should be renamed to "*mmx_<emms>".
Uros.
> [(unspec_volatile [(const_int 0)] EMMS)
> (clobber (reg:XF ST0_REG))
> (clobber (reg:XF ST1_REG))
> --
> 2.20.1
>
>
^ permalink raw reply [flat|nested] 50+ messages in thread
* Re: [PATCH 27/42] i386: Make _mm_empty () as NOP when MMX is disabled
2019-02-16 8:58 ` Uros Bizjak
@ 2019-02-16 14:56 ` H.J. Lu
2019-02-16 19:02 ` Uros Bizjak
0 siblings, 1 reply; 50+ messages in thread
From: H.J. Lu @ 2019-02-16 14:56 UTC (permalink / raw)
To: Uros Bizjak; +Cc: GCC Patches
On Sat, Feb 16, 2019 at 12:58 AM Uros Bizjak <ubizjak@gmail.com> wrote:
>
> On 2/16/19, H.J. Lu <hjl.tools@gmail.com> wrote:
> > With SSE emulation of MMX intrinsics, we should make _mm_empty () as NOP
> > when MMX is disabled.
> >
> > PR target/89021
> > * config/i386/mmx.md (mmx_<emms>): Renamed to ...
> > (mmx_<emms>_1): This.
> > (mmx_<emms>): New expander.
> > ---
> > gcc/config/i386/mmx.md | 29 ++++++++++++++++++++++++++++-
> > 1 file changed, 28 insertions(+), 1 deletion(-)
> >
> > diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
> > index 9cf0251293a..0f925c0b1ea 100644
> > --- a/gcc/config/i386/mmx.md
> > +++ b/gcc/config/i386/mmx.md
> > @@ -1848,7 +1848,34 @@
> > [(UNSPECV_EMMS "emms")
> > (UNSPECV_FEMMS "femms")])
> >
> > -(define_insn "mmx_<emms>"
> > +(define_expand "mmx_<emms>"
> > + [(unspec_volatile [(const_int 0)] EMMS)
> > + (clobber (reg:XF ST0_REG))
> > + (clobber (reg:XF ST1_REG))
> > + (clobber (reg:XF ST2_REG))
> > + (clobber (reg:XF ST3_REG))
> > + (clobber (reg:XF ST4_REG))
> > + (clobber (reg:XF ST5_REG))
> > + (clobber (reg:XF ST6_REG))
> > + (clobber (reg:XF ST7_REG))
> > + (clobber (reg:DI MM0_REG))
> > + (clobber (reg:DI MM1_REG))
> > + (clobber (reg:DI MM2_REG))
> > + (clobber (reg:DI MM3_REG))
> > + (clobber (reg:DI MM4_REG))
> > + (clobber (reg:DI MM5_REG))
> > + (clobber (reg:DI MM6_REG))
> > + (clobber (reg:DI MM7_REG))]
> > + "TARGET_MMX || TARGET_MMX_WITH_SSE"
> > +{
> > + if (TARGET_MMX)
> > + emit_insn (gen_mmx_<emms>_1 ());
> > + else
> > + emit_insn (gen_nop ());
> > + DONE;
>
> The above should be written as:
>
> if (!TARGET_MMX)
> {
> > emit_insn (gen_nop ());
> DONE;
> }
>
> > +})
> > +
> > +(define_insn "mmx_<emms>_1"
>
> The old insn should be renamed to "*mmx_<emms>".
>
> Uros.
Tried and got
[hjl@gnu-cfl-2 gcc]$ cat x.c
#include <mmintrin.h>
void
foo (void)
{
_mm_empty ();
}
[hjl@gnu-cfl-2 gcc]$ ./xgcc -B./ -S x.c -da
x.c: In function ‘foo’:
x.c:7:1: error: unrecognizable insn:
7 | }
| ^
(insn 5 2 6 2 (unspec_volatile [
(const_int 0 [0])
] UNSPECV_EMMS) "./include/mmintrin.h":60:3 -1
(nil))
during RTL pass: vregs
dump file: x.c.234r.vregs
x.c:7:1: internal compiler error: in extract_insn, at recog.c:2310
0x10ad84d _fatal_insn(char const*, rtx_def const*, char const*, int,
char const*)
/export/gnu/import/git/gitlab/x86-gcc/gcc/rtl-error.c:108
0x10ad88e _fatal_insn_not_found(rtx_def const*, char const*, int, char const*)
/export/gnu/import/git/gitlab/x86-gcc/gcc/rtl-error.c:116
0x1042abb extract_insn(rtx_insn*)
/export/gnu/import/git/gitlab/x86-gcc/gcc/recog.c:2310
0xc95912 instantiate_virtual_regs_in_insn
/export/gnu/import/git/gitlab/x86-gcc/gcc/function.c:1654
0xc96d44 instantiate_virtual_regs
/export/gnu/import/git/gitlab/x86-gcc/gcc/function.c:1975
0xc96e0e execute
/export/gnu/import/git/gitlab/x86-gcc/gcc/function.c:2024
Please submit a full bug report,
with preprocessed source if appropriate.
Please include the complete backtrace with any bug report.
See <https://gcc.gnu.org/bugs/> for instructions.
[hjl@gnu-cfl-2 gcc]$
;;
;; Full RTL generated for this function:
;;
(note 1 0 3 NOTE_INSN_DELETED)
;; basic block 2, loop depth 0, maybe hot
;; prev block 0, next block 1, flags: (NEW, REACHABLE, RTL)
;; pred: ENTRY (FALLTHRU)
(note 3 1 2 2 [bb 2] NOTE_INSN_BASIC_BLOCK)
(note 2 3 5 2 NOTE_INSN_FUNCTION_BEG)
(insn 5 2 6 2 (unspec_volatile [
(const_int 0 [0])
] UNSPECV_EMMS) "./include/mmintrin.h":60:3 -1
(nil))
(insn 6 5 7 2 (clobber (reg:XF 8 st)) "./include/mmintrin.h":60:3 -1
(nil))
(insn 7 6 8 2 (clobber (reg:XF 9 st(1))) "./include/mmintrin.h":60:3 -1
(nil))
(insn 8 7 9 2 (clobber (reg:XF 10 st(2))) "./include/mmintrin.h":60:3 -1
(nil))
(insn 9 8 10 2 (clobber (reg:XF 11 st(3))) "./include/mmintrin.h":60:3 -1
(nil))
(insn 10 9 11 2 (clobber (reg:XF 12 st(4))) "./include/mmintrin.h":60:3 -1
(nil))
(insn 11 10 12 2 (clobber (reg:XF 13 st(5))) "./include/mmintrin.h":60:3 -1
(nil))
(insn 12 11 13 2 (clobber (reg:XF 14 st(6))) "./include/mmintrin.h":60:3 -1
(nil))
(insn 13 12 14 2 (clobber (reg:XF 15 st(7))) "./include/mmintrin.h":60:3 -1
(nil))
(insn 14 13 15 2 (clobber (reg:DI 28 mm0)) "./include/mmintrin.h":60:3 -1
(nil))
(insn 15 14 16 2 (clobber (reg:DI 29 mm1)) "./include/mmintrin.h":60:3 -1
(nil))
(insn 16 15 17 2 (clobber (reg:DI 30 mm2)) "./include/mmintrin.h":60:3 -1
(nil))
(insn 17 16 18 2 (clobber (reg:DI 31 mm3)) "./include/mmintrin.h":60:3 -1
(nil))
(insn 18 17 19 2 (clobber (reg:DI 32 mm4)) "./include/mmintrin.h":60:3 -1
(nil))
(insn 19 18 20 2 (clobber (reg:DI 33 mm5)) "./include/mmintrin.h":60:3 -1
(nil))
(insn 20 19 21 2 (clobber (reg:DI 34 mm6)) "./include/mmintrin.h":60:3 -1
(nil))
(insn 21 20 25 2 (clobber (reg:DI 35 mm7)) "./include/mmintrin.h":60:3 -1
(nil))
(insn 25 21 0 2 (const_int 0 [0]) "./include/mmintrin.h":61:1 -1
(nil))
;; succ: EXIT [always] (FALLTHRU)
--
H.J.
^ permalink raw reply [flat|nested] 50+ messages in thread
* Re: [PATCH 27/42] i386: Make _mm_empty () as NOP when MMX is disabled
2019-02-16 14:56 ` H.J. Lu
@ 2019-02-16 19:02 ` Uros Bizjak
0 siblings, 0 replies; 50+ messages in thread
From: Uros Bizjak @ 2019-02-16 19:02 UTC (permalink / raw)
To: H.J. Lu; +Cc: GCC Patches
On 2/16/19, H.J. Lu <hjl.tools@gmail.com> wrote:
> On Sat, Feb 16, 2019 at 12:58 AM Uros Bizjak <ubizjak@gmail.com> wrote:
>>
>> On 2/16/19, H.J. Lu <hjl.tools@gmail.com> wrote:
>> > With SSE emulation of MMX intrinsics, we should make _mm_empty () as
>> > NOP
>> > when MMX is disabled.
>> >
>> > PR target/89021
>> > * config/i386/mmx.md (mmx_<emms>): Renamed to ...
>> > (mmx_<emms>_1): This.
>> > (mmx_<emms>): New expander.
>> > ---
>> > gcc/config/i386/mmx.md | 29 ++++++++++++++++++++++++++++-
>> > 1 file changed, 28 insertions(+), 1 deletion(-)
>> >
>> > diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
>> > index 9cf0251293a..0f925c0b1ea 100644
>> > --- a/gcc/config/i386/mmx.md
>> > +++ b/gcc/config/i386/mmx.md
>> > @@ -1848,7 +1848,34 @@
>> > [(UNSPECV_EMMS "emms")
>> > (UNSPECV_FEMMS "femms")])
>> >
>> > -(define_insn "mmx_<emms>"
>> > +(define_expand "mmx_<emms>"
>> > + [(unspec_volatile [(const_int 0)] EMMS)
>> > + (clobber (reg:XF ST0_REG))
>> > + (clobber (reg:XF ST1_REG))
>> > + (clobber (reg:XF ST2_REG))
>> > + (clobber (reg:XF ST3_REG))
>> > + (clobber (reg:XF ST4_REG))
>> > + (clobber (reg:XF ST5_REG))
>> > + (clobber (reg:XF ST6_REG))
>> > + (clobber (reg:XF ST7_REG))
>> > + (clobber (reg:DI MM0_REG))
>> > + (clobber (reg:DI MM1_REG))
>> > + (clobber (reg:DI MM2_REG))
>> > + (clobber (reg:DI MM3_REG))
>> > + (clobber (reg:DI MM4_REG))
>> > + (clobber (reg:DI MM5_REG))
>> > + (clobber (reg:DI MM6_REG))
>> > + (clobber (reg:DI MM7_REG))]
>> > + "TARGET_MMX || TARGET_MMX_WITH_SSE"
>> > +{
>> > + if (TARGET_MMX)
>> > + emit_insn (gen_mmx_<emms>_1 ());
>> > + else
>> > + emit_insn (gen_nop ());
>> > + DONE;
>>
>> The above should be written as:
>>
>> if (!TARGET_MMX)
>> {
>> emit_insn (gen_nop ());
>> DONE;
>> }
>>
>> > +})
>> > +
>> > +(define_insn "mmx_<emms>_1"
>>
>> The old insn should be renamed to "*mmx_<emms>".
>>
>> Uros.
>
> Tried and got
You have to wrap the pattern in a parallel in the expander.
Uros.
>
> [hjl@gnu-cfl-2 gcc]$ cat x.c
> #include <mmintrin.h>
>
> void
> foo (void)
> {
> _mm_empty ();
> }
> [hjl@gnu-cfl-2 gcc]$ ./xgcc -B./ -S x.c -da
> x.c: In function ‘foo’:
> x.c:7:1: error: unrecognizable insn:
> 7 | }
> | ^
> (insn 5 2 6 2 (unspec_volatile [
> (const_int 0 [0])
> ] UNSPECV_EMMS) "./include/mmintrin.h":60:3 -1
> (nil))
> during RTL pass: vregs
> dump file: x.c.234r.vregs
> x.c:7:1: internal compiler error: in extract_insn, at recog.c:2310
> 0x10ad84d _fatal_insn(char const*, rtx_def const*, char const*, int,
> char const*)
> /export/gnu/import/git/gitlab/x86-gcc/gcc/rtl-error.c:108
> 0x10ad88e _fatal_insn_not_found(rtx_def const*, char const*, int, char
> const*)
> /export/gnu/import/git/gitlab/x86-gcc/gcc/rtl-error.c:116
> 0x1042abb extract_insn(rtx_insn*)
> /export/gnu/import/git/gitlab/x86-gcc/gcc/recog.c:2310
> 0xc95912 instantiate_virtual_regs_in_insn
> /export/gnu/import/git/gitlab/x86-gcc/gcc/function.c:1654
> 0xc96d44 instantiate_virtual_regs
> /export/gnu/import/git/gitlab/x86-gcc/gcc/function.c:1975
> 0xc96e0e execute
> /export/gnu/import/git/gitlab/x86-gcc/gcc/function.c:2024
> Please submit a full bug report,
> with preprocessed source if appropriate.
> Please include the complete backtrace with any bug report.
> See <https://gcc.gnu.org/bugs/> for instructions.
> [hjl@gnu-cfl-2 gcc]$
>
> ;;
> ;; Full RTL generated for this function:
> ;;
> (note 1 0 3 NOTE_INSN_DELETED)
> ;; basic block 2, loop depth 0, maybe hot
> ;; prev block 0, next block 1, flags: (NEW, REACHABLE, RTL)
> ;; pred: ENTRY (FALLTHRU)
> (note 3 1 2 2 [bb 2] NOTE_INSN_BASIC_BLOCK)
> (note 2 3 5 2 NOTE_INSN_FUNCTION_BEG)
> (insn 5 2 6 2 (unspec_volatile [
> (const_int 0 [0])
> ] UNSPECV_EMMS) "./include/mmintrin.h":60:3 -1
> (nil))
> (insn 6 5 7 2 (clobber (reg:XF 8 st)) "./include/mmintrin.h":60:3 -1
> (nil))
> (insn 7 6 8 2 (clobber (reg:XF 9 st(1))) "./include/mmintrin.h":60:3 -1
> (nil))
> (insn 8 7 9 2 (clobber (reg:XF 10 st(2))) "./include/mmintrin.h":60:3 -1
> (nil))
> (insn 9 8 10 2 (clobber (reg:XF 11 st(3))) "./include/mmintrin.h":60:3 -1
> (nil))
> (insn 10 9 11 2 (clobber (reg:XF 12 st(4))) "./include/mmintrin.h":60:3 -1
> (nil))
> (insn 11 10 12 2 (clobber (reg:XF 13 st(5))) "./include/mmintrin.h":60:3 -1
> (nil))
> (insn 12 11 13 2 (clobber (reg:XF 14 st(6))) "./include/mmintrin.h":60:3 -1
> (nil))
> (insn 13 12 14 2 (clobber (reg:XF 15 st(7))) "./include/mmintrin.h":60:3 -1
> (nil))
> (insn 14 13 15 2 (clobber (reg:DI 28 mm0)) "./include/mmintrin.h":60:3 -1
> (nil))
> (insn 15 14 16 2 (clobber (reg:DI 29 mm1)) "./include/mmintrin.h":60:3 -1
> (nil))
> (insn 16 15 17 2 (clobber (reg:DI 30 mm2)) "./include/mmintrin.h":60:3 -1
> (nil))
> (insn 17 16 18 2 (clobber (reg:DI 31 mm3)) "./include/mmintrin.h":60:3 -1
> (nil))
> (insn 18 17 19 2 (clobber (reg:DI 32 mm4)) "./include/mmintrin.h":60:3 -1
> (nil))
> (insn 19 18 20 2 (clobber (reg:DI 33 mm5)) "./include/mmintrin.h":60:3 -1
> (nil))
> (insn 20 19 21 2 (clobber (reg:DI 34 mm6)) "./include/mmintrin.h":60:3 -1
> (nil))
> (insn 21 20 25 2 (clobber (reg:DI 35 mm7)) "./include/mmintrin.h":60:3 -1
> (nil))
> (insn 25 21 0 2 (const_int 0 [0]) "./include/mmintrin.h":61:1 -1
> (nil))
> ;; succ: EXIT [always] (FALLTHRU)
>
>
>
> --
> H.J.
>
^ permalink raw reply [flat|nested] 50+ messages in thread
* [PATCH 28/42] i386: Emulate MMX ssse3_ph<plusminus_mnemonic>wv4hi3 with SSE
2019-02-16 0:34 [PATCH 00/42] V7: Emulate MMX intrinsics with SSE H.J. Lu
` (27 preceding siblings ...)
2019-02-16 0:43 ` [PATCH 27/42] i386: Make _mm_empty () as NOP when MMX is disabled H.J. Lu
@ 2019-02-16 0:43 ` H.J. Lu
2019-02-16 0:43 ` [PATCH 41/42] i386: Enable TM MMX intrinsics with SSE2 H.J. Lu
` (12 subsequent siblings)
41 siblings, 0 replies; 50+ messages in thread
From: H.J. Lu @ 2019-02-16 0:43 UTC (permalink / raw)
To: gcc-patches; +Cc: Uros Bizjak
Emulate MMX ssse3_ph<plusminus_mnemonic>wv4hi3 with SSE by moving bits
64:95 to bits 32:63 in SSE register. Only SSE register source operand
is allowed.
PR target/89021
* config/i386/sse.md (ssse3_ph<plusminus_mnemonic>wv4hi3):
Changed to define_insn_and_split to support SSE emulation.
---
gcc/config/i386/sse.md | 34 ++++++++++++++++++++++++++--------
1 file changed, 26 insertions(+), 8 deletions(-)
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 06c9b5b58f1..38b83c57ffc 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -15232,13 +15232,13 @@
(set_attr "prefix" "orig,vex")
(set_attr "mode" "TI")])
-(define_insn "ssse3_ph<plusminus_mnemonic>wv4hi3"
- [(set (match_operand:V4HI 0 "register_operand" "=y")
+(define_insn_and_split "ssse3_ph<plusminus_mnemonic>wv4hi3"
+ [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
(vec_concat:V4HI
(vec_concat:V2HI
(ssse3_plusminus:HI
(vec_select:HI
- (match_operand:V4HI 1 "register_operand" "0")
+ (match_operand:V4HI 1 "register_operand" "0,0,Yv")
(parallel [(const_int 0)]))
(vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
(ssse3_plusminus:HI
@@ -15247,19 +15247,37 @@
(vec_concat:V2HI
(ssse3_plusminus:HI
(vec_select:HI
- (match_operand:V4HI 2 "nonimmediate_operand" "ym")
+ (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv")
(parallel [(const_int 0)]))
(vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
(ssse3_plusminus:HI
(vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
(vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
- "TARGET_SSSE3"
- "ph<plusminus_mnemonic>w\t{%2, %0|%0, %2}"
- [(set_attr "type" "sseiadd")
+ "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
+ "@
+ ph<plusminus_mnemonic>w\t{%2, %0|%0, %2}
+ #
+ #"
+ "TARGET_MMX_WITH_SSE && reload_completed"
+ [(const_int 0)]
+{
+ /* Generate SSE version of the operation. */
+ rtx op0 = lowpart_subreg (V8HImode, operands[0],
+ GET_MODE (operands[0]));
+ rtx op1 = lowpart_subreg (V8HImode, operands[1],
+ GET_MODE (operands[1]));
+ rtx op2 = lowpart_subreg (V8HImode, operands[2],
+ GET_MODE (operands[2]));
+ emit_insn (gen_ssse3_ph<plusminus_mnemonic>wv8hi3 (op0, op1, op2));
+ ix86_move_vector_high_sse_to_mmx (op0);
+ DONE;
+}
+ [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+ (set_attr "type" "sseiadd")
(set_attr "atom_unit" "complex")
(set_attr "prefix_extra" "1")
(set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
- (set_attr "mode" "DI")])
+ (set_attr "mode" "DI,TI,TI")])
(define_insn "avx2_ph<plusminus_mnemonic>dv8si3"
[(set (match_operand:V8SI 0 "register_operand" "=x")
--
2.20.1
^ permalink raw reply [flat|nested] 50+ messages in thread
* [PATCH 41/42] i386: Enable TM MMX intrinsics with SSE2
2019-02-16 0:34 [PATCH 00/42] V7: Emulate MMX intrinsics with SSE H.J. Lu
` (28 preceding siblings ...)
2019-02-16 0:43 ` [PATCH 28/42] i386: Emulate MMX ssse3_ph<plusminus_mnemonic>wv4hi3 with SSE H.J. Lu
@ 2019-02-16 0:43 ` H.J. Lu
2019-02-16 0:43 ` [PATCH 26/42] i386: Emulate MMX umulv1siv1di3 " H.J. Lu
` (11 subsequent siblings)
41 siblings, 0 replies; 50+ messages in thread
From: H.J. Lu @ 2019-02-16 0:43 UTC (permalink / raw)
To: gcc-patches; +Cc: Uros Bizjak
This patch enables TM MMX intrinsics with SSE2 when MMX is disabled.
PR target/89021
* config/i386/i386.c (bdesc_tm): Enable MMX intrinsics with
SSE2.
---
gcc/config/i386/i386.c | 16 ++++++++--------
1 file changed, 8 insertions(+), 8 deletions(-)
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 073a2534d1f..319a98f824a 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -31065,13 +31065,13 @@ static const struct builtin_description bdesc_##kind[] = \
we're lazy. Add casts to make them fit. */
static const struct builtin_description bdesc_tm[] =
{
- { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_WM64", (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
- { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_WaRM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
- { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_WaWM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
- { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_RM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
- { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_RaRM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
- { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_RaWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
- { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_RfWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
+ { OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_nothing, "__builtin__ITM_WM64", (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
+ { OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_nothing, "__builtin__ITM_WaRM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
+ { OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_nothing, "__builtin__ITM_WaWM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
+ { OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_nothing, "__builtin__ITM_RM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
+ { OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_nothing, "__builtin__ITM_RaRM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
+ { OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_nothing, "__builtin__ITM_RaWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
+ { OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_nothing, "__builtin__ITM_RfWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
{ OPTION_MASK_ISA_SSE, 0, CODE_FOR_nothing, "__builtin__ITM_WM128", (enum ix86_builtins) BUILT_IN_TM_STORE_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
{ OPTION_MASK_ISA_SSE, 0, CODE_FOR_nothing, "__builtin__ITM_WaRM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
@@ -31089,7 +31089,7 @@ static const struct builtin_description bdesc_tm[] =
{ OPTION_MASK_ISA_AVX, 0, CODE_FOR_nothing, "__builtin__ITM_RaWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
{ OPTION_MASK_ISA_AVX, 0, CODE_FOR_nothing, "__builtin__ITM_RfWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
- { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_LM64", (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID },
+ { OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_nothing, "__builtin__ITM_LM64", (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID },
{ OPTION_MASK_ISA_SSE, 0, CODE_FOR_nothing, "__builtin__ITM_LM128", (enum ix86_builtins) BUILT_IN_TM_LOG_M128, UNKNOWN, VOID_FTYPE_PCVOID },
{ OPTION_MASK_ISA_AVX, 0, CODE_FOR_nothing, "__builtin__ITM_LM256", (enum ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID },
};
--
2.20.1
^ permalink raw reply [flat|nested] 50+ messages in thread
* [PATCH 26/42] i386: Emulate MMX umulv1siv1di3 with SSE2
2019-02-16 0:34 [PATCH 00/42] V7: Emulate MMX intrinsics with SSE H.J. Lu
` (29 preceding siblings ...)
2019-02-16 0:43 ` [PATCH 41/42] i386: Enable TM MMX intrinsics with SSE2 H.J. Lu
@ 2019-02-16 0:43 ` H.J. Lu
2019-02-16 0:43 ` [PATCH 31/42] i386: Emulate MMX ssse3_pmulhrswv4hi3 with SSE H.J. Lu
` (10 subsequent siblings)
41 siblings, 0 replies; 50+ messages in thread
From: H.J. Lu @ 2019-02-16 0:43 UTC (permalink / raw)
To: gcc-patches; +Cc: Uros Bizjak
Emulate MMX umulv1siv1di3 with SSE2. Only SSE register source operand
is allowed.
PR target/89021
* config/i386/mmx.md (sse2_umulv1siv1di3): Add SSE emulation
support.
(*sse2_umulv1siv1di3): Add SSE2 emulation.
---
gcc/config/i386/mmx.md | 26 ++++++++++++++++----------
1 file changed, 16 insertions(+), 10 deletions(-)
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 993ad99a36e..9cf0251293a 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -905,30 +905,36 @@
(mult:V1DI
(zero_extend:V1DI
(vec_select:V1SI
- (match_operand:V2SI 1 "nonimmediate_operand")
+ (match_operand:V2SI 1 "register_mmxmem_operand")
(parallel [(const_int 0)])))
(zero_extend:V1DI
(vec_select:V1SI
- (match_operand:V2SI 2 "nonimmediate_operand")
+ (match_operand:V2SI 2 "register_mmxmem_operand")
(parallel [(const_int 0)])))))]
- "TARGET_SSE2"
+ "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSE2"
"ix86_fixup_binary_operands_no_copy (MULT, V2SImode, operands);")
(define_insn "*sse2_umulv1siv1di3"
- [(set (match_operand:V1DI 0 "register_operand" "=y")
+ [(set (match_operand:V1DI 0 "register_operand" "=y,x,Yv")
(mult:V1DI
(zero_extend:V1DI
(vec_select:V1SI
- (match_operand:V2SI 1 "nonimmediate_operand" "%0")
+ (match_operand:V2SI 1 "register_mmxmem_operand" "%0,0,Yv")
(parallel [(const_int 0)])))
(zero_extend:V1DI
(vec_select:V1SI
- (match_operand:V2SI 2 "nonimmediate_operand" "ym")
+ (match_operand:V2SI 2 "register_mmxmem_operand" "ym,x,Yv")
(parallel [(const_int 0)])))))]
- "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V2SImode, operands)"
- "pmuludq\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmxmul")
- (set_attr "mode" "DI")])
+ "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+ && TARGET_SSE2
+ && ix86_binary_operator_ok (MULT, V2SImode, operands)"
+ "@
+ pmuludq\t{%2, %0|%0, %2}
+ pmuludq\t{%2, %0|%0, %2}
+ vpmuludq\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+ (set_attr "type" "mmxmul,ssemul,ssemul")
+ (set_attr "mode" "DI,TI,TI")])
(define_expand "mmx_<code>v4hi3"
[(set (match_operand:V4HI 0 "register_operand")
--
2.20.1
^ permalink raw reply [flat|nested] 50+ messages in thread
* [PATCH 31/42] i386: Emulate MMX ssse3_pmulhrswv4hi3 with SSE
2019-02-16 0:34 [PATCH 00/42] V7: Emulate MMX intrinsics with SSE H.J. Lu
` (30 preceding siblings ...)
2019-02-16 0:43 ` [PATCH 26/42] i386: Emulate MMX umulv1siv1di3 " H.J. Lu
@ 2019-02-16 0:43 ` H.J. Lu
2019-02-16 0:43 ` [PATCH 32/42] i386: Emulate MMX pshufb with SSE version H.J. Lu
` (9 subsequent siblings)
41 siblings, 0 replies; 50+ messages in thread
From: H.J. Lu @ 2019-02-16 0:43 UTC (permalink / raw)
To: gcc-patches; +Cc: Uros Bizjak
Emulate MMX ssse3_pmulhrswv4hi3 with SSE. Only SSE register source
operand is allowed.
PR target/89021
* config/i386/sse.md (*ssse3_pmulhrswv4hi3): Add SSE emulation.
---
gcc/config/i386/sse.md | 20 +++++++++++++-------
1 file changed, 13 insertions(+), 7 deletions(-)
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index dc07173cb1c..80b1a46f507 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -15652,25 +15652,31 @@
(set_attr "mode" "<sseinsnmode>")])
(define_insn "*ssse3_pmulhrswv4hi3"
- [(set (match_operand:V4HI 0 "register_operand" "=y")
+ [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
(truncate:V4HI
(lshiftrt:V4SI
(plus:V4SI
(lshiftrt:V4SI
(mult:V4SI
(sign_extend:V4SI
- (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
+ (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yv"))
(sign_extend:V4SI
- (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
+ (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv")))
(const_int 14))
(match_operand:V4HI 3 "const1_operand"))
(const_int 1))))]
- "TARGET_SSSE3 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
- "pmulhrsw\t{%2, %0|%0, %2}"
- [(set_attr "type" "sseimul")
+ "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+ && TARGET_SSSE3
+ && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+ "@
+ pmulhrsw\t{%2, %0|%0, %2}
+ pmulhrsw\t{%2, %0|%0, %2}
+ vpmulhrsw\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+ (set_attr "type" "sseimul")
(set_attr "prefix_extra" "1")
(set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
- (set_attr "mode" "DI")])
+ (set_attr "mode" "DI,TI,TI")])
(define_insn "<ssse3_avx2>_pshufb<mode>3<mask_name>"
[(set (match_operand:VI1_AVX512 0 "register_operand" "=x,x,v")
--
2.20.1
^ permalink raw reply [flat|nested] 50+ messages in thread
* [PATCH 32/42] i386: Emulate MMX pshufb with SSE version
2019-02-16 0:34 [PATCH 00/42] V7: Emulate MMX intrinsics with SSE H.J. Lu
` (31 preceding siblings ...)
2019-02-16 0:43 ` [PATCH 31/42] i386: Emulate MMX ssse3_pmulhrswv4hi3 with SSE H.J. Lu
@ 2019-02-16 0:43 ` H.J. Lu
2019-02-16 0:43 ` [PATCH 37/42] Prevent allocation of MMX registers with TARGET_MMX_WITH_SSE H.J. Lu
` (8 subsequent siblings)
41 siblings, 0 replies; 50+ messages in thread
From: H.J. Lu @ 2019-02-16 0:43 UTC (permalink / raw)
To: gcc-patches; +Cc: Uros Bizjak
Emulate MMX version of pshufb with SSE version by masking out bit 3 of
the shuffle control byte. Only SSE register source operand is allowed.
PR target/89021
* config/i386/sse.md (ssse3_pshufbv8qi3): Changed to
define_insn_and_split. Also allow TARGET_MMX_WITH_SSE. Add
SSE emulation.
---
gcc/config/i386/sse.md | 46 +++++++++++++++++++++++++++++++++---------
1 file changed, 37 insertions(+), 9 deletions(-)
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 80b1a46f507..704e211c0b8 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -15697,17 +15697,45 @@
(set_attr "btver2_decode" "vector")
(set_attr "mode" "<sseinsnmode>")])
-(define_insn "ssse3_pshufbv8qi3"
- [(set (match_operand:V8QI 0 "register_operand" "=y")
- (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
- (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
- UNSPEC_PSHUFB))]
- "TARGET_SSSE3"
- "pshufb\t{%2, %0|%0, %2}";
- [(set_attr "type" "sselog1")
+(define_insn_and_split "ssse3_pshufbv8qi3"
+ [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yv")
+ (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0,0,Yv")
+ (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yv")]
+ UNSPEC_PSHUFB))
+ (clobber (match_scratch:V4SI 3 "=X,x,Yv"))]
+ "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
+ "@
+ pshufb\t{%2, %0|%0, %2}
+ #
+ #"
+ "TARGET_MMX_WITH_SSE && reload_completed"
+ [(set (match_dup 3) (match_dup 5))
+ (set (match_dup 3)
+ (and:V4SI (match_dup 3) (match_dup 2)))
+ (set (match_dup 0)
+ (unspec:V16QI [(match_dup 1) (match_dup 4)] UNSPEC_PSHUFB))]
+{
+ /* Emulate MMX version of pshufb with SSE version by masking out the
+ bit 3 of the shuffle control byte. */
+ operands[0] = lowpart_subreg (V16QImode, operands[0],
+ GET_MODE (operands[0]));
+ operands[1] = lowpart_subreg (V16QImode, operands[1],
+ GET_MODE (operands[1]));
+ operands[2] = lowpart_subreg (V4SImode, operands[2],
+ GET_MODE (operands[2]));
+ operands[4] = lowpart_subreg (V16QImode, operands[3],
+ GET_MODE (operands[3]));
+ rtvec par = gen_rtvec (4, GEN_INT (0xf7f7f7f7),
+ GEN_INT (0xf7f7f7f7),
+ GEN_INT (0xf7f7f7f7),
+ GEN_INT (0xf7f7f7f7));
+ rtx vec_const = gen_rtx_CONST_VECTOR (V4SImode, par);
+ operands[5] = force_const_mem (V4SImode, vec_const);
+}
+ [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
(set_attr "prefix_extra" "1")
(set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
- (set_attr "mode" "DI")])
+ (set_attr "mode" "DI,TI,TI")])
(define_insn "<ssse3_avx2>_psign<mode>3"
[(set (match_operand:VI124_AVX2 0 "register_operand" "=x,x")
--
2.20.1
^ permalink raw reply [flat|nested] 50+ messages in thread
* [PATCH 37/42] Prevent allocation of MMX registers with TARGET_MMX_WITH_SSE
2019-02-16 0:34 [PATCH 00/42] V7: Emulate MMX intrinsics with SSE H.J. Lu
` (32 preceding siblings ...)
2019-02-16 0:43 ` [PATCH 32/42] i386: Emulate MMX pshufb with SSE version H.J. Lu
@ 2019-02-16 0:43 ` H.J. Lu
2019-02-16 0:43 ` [PATCH 40/42] i386: Allow MMX intrinsic emulation with SSE H.J. Lu
` (7 subsequent siblings)
41 siblings, 0 replies; 50+ messages in thread
From: H.J. Lu @ 2019-02-16 0:43 UTC (permalink / raw)
To: gcc-patches; +Cc: Uros Bizjak
From: Uros Bizjak <ubizjak@gmail.com>
2019-02-14 Uroš Bizjak <ubizjak@gmail.com>
PR target/89021
* config/i386/i386.md (*zero_extendsidi2): Add mmx_isa attribute.
* config/i386/sse.md (*vec_concatv2sf_sse4_1): Ditto.
(*vec_concatv2sf_sse): Ditto.
(*vec_concatv2si_sse4_1): Ditto.
(*vec_concatv2si): Ditto.
(*vec_concatv4si_0): Ditto.
(*vec_concatv2di_0): Ditto.
---
gcc/config/i386/i386.md | 4 ++++
gcc/config/i386/sse.md | 16 ++++++++++++++--
2 files changed, 18 insertions(+), 2 deletions(-)
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index e1727676deb..22172fd77a8 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -3682,6 +3682,10 @@
(const_string "avx512bw")
]
(const_string "*")))
+ (set (attr "mmx_isa")
+ (if_then_else (eq_attr "alternative" "5,6")
+ (const_string "native")
+ (const_string "*")))
(set (attr "type")
(cond [(eq_attr "alternative" "0,1,2,4")
(const_string "multi")
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 379da16615d..b6196b088fd 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -7201,6 +7201,10 @@
(const_string "mmxmov")
]
(const_string "sselog")))
+ (set (attr "mmx_isa")
+ (if_then_else (eq_attr "alternative" "7,8")
+ (const_string "native")
+ (const_string "*")))
(set (attr "prefix_data16")
(if_then_else (eq_attr "alternative" "3,4")
(const_string "1")
@@ -7236,7 +7240,8 @@
movss\t{%1, %0|%0, %1}
punpckldq\t{%2, %0|%0, %2}
movd\t{%1, %0|%0, %1}"
- [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
+ [(set_attr "mmx_isa" "*,*,native,native")
+ (set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
(set_attr "mode" "V4SF,SF,DI,DI")])
(define_insn "*vec_concatv4sf"
@@ -14509,6 +14514,10 @@
punpckldq\t{%2, %0|%0, %2}
movd\t{%1, %0|%0, %1}"
[(set_attr "isa" "noavx,noavx,avx,avx512dq,noavx,noavx,avx,*,*,*")
+ (set (attr "mmx_isa")
+ (if_then_else (eq_attr "alternative" "8,9")
+ (const_string "native")
+ (const_string "*")))
(set (attr "type")
(cond [(eq_attr "alternative" "7")
(const_string "ssemov")
@@ -14546,6 +14555,7 @@
punpckldq\t{%2, %0|%0, %2}
movd\t{%1, %0|%0, %1}"
[(set_attr "isa" "sse2,sse2,*,*,*,*")
+ (set_attr "mmx_isa" "*,*,*,*,native,native")
(set_attr "type" "sselog,ssemov,sselog,ssemov,mmxcvt,mmxmov")
(set_attr "mode" "TI,TI,V4SF,SF,DI,DI")])
@@ -14575,7 +14585,8 @@
"@
%vmovq\t{%1, %0|%0, %1}
movq2dq\t{%1, %0|%0, %1}"
- [(set_attr "type" "ssemov")
+ [(set_attr "mmx_isa" "*,native")
+ (set_attr "type" "ssemov")
(set_attr "prefix" "maybe_vex,orig")
(set_attr "mode" "TI")])
@@ -14650,6 +14661,7 @@
%vmovq\t{%1, %0|%0, %1}
movq2dq\t{%1, %0|%0, %1}"
[(set_attr "isa" "x64,*,*")
+ (set_attr "mmx_isa" "*,*,native")
(set_attr "type" "ssemov")
(set_attr "prefix_rex" "1,*,*")
(set_attr "prefix" "maybe_vex,maybe_vex,orig")
--
2.20.1
^ permalink raw reply [flat|nested] 50+ messages in thread
* [PATCH 40/42] i386: Allow MMX intrinsic emulation with SSE
2019-02-16 0:34 [PATCH 00/42] V7: Emulate MMX intrinsics with SSE H.J. Lu
` (33 preceding siblings ...)
2019-02-16 0:43 ` [PATCH 37/42] Prevent allocation of MMX registers with TARGET_MMX_WITH_SSE H.J. Lu
@ 2019-02-16 0:43 ` H.J. Lu
2019-02-16 0:43 ` [PATCH 12/42] i386: Emulate MMX vec_dupv2si " H.J. Lu
` (6 subsequent siblings)
41 siblings, 0 replies; 50+ messages in thread
From: H.J. Lu @ 2019-02-16 0:43 UTC (permalink / raw)
To: gcc-patches; +Cc: Uros Bizjak
Allow MMX intrinsic emulation with SSE/SSE2/SSSE3. Don't enable MMX ISA
by default with TARGET_MMX_WITH_SSE.
For pr82483-1.c and pr82483-2.c, "-mssse3 -mno-mmx" compiles in 64-bit
mode since MMX intrinsics can be emulated with SSE.
gcc/
PR target/89021
* config/i386/i386-builtin.def: Enable MMX intrinsics with
SSE/SSE2/SSSE3.
* config/i386/i386.c (ix86_init_mmx_sse_builtins): Likewise.
(ix86_expand_builtin): Allow SSE/SSE2/SSSE3 to emulate MMX
intrinsics with TARGET_MMX_WITH_SSE.
* config/i386/mmintrin.h: Only require SSE2 if __MMX_WITH_SSE__
is defined.
gcc/testsuite/
PR target/89021
* gcc.target/i386/pr82483-1.c: Error only on ia32.
* gcc.target/i386/pr82483-2.c: Likewise.
---
gcc/config/i386/i386-builtin.def | 126 +++++++++++-----------
gcc/config/i386/i386.c | 29 ++++-
gcc/config/i386/mmintrin.h | 12 ++-
gcc/testsuite/gcc.target/i386/pr82483-1.c | 2 +-
gcc/testsuite/gcc.target/i386/pr82483-2.c | 2 +-
5 files changed, 101 insertions(+), 70 deletions(-)
diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def
index 88005f4687f..10a9d631f29 100644
--- a/gcc/config/i386/i386-builtin.def
+++ b/gcc/config/i386/i386-builtin.def
@@ -100,7 +100,7 @@ BDESC (0, 0, CODE_FOR_fnstsw, "__builtin_ia32_fnstsw", IX86_BUILTIN_FNSTSW, UNKN
BDESC (0, 0, CODE_FOR_fnclex, "__builtin_ia32_fnclex", IX86_BUILTIN_FNCLEX, UNKNOWN, (int) VOID_FTYPE_VOID)
/* MMX */
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID)
/* 3DNow! */
BDESC (OPTION_MASK_ISA_3DNOW, 0, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID)
@@ -442,68 +442,68 @@ BDESC (0, 0, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNO
BDESC (0, 0, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT)
/* MMX */
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
-
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
-
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
-
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
-
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
-
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
-
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI)
-
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI)
-
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT)
-
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT)
-
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
+
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
+
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
+
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
+
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
+
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
+
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI)
+
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI)
+
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT)
+
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT)
+
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT)
/* 3DNow! */
BDESC (OPTION_MASK_ISA_3DNOW, 0, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF)
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 25e0dc43a9e..073a2534d1f 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -31764,14 +31764,17 @@ ix86_init_mmx_sse_builtins (void)
VOID_FTYPE_UNSIGNED, IX86_BUILTIN_XABORT);
/* MMX access to the vec_init patterns. */
- def_builtin_const (OPTION_MASK_ISA_MMX, 0, "__builtin_ia32_vec_init_v2si",
+ def_builtin_const (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0,
+ "__builtin_ia32_vec_init_v2si",
V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
- def_builtin_const (OPTION_MASK_ISA_MMX, 0, "__builtin_ia32_vec_init_v4hi",
+ def_builtin_const (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0,
+ "__builtin_ia32_vec_init_v4hi",
V4HI_FTYPE_HI_HI_HI_HI,
IX86_BUILTIN_VEC_INIT_V4HI);
- def_builtin_const (OPTION_MASK_ISA_MMX, 0, "__builtin_ia32_vec_init_v8qi",
+ def_builtin_const (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0,
+ "__builtin_ia32_vec_init_v8qi",
V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
IX86_BUILTIN_VEC_INIT_V8QI);
@@ -31793,7 +31796,8 @@ ix86_init_mmx_sse_builtins (void)
"__builtin_ia32_vec_ext_v4hi",
HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
- def_builtin_const (OPTION_MASK_ISA_MMX, 0, "__builtin_ia32_vec_ext_v2si",
+ def_builtin_const (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0,
+ "__builtin_ia32_vec_ext_v2si",
SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
def_builtin_const (OPTION_MASK_ISA_SSE2, 0, "__builtin_ia32_vec_ext_v16qi",
@@ -36926,6 +36930,23 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget,
== (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4))
&& (isa & (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4)) != 0)
isa |= (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4);
+ /* Use SSE/SSE2/SSSE3 to emulate MMX intrinsics in 64-bit mode when
+ MMX is disabled. */
+ if (TARGET_MMX_WITH_SSE)
+ {
+ if (((bisa & (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX))
+ == (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX))
+ && (isa & (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX)) != 0)
+ isa |= (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX);
+ if (((bisa & (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_MMX))
+ == (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_MMX))
+ && (isa & (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_MMX)) != 0)
+ isa |= (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_MMX);
+ if (((bisa & (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX))
+ == (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX))
+ && (isa & (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX)) != 0)
+ isa |= (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX);
+ }
if ((bisa & isa) != bisa || (bisa2 & isa2) != bisa2)
{
char *opts = ix86_target_string (bisa, bisa2, 0, 0, NULL, NULL,
diff --git a/gcc/config/i386/mmintrin.h b/gcc/config/i386/mmintrin.h
index 238b3df3121..c4b2e0c7b25 100644
--- a/gcc/config/i386/mmintrin.h
+++ b/gcc/config/i386/mmintrin.h
@@ -29,7 +29,9 @@
#if defined __x86_64__ && !defined __SSE__ || !defined __MMX__
#pragma GCC push_options
-#ifdef __x86_64__
+#ifdef __MMX_WITH_SSE__
+#pragma GCC target("sse2")
+#elif defined __x86_64__
#pragma GCC target("sse,mmx")
#else
#pragma GCC target("mmx")
@@ -315,7 +317,11 @@ _m_paddd (__m64 __m1, __m64 __m2)
/* Add the 64-bit values in M1 to the 64-bit values in M2. */
#ifndef __SSE2__
#pragma GCC push_options
+#ifdef __MMX_WITH_SSE__
+#pragma GCC target("sse2")
+#else
#pragma GCC target("sse2,mmx")
+#endif
#define __DISABLE_SSE2__
#endif /* __SSE2__ */
@@ -427,7 +433,11 @@ _m_psubd (__m64 __m1, __m64 __m2)
/* Add the 64-bit values in M1 to the 64-bit values in M2. */
#ifndef __SSE2__
#pragma GCC push_options
+#ifdef __MMX_WITH_SSE__
+#pragma GCC target("sse2")
+#else
#pragma GCC target("sse2,mmx")
+#endif
#define __DISABLE_SSE2__
#endif /* __SSE2__ */
diff --git a/gcc/testsuite/gcc.target/i386/pr82483-1.c b/gcc/testsuite/gcc.target/i386/pr82483-1.c
index 59a59dc8dfe..b2028d8dc5e 100644
--- a/gcc/testsuite/gcc.target/i386/pr82483-1.c
+++ b/gcc/testsuite/gcc.target/i386/pr82483-1.c
@@ -1,7 +1,7 @@
/* PR target/82483 */
/* { dg-do compile } */
/* { dg-options "-mssse3 -mno-mmx -Wno-psabi" } */
-/* { dg-error "needs isa option" "" { target *-*-* } 0 } */
+/* { dg-error "needs isa option" "" { target ia32 } 0 } */
#include <x86intrin.h>
diff --git a/gcc/testsuite/gcc.target/i386/pr82483-2.c b/gcc/testsuite/gcc.target/i386/pr82483-2.c
index 305ddbd6c64..c92de405cb3 100644
--- a/gcc/testsuite/gcc.target/i386/pr82483-2.c
+++ b/gcc/testsuite/gcc.target/i386/pr82483-2.c
@@ -1,7 +1,7 @@
/* PR target/82483 */
/* { dg-do compile } */
/* { dg-options "-mssse3 -mno-mmx -Wno-psabi" } */
-/* { dg-error "needs isa option" "" { target *-*-* } 0 } */
+/* { dg-error "needs isa option" "" { target ia32 } 0 } */
#include <x86intrin.h>
--
2.20.1
^ permalink raw reply [flat|nested] 50+ messages in thread
* [PATCH 12/42] i386: Emulate MMX vec_dupv2si with SSE
2019-02-16 0:34 [PATCH 00/42] V7: Emulate MMX intrinsics with SSE H.J. Lu
` (34 preceding siblings ...)
2019-02-16 0:43 ` [PATCH 40/42] i386: Allow MMX intrinsic emulation with SSE H.J. Lu
@ 2019-02-16 0:43 ` H.J. Lu
2019-02-16 0:43 ` [PATCH 38/42] i386: Allow MMXMODE moves with TARGET_MMX_WITH_SSE H.J. Lu
` (5 subsequent siblings)
41 siblings, 0 replies; 50+ messages in thread
From: H.J. Lu @ 2019-02-16 0:43 UTC (permalink / raw)
To: gcc-patches; +Cc: Uros Bizjak
Emulate MMX vec_dupv2si with SSE. Add the "Yw" constraint to allow
broadcast from integer register for AVX512BW with TARGET_AVX512VL.
Only SSE register source operand is allowed.
PR target/89021
* config/i386/constraints.md (Yw): New constraint.
* config/i386/mmx.md (*vec_dupv2si): Changed to
define_insn_and_split and also allow TARGET_MMX_WITH_SSE to
support SSE emulation.
---
gcc/config/i386/constraints.md | 6 ++++++
gcc/config/i386/mmx.md | 24 +++++++++++++++++-------
2 files changed, 23 insertions(+), 7 deletions(-)
diff --git a/gcc/config/i386/constraints.md b/gcc/config/i386/constraints.md
index 16075b4acf3..c546b20d9dc 100644
--- a/gcc/config/i386/constraints.md
+++ b/gcc/config/i386/constraints.md
@@ -110,6 +110,8 @@
;; v any EVEX encodable SSE register for AVX512VL target,
;; otherwise any SSE register
;; h EVEX encodable SSE register with number factor of four
+;; w any EVEX encodable SSE register for AVX512BW with TARGET_AVX512VL
+;; target.
(define_register_constraint "Yz" "TARGET_SSE ? SSE_FIRST_REG : NO_REGS"
"First SSE register (@code{%xmm0}).")
@@ -146,6 +148,10 @@
"TARGET_AVX512VL ? ALL_SSE_REGS : TARGET_SSE ? SSE_REGS : NO_REGS"
"@internal For AVX512VL, any EVEX encodable SSE register (@code{%xmm0-%xmm31}), otherwise any SSE register.")
+(define_register_constraint "Yw"
+ "TARGET_AVX512BW && TARGET_AVX512VL ? ALL_SSE_REGS : NO_REGS"
+ "@internal Any EVEX encodable SSE register (@code{%xmm0-%xmm31}) for AVX512BW with TARGET_AVX512VL target.")
+
;; We use the B prefix to denote any number of internal operands:
;; f FLAGS_REG
;; g GOT memory operand.
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index b0c6a8c8077..d568a534956 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -1381,14 +1381,24 @@
(set_attr "length_immediate" "1")
(set_attr "mode" "DI")])
-(define_insn "*vec_dupv2si"
- [(set (match_operand:V2SI 0 "register_operand" "=y")
+(define_insn_and_split "*vec_dupv2si"
+ [(set (match_operand:V2SI 0 "register_operand" "=y,x,Yv,Yw")
(vec_duplicate:V2SI
- (match_operand:SI 1 "register_operand" "0")))]
- "TARGET_MMX"
- "punpckldq\t%0, %0"
- [(set_attr "type" "mmxcvt")
- (set_attr "mode" "DI")])
+ (match_operand:SI 1 "register_operand" "0,0,Yv,r")))]
+ "TARGET_MMX || TARGET_MMX_WITH_SSE"
+ "@
+ punpckldq\t%0, %0
+ #
+ #
+ #"
+ "TARGET_MMX_WITH_SSE && reload_completed"
+ [(set (match_dup 0)
+ (vec_duplicate:V4SI (match_dup 1)))]
+ "operands[0] = lowpart_subreg (V4SImode, operands[0],
+ GET_MODE (operands[0]));"
+ [(set_attr "mmx_isa" "native,x64_noavx,x64_avx,x64_avx")
+ (set_attr "type" "mmxcvt,ssemov,ssemov,ssemov")
+ (set_attr "mode" "DI,TI,TI,TI")])
(define_insn "*mmx_concatv2si"
[(set (match_operand:V2SI 0 "register_operand" "=y,y")
--
2.20.1
^ permalink raw reply [flat|nested] 50+ messages in thread
* [PATCH 38/42] i386: Allow MMXMODE moves with TARGET_MMX_WITH_SSE
2019-02-16 0:34 [PATCH 00/42] V7: Emulate MMX intrinsics with SSE H.J. Lu
` (35 preceding siblings ...)
2019-02-16 0:43 ` [PATCH 12/42] i386: Emulate MMX vec_dupv2si " H.J. Lu
@ 2019-02-16 0:43 ` H.J. Lu
2019-02-16 0:43 ` [PATCH 22/42] i386: Emulate MMX mmx_uavgv8qi3 with SSE H.J. Lu
` (4 subsequent siblings)
41 siblings, 0 replies; 50+ messages in thread
From: H.J. Lu @ 2019-02-16 0:43 UTC (permalink / raw)
To: gcc-patches; +Cc: Uros Bizjak
PR target/89021
* config/i386/mmx.md (MMXMODE:mov<mode>): Also allow
TARGET_MMX_WITH_SSE.
(MMXMODE:*mov<mode>_internal): Likewise.
(MMXMODE:movmisalign<mode>): Likewise.
---
gcc/config/i386/mmx.md | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 0f925c0b1ea..a21e11c8dfb 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -70,7 +70,7 @@
(define_expand "mov<mode>"
[(set (match_operand:MMXMODE 0 "nonimmediate_operand")
(match_operand:MMXMODE 1 "nonimmediate_operand"))]
- "TARGET_MMX"
+ "TARGET_MMX || TARGET_MMX_WITH_SSE"
{
ix86_expand_vector_move (<MODE>mode, operands);
DONE;
@@ -81,7 +81,7 @@
"=r ,o ,r,r ,m ,?!y,!y,?!y,m ,r ,?!y,v,v,v,m,r,v,!y,*x")
(match_operand:MMXMODE 1 "nonimm_or_0_operand"
"rCo,rC,C,rm,rC,C ,!y,m ,?!y,?!y,r ,C,v,m,v,v,r,*x,!y"))]
- "TARGET_MMX
+ "(TARGET_MMX || TARGET_MMX_WITH_SSE)
&& !(MEM_P (operands[0]) && MEM_P (operands[1]))"
{
switch (get_attr_type (insn))
@@ -207,7 +207,7 @@
(define_expand "movmisalign<mode>"
[(set (match_operand:MMXMODE 0 "nonimmediate_operand")
(match_operand:MMXMODE 1 "nonimmediate_operand"))]
- "TARGET_MMX"
+ "TARGET_MMX || TARGET_MMX_WITH_SSE"
{
ix86_expand_vector_move (<MODE>mode, operands);
DONE;
--
2.20.1
^ permalink raw reply [flat|nested] 50+ messages in thread
* [PATCH 22/42] i386: Emulate MMX mmx_uavgv8qi3 with SSE
2019-02-16 0:34 [PATCH 00/42] V7: Emulate MMX intrinsics with SSE H.J. Lu
` (36 preceding siblings ...)
2019-02-16 0:43 ` [PATCH 38/42] i386: Allow MMXMODE moves with TARGET_MMX_WITH_SSE H.J. Lu
@ 2019-02-16 0:43 ` H.J. Lu
2019-02-16 0:43 ` [PATCH 39/42] i386: Allow MMX vector expanders with TARGET_MMX_WITH_SSE H.J. Lu
` (3 subsequent siblings)
41 siblings, 0 replies; 50+ messages in thread
From: H.J. Lu @ 2019-02-16 0:43 UTC (permalink / raw)
To: gcc-patches; +Cc: Uros Bizjak
Emulate MMX mmx_uavgv8qi3 with SSE. For the SSE alternatives, only an
SSE register is allowed as the source operand (no memory operand).
PR target/89021
* config/i386/mmx.md (mmx_uavgv8qi3): Also check TARGET_MMX
and TARGET_MMX_WITH_SSE.
(*mmx_uavgv8qi3): Add SSE emulation.
---
gcc/config/i386/mmx.md | 25 +++++++++++++++----------
1 file changed, 15 insertions(+), 10 deletions(-)
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index d78c6a31962..570153521a1 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -1678,50 +1678,55 @@
(plus:V8HI
(plus:V8HI
(zero_extend:V8HI
- (match_operand:V8QI 1 "nonimmediate_operand"))
+ (match_operand:V8QI 1 "register_mmxmem_operand"))
(zero_extend:V8HI
- (match_operand:V8QI 2 "nonimmediate_operand")))
+ (match_operand:V8QI 2 "register_mmxmem_operand")))
(const_vector:V8HI [(const_int 1) (const_int 1)
(const_int 1) (const_int 1)
(const_int 1) (const_int 1)
(const_int 1) (const_int 1)]))
(const_int 1))))]
- "TARGET_SSE || TARGET_3DNOW"
+ "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+ && (TARGET_SSE || TARGET_3DNOW_A)"
"ix86_fixup_binary_operands_no_copy (PLUS, V8QImode, operands);")
(define_insn "*mmx_uavgv8qi3"
- [(set (match_operand:V8QI 0 "register_operand" "=y")
+ [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yv")
(truncate:V8QI
(lshiftrt:V8HI
(plus:V8HI
(plus:V8HI
(zero_extend:V8HI
- (match_operand:V8QI 1 "nonimmediate_operand" "%0"))
+ (match_operand:V8QI 1 "register_mmxmem_operand" "%0,0,Yv"))
(zero_extend:V8HI
- (match_operand:V8QI 2 "nonimmediate_operand" "ym")))
+ (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yv")))
(const_vector:V8HI [(const_int 1) (const_int 1)
(const_int 1) (const_int 1)
(const_int 1) (const_int 1)
(const_int 1) (const_int 1)]))
(const_int 1))))]
- "(TARGET_SSE || TARGET_3DNOW)
+ "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+ && (TARGET_SSE || TARGET_3DNOW_A)
&& ix86_binary_operator_ok (PLUS, V8QImode, operands)"
{
/* These two instructions have the same operation, but their encoding
is different. Prefer the one that is de facto standard. */
- if (TARGET_SSE || TARGET_3DNOW_A)
+ if (TARGET_MMX_WITH_SSE && TARGET_AVX)
+ return "vpavgb\t{%2, %1, %0|%0, %1, %2}";
+ else if (TARGET_SSE || TARGET_3DNOW_A)
return "pavgb\t{%2, %0|%0, %2}";
else
return "pavgusb\t{%2, %0|%0, %2}";
}
- [(set_attr "type" "mmxshft")
+ [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+ (set_attr "type" "mmxshft,sseiadd,sseiadd")
(set (attr "prefix_extra")
(if_then_else
(not (ior (match_test "TARGET_SSE")
(match_test "TARGET_3DNOW_A")))
(const_string "1")
(const_string "*")))
- (set_attr "mode" "DI")])
+ (set_attr "mode" "DI,TI,TI")])
(define_expand "mmx_uavgv4hi3"
[(set (match_operand:V4HI 0 "register_operand")
--
2.20.1
^ permalink raw reply [flat|nested] 50+ messages in thread
* [PATCH 39/42] i386: Allow MMX vector expanders with TARGET_MMX_WITH_SSE
2019-02-16 0:34 [PATCH 00/42] V7: Emulate MMX intrinsics with SSE H.J. Lu
` (37 preceding siblings ...)
2019-02-16 0:43 ` [PATCH 22/42] i386: Emulate MMX mmx_uavgv8qi3 with SSE H.J. Lu
@ 2019-02-16 0:43 ` H.J. Lu
2019-02-16 9:50 ` Uros Bizjak
2019-02-16 0:43 ` [PATCH 21/42] i386: Emulate MMX maskmovq with SSE2 maskmovdqu H.J. Lu
` (2 subsequent siblings)
41 siblings, 1 reply; 50+ messages in thread
From: H.J. Lu @ 2019-02-16 0:43 UTC (permalink / raw)
To: gcc-patches; +Cc: Uros Bizjak
PR target/89021
* config/i386/i386.c (ix86_expand_vector_init_duplicate): Set
mmx_ok to true if TARGET_MMX_WITH_SSE is true.
(ix86_expand_vector_init_one_nonzero): Likewise.
(ix86_expand_vector_init_one_var): Likewise.
(ix86_expand_vector_init_general): Likewise.
(ix86_expand_vector_init): Likewise.
(ix86_expand_vector_set): Likewise.
(ix86_expand_vector_extract): Likewise.
* config/i386/mmx.md (*vec_dupv2sf): Changed to
define_insn_and_split to support SSE emulation.
(*vec_extractv2sf_0): Likewise.
(*vec_extractv2sf_1): Likewise.
(*vec_extractv2si_0): Likewise.
(*vec_extractv2si_1): Likewise.
(*vec_extractv2si_zext_mem): Likewise.
(vec_setv2sf): Also allow TARGET_MMX_WITH_SSE.
(vec_extractv2sf_1 splitter): Likewise.
(vec_extractv2sfsf): Likewise.
(vec_setv2si): Likewise.
(vec_extractv2si_1 splitter): Likewise.
(vec_extractv2sisi): Likewise.
(vec_setv4hi): Likewise.
(vec_extractv4hihi): Likewise.
(vec_setv8qi): Likewise.
(vec_extractv8qiqi): Likewise.
---
gcc/config/i386/i386.c | 8 +++++
gcc/config/i386/mmx.md | 69 +++++++++++++++++++++++++++---------------
2 files changed, 52 insertions(+), 25 deletions(-)
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index a76c17beece..25e0dc43a9e 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -42620,6 +42620,7 @@ ix86_expand_vector_init_duplicate (bool mmx_ok, machine_mode mode,
{
bool ok;
+ mmx_ok |= TARGET_MMX_WITH_SSE;
switch (mode)
{
case E_V2SImode:
@@ -42779,6 +42780,7 @@ ix86_expand_vector_init_one_nonzero (bool mmx_ok, machine_mode mode,
bool use_vector_set = false;
rtx (*gen_vec_set_0) (rtx, rtx, rtx) = NULL;
+ mmx_ok |= TARGET_MMX_WITH_SSE;
switch (mode)
{
case E_V2DImode:
@@ -42972,6 +42974,7 @@ ix86_expand_vector_init_one_var (bool mmx_ok, machine_mode mode,
XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
+ mmx_ok |= TARGET_MMX_WITH_SSE;
switch (mode)
{
case E_V2DFmode:
@@ -43357,6 +43360,7 @@ ix86_expand_vector_init_general (bool mmx_ok, machine_mode mode,
machine_mode quarter_mode = VOIDmode;
int n, i;
+ mmx_ok |= TARGET_MMX_WITH_SSE;
switch (mode)
{
case E_V2SFmode:
@@ -43556,6 +43560,8 @@ ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
int i;
rtx x;
+ mmx_ok |= TARGET_MMX_WITH_SSE;
+
/* Handle first initialization from vector elts. */
if (n_elts != XVECLEN (vals, 0))
{
@@ -43655,6 +43661,7 @@ ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
machine_mode mmode = VOIDmode;
rtx (*gen_blendm) (rtx, rtx, rtx, rtx);
+ mmx_ok |= TARGET_MMX_WITH_SSE;
switch (mode)
{
case E_V2SFmode:
@@ -44010,6 +44017,7 @@ ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
bool use_vec_extr = false;
rtx tmp;
+ mmx_ok |= TARGET_MMX_WITH_SSE;
switch (mode)
{
case E_V2SImode:
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index a21e11c8dfb..fa0b0126e91 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -555,14 +555,23 @@
(set_attr "prefix_extra" "1")
(set_attr "mode" "V2SF")])
-(define_insn "*vec_dupv2sf"
- [(set (match_operand:V2SF 0 "register_operand" "=y")
+(define_insn_and_split "*vec_dupv2sf"
+ [(set (match_operand:V2SF 0 "register_operand" "=y,x,Yv")
(vec_duplicate:V2SF
- (match_operand:SF 1 "register_operand" "0")))]
- "TARGET_MMX"
- "punpckldq\t%0, %0"
- [(set_attr "type" "mmxcvt")
- (set_attr "mode" "DI")])
+ (match_operand:SF 1 "register_operand" "0,0,Yv")))]
+ "TARGET_MMX || TARGET_MMX_WITH_SSE"
+ "@
+ punpckldq\t%0, %0
+ #
+ #"
+ "TARGET_MMX_WITH_SSE && reload_completed"
+ [(set (match_dup 0)
+ (vec_duplicate:V4SF (match_dup 1)))]
+ "operands[0] = lowpart_subreg (V4SFmode, operands[0],
+ GET_MODE (operands[0]));"
+ [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+ (set_attr "type" "mmxcvt,ssemov,ssemov")
+ (set_attr "mode" "DI,TI,TI")])
(define_insn "*mmx_concatv2sf"
[(set (match_operand:V2SF 0 "register_operand" "=y,y")
@@ -580,7 +589,7 @@
[(match_operand:V2SF 0 "register_operand")
(match_operand:SF 1 "register_operand")
(match_operand 2 "const_int_operand")]
- "TARGET_MMX"
+ "TARGET_MMX || TARGET_MMX_WITH_SSE"
{
ix86_expand_vector_set (false, operands[0], operands[1],
INTVAL (operands[2]));
@@ -594,11 +603,13 @@
(vec_select:SF
(match_operand:V2SF 1 "nonimmediate_operand" " xm,x,ym,y,m,m")
(parallel [(const_int 0)])))]
- "TARGET_MMX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+ && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
"#"
"&& reload_completed"
[(set (match_dup 0) (match_dup 1))]
- "operands[1] = gen_lowpart (SFmode, operands[1]);")
+ "operands[1] = gen_lowpart (SFmode, operands[1]);"
+ [(set_attr "mmx_isa" "*,*,native,native,*,*")])
;; Avoid combining registers from different units in a single alternative,
;; see comment above inline_secondary_memory_needed function in i386.c
@@ -607,7 +618,8 @@
(vec_select:SF
(match_operand:V2SF 1 "nonimmediate_operand" " 0,x,x,o,o,o,o")
(parallel [(const_int 1)])))]
- "TARGET_MMX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+ && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
"@
punpckhdq\t%0, %0
%vmovshdup\t{%1, %0|%0, %1}
@@ -617,6 +629,7 @@
#
#"
[(set_attr "isa" "*,sse3,noavx,*,*,*,*")
+ (set_attr "mmx_isa" "native,*,*,native,*,*,*")
(set_attr "type" "mmxcvt,sse,sseshuf1,mmxmov,ssemov,fmov,imov")
(set (attr "length_immediate")
(if_then_else (eq_attr "alternative" "2")
@@ -634,7 +647,7 @@
(vec_select:SF
(match_operand:V2SF 1 "memory_operand")
(parallel [(const_int 1)])))]
- "TARGET_MMX && reload_completed"
+ "(TARGET_MMX || TARGET_MMX_WITH_SSE) && reload_completed"
[(set (match_dup 0) (match_dup 1))]
"operands[1] = adjust_address (operands[1], SFmode, 4);")
@@ -642,7 +655,7 @@
[(match_operand:SF 0 "register_operand")
(match_operand:V2SF 1 "register_operand")
(match_operand 2 "const_int_operand")]
- "TARGET_MMX"
+ "TARGET_MMX || TARGET_MMX_WITH_SSE"
{
ix86_expand_vector_extract (false, operands[0], operands[1],
INTVAL (operands[2]));
@@ -1526,7 +1539,7 @@
[(match_operand:V2SI 0 "register_operand")
(match_operand:SI 1 "register_operand")
(match_operand 2 "const_int_operand")]
- "TARGET_MMX"
+ "TARGET_MMX || TARGET_MMX_WITH_SSE"
{
ix86_expand_vector_set (false, operands[0], operands[1],
INTVAL (operands[2]));
@@ -1540,11 +1553,13 @@
(vec_select:SI
(match_operand:V2SI 1 "nonimmediate_operand" "xm,x,ym,y,m")
(parallel [(const_int 0)])))]
- "TARGET_MMX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+ && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
"#"
"&& reload_completed"
[(set (match_dup 0) (match_dup 1))]
- "operands[1] = gen_lowpart (SImode, operands[1]);")
+ "operands[1] = gen_lowpart (SImode, operands[1]);"
+ [(set_attr "mmx_isa" "*,*,native,native,*")])
;; Avoid combining registers from different units in a single alternative,
;; see comment above inline_secondary_memory_needed function in i386.c
@@ -1553,7 +1568,8 @@
(vec_select:SI
(match_operand:V2SI 1 "nonimmediate_operand" " 0,x,x,o,o,o")
(parallel [(const_int 1)])))]
- "TARGET_MMX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+ && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
"@
punpckhdq\t%0, %0
%vpshufd\t{$0xe5, %1, %0|%0, %1, 0xe5}
@@ -1562,6 +1578,7 @@
#
#"
[(set_attr "isa" "*,sse2,noavx,*,*,*")
+ (set_attr "mmx_isa" "native,*,*,native,*,*")
(set_attr "type" "mmxcvt,sseshuf1,sseshuf1,mmxmov,ssemov,imov")
(set (attr "length_immediate")
(if_then_else (eq_attr "alternative" "1,2")
@@ -1575,7 +1592,7 @@
(vec_select:SI
(match_operand:V2SI 1 "memory_operand")
(parallel [(const_int 1)])))]
- "TARGET_MMX && reload_completed"
+ "(TARGET_MMX || TARGET_MMX_WITH_SSE) && reload_completed"
[(set (match_dup 0) (match_dup 1))]
"operands[1] = adjust_address (operands[1], SImode, 4);")
@@ -1585,19 +1602,21 @@
(vec_select:SI
(match_operand:V2SI 1 "memory_operand" "o,o,o")
(parallel [(match_operand:SI 2 "const_0_to_1_operand")]))))]
- "TARGET_64BIT && TARGET_MMX"
+ "TARGET_64BIT"
"#"
"&& reload_completed"
[(set (match_dup 0) (zero_extend:DI (match_dup 1)))]
{
operands[1] = adjust_address (operands[1], SImode, INTVAL (operands[2]) * 4);
-})
+}
+ [(set_attr "isa" "*,sse2,*")
+ (set_attr "mmx_isa" "native,*,*")])
(define_expand "vec_extractv2sisi"
[(match_operand:SI 0 "register_operand")
(match_operand:V2SI 1 "register_operand")
(match_operand 2 "const_int_operand")]
- "TARGET_MMX"
+ "TARGET_MMX || TARGET_MMX_WITH_SSE"
{
ix86_expand_vector_extract (false, operands[0], operands[1],
INTVAL (operands[2]));
@@ -1617,7 +1636,7 @@
[(match_operand:V4HI 0 "register_operand")
(match_operand:HI 1 "register_operand")
(match_operand 2 "const_int_operand")]
- "TARGET_MMX"
+ "TARGET_MMX || TARGET_MMX_WITH_SSE"
{
ix86_expand_vector_set (false, operands[0], operands[1],
INTVAL (operands[2]));
@@ -1628,7 +1647,7 @@
[(match_operand:HI 0 "register_operand")
(match_operand:V4HI 1 "register_operand")
(match_operand 2 "const_int_operand")]
- "TARGET_MMX"
+ "TARGET_MMX || TARGET_MMX_WITH_SSE"
{
ix86_expand_vector_extract (false, operands[0], operands[1],
INTVAL (operands[2]));
@@ -1648,7 +1667,7 @@
[(match_operand:V8QI 0 "register_operand")
(match_operand:QI 1 "register_operand")
(match_operand 2 "const_int_operand")]
- "TARGET_MMX"
+ "TARGET_MMX || TARGET_MMX_WITH_SSE"
{
ix86_expand_vector_set (false, operands[0], operands[1],
INTVAL (operands[2]));
@@ -1659,7 +1678,7 @@
[(match_operand:QI 0 "register_operand")
(match_operand:V8QI 1 "register_operand")
(match_operand 2 "const_int_operand")]
- "TARGET_MMX"
+ "TARGET_MMX || TARGET_MMX_WITH_SSE"
{
ix86_expand_vector_extract (false, operands[0], operands[1],
INTVAL (operands[2]));
--
2.20.1
^ permalink raw reply [flat|nested] 50+ messages in thread
* Re: [PATCH 39/42] i386: Allow MMX vector expanders with TARGET_MMX_WITH_SSE
2019-02-16 0:43 ` [PATCH 39/42] i386: Allow MMX vector expanders with TARGET_MMX_WITH_SSE H.J. Lu
@ 2019-02-16 9:50 ` Uros Bizjak
0 siblings, 0 replies; 50+ messages in thread
From: Uros Bizjak @ 2019-02-16 9:50 UTC (permalink / raw)
To: H.J. Lu; +Cc: gcc-patches
On 2/16/19, H.J. Lu <hjl.tools@gmail.com> wrote:
> PR target/89021
> * config/i386/i386.c (ix86_expand_vector_init_duplicate): Set
> mmx_ok to true if TARGET_MMX_WITH_SSE is true.
> (ix86_expand_vector_init_one_nonzero): Likewise.
> (ix86_expand_vector_init_one_var): Likewise.
> (ix86_expand_vector_init_general): Likewise.
> (ix86_expand_vector_init): Likewise.
> (ix86_expand_vector_set): Likewise.
> (ix86_expand_vector_extract): Likewise.
> * config/i386/mmx.md (*vec_dupv2sf): Changed to
> define_insn_and_split to support SSE emulation.
> (*vec_extractv2sf_0): Likewise.
> (*vec_extractv2sf_1): Likewise.
> (*vec_extractv2si_0): Likewise.
> (*vec_extractv2si_1): Likewise.
> (*vec_extractv2si_zext_mem): Likewise.
> (vec_setv2sf): Also allow TARGET_MMX_WITH_SSE.
> (vec_extractv2sf_1 splitter): Likewise.
> (vec_extractv2sfsf): Likewise.
> (vec_setv2si): Likewise.
> (vec_extractv2si_1 splitter): Likewise.
> (vec_extractv2sisi): Likewise.
> (vec_setv4hi): Likewise.
> (vec_extractv4hihi): Likewise.
> (vec_setv8qi): Likewise.
> (vec_extractv8qiqi): Likewise.
> ---
> gcc/config/i386/i386.c | 8 +++++
> gcc/config/i386/mmx.md | 69 +++++++++++++++++++++++++++---------------
> 2 files changed, 52 insertions(+), 25 deletions(-)
>
> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
> index a76c17beece..25e0dc43a9e 100644
> --- a/gcc/config/i386/i386.c
> +++ b/gcc/config/i386/i386.c
> @@ -42620,6 +42620,7 @@ ix86_expand_vector_init_duplicate (bool mmx_ok,
> machine_mode mode,
> {
> bool ok;
>
> + mmx_ok |= TARGET_MMX_WITH_SSE;
> switch (mode)
> {
> case E_V2SImode:
> @@ -42779,6 +42780,7 @@ ix86_expand_vector_init_one_nonzero (bool mmx_ok,
> machine_mode mode,
> bool use_vector_set = false;
> rtx (*gen_vec_set_0) (rtx, rtx, rtx) = NULL;
>
> + mmx_ok |= TARGET_MMX_WITH_SSE;
> switch (mode)
> {
> case E_V2DImode:
> @@ -42972,6 +42974,7 @@ ix86_expand_vector_init_one_var (bool mmx_ok,
> machine_mode mode,
> XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
> const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
>
> + mmx_ok |= TARGET_MMX_WITH_SSE;
> switch (mode)
> {
> case E_V2DFmode:
> @@ -43357,6 +43360,7 @@ ix86_expand_vector_init_general (bool mmx_ok,
> machine_mode mode,
> machine_mode quarter_mode = VOIDmode;
> int n, i;
>
> + mmx_ok |= TARGET_MMX_WITH_SSE;
> switch (mode)
> {
> case E_V2SFmode:
> @@ -43556,6 +43560,8 @@ ix86_expand_vector_init (bool mmx_ok, rtx target,
> rtx vals)
> int i;
> rtx x;
>
> + mmx_ok |= TARGET_MMX_WITH_SSE;
> +
> /* Handle first initialization from vector elts. */
> if (n_elts != XVECLEN (vals, 0))
> {
> @@ -43655,6 +43661,7 @@ ix86_expand_vector_set (bool mmx_ok, rtx target, rtx
> val, int elt)
> machine_mode mmode = VOIDmode;
> rtx (*gen_blendm) (rtx, rtx, rtx, rtx);
>
> + mmx_ok |= TARGET_MMX_WITH_SSE;
> switch (mode)
> {
> case E_V2SFmode:
> @@ -44010,6 +44017,7 @@ ix86_expand_vector_extract (bool mmx_ok, rtx target,
> rtx vec, int elt)
> bool use_vec_extr = false;
> rtx tmp;
>
> + mmx_ok |= TARGET_MMX_WITH_SSE;
> switch (mode)
> {
> case E_V2SImode:
> diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
> index a21e11c8dfb..fa0b0126e91 100644
> --- a/gcc/config/i386/mmx.md
> +++ b/gcc/config/i386/mmx.md
> @@ -555,14 +555,23 @@
> (set_attr "prefix_extra" "1")
> (set_attr "mode" "V2SF")])
>
> -(define_insn "*vec_dupv2sf"
> - [(set (match_operand:V2SF 0 "register_operand" "=y")
> +(define_insn_and_split "*vec_dupv2sf"
> + [(set (match_operand:V2SF 0 "register_operand" "=y,x,Yv")
> (vec_duplicate:V2SF
> - (match_operand:SF 1 "register_operand" "0")))]
> - "TARGET_MMX"
> - "punpckldq\t%0, %0"
> - [(set_attr "type" "mmxcvt")
> - (set_attr "mode" "DI")])
> + (match_operand:SF 1 "register_operand" "0,0,Yv")))]
> + "TARGET_MMX || TARGET_MMX_WITH_SSE"
> + "@
> + punpckldq\t%0, %0
> + #
> + #"
> + "TARGET_MMX_WITH_SSE && reload_completed"
> + [(set (match_dup 0)
> + (vec_duplicate:V4SF (match_dup 1)))]
> + "operands[0] = lowpart_subreg (V4SFmode, operands[0],
> + GET_MODE (operands[0]));"
> + [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
> + (set_attr "type" "mmxcvt,ssemov,ssemov")
> + (set_attr "mode" "DI,TI,TI")])
>
> (define_insn "*mmx_concatv2sf"
> [(set (match_operand:V2SF 0 "register_operand" "=y,y")
> @@ -580,7 +589,7 @@
> [(match_operand:V2SF 0 "register_operand")
> (match_operand:SF 1 "register_operand")
> (match_operand 2 "const_int_operand")]
> - "TARGET_MMX"
> + "TARGET_MMX || TARGET_MMX_WITH_SSE"
> {
> ix86_expand_vector_set (false, operands[0], operands[1],
> INTVAL (operands[2]));
> @@ -594,11 +603,13 @@
> (vec_select:SF
> (match_operand:V2SF 1 "nonimmediate_operand" " xm,x,ym,y,m,m")
> (parallel [(const_int 0)])))]
> - "TARGET_MMX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
> + "(TARGET_MMX || TARGET_MMX_WITH_SSE)
> + && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
> "#"
> "&& reload_completed"
> [(set (match_dup 0) (match_dup 1))]
> - "operands[1] = gen_lowpart (SFmode, operands[1]);")
> + "operands[1] = gen_lowpart (SFmode, operands[1]);"
> + [(set_attr "mmx_isa" "*,*,native,native,*,*")])
>
> ;; Avoid combining registers from different units in a single alternative,
> ;; see comment above inline_secondary_memory_needed function in i386.c
> @@ -607,7 +618,8 @@
> (vec_select:SF
> (match_operand:V2SF 1 "nonimmediate_operand" " 0,x,x,o,o,o,o")
> (parallel [(const_int 1)])))]
> - "TARGET_MMX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
> + "(TARGET_MMX || TARGET_MMX_WITH_SSE)
> + && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
> "@
> punpckhdq\t%0, %0
> %vmovshdup\t{%1, %0|%0, %1}
> @@ -617,6 +629,7 @@
> #
> #"
> [(set_attr "isa" "*,sse3,noavx,*,*,*,*")
> + (set_attr "mmx_isa" "native,*,*,native,*,*,*")
> (set_attr "type" "mmxcvt,sse,sseshuf1,mmxmov,ssemov,fmov,imov")
> (set (attr "length_immediate")
> (if_then_else (eq_attr "alternative" "2")
> @@ -634,7 +647,7 @@
> (vec_select:SF
> (match_operand:V2SF 1 "memory_operand")
> (parallel [(const_int 1)])))]
> - "TARGET_MMX && reload_completed"
> + "(TARGET_MMX || TARGET_MMX_WITH_SSE) && reload_completed"
> [(set (match_dup 0) (match_dup 1))]
> "operands[1] = adjust_address (operands[1], SFmode, 4);")
>
> @@ -642,7 +655,7 @@
> [(match_operand:SF 0 "register_operand")
> (match_operand:V2SF 1 "register_operand")
> (match_operand 2 "const_int_operand")]
> - "TARGET_MMX"
> + "TARGET_MMX || TARGET_MMX_WITH_SSE"
> {
> ix86_expand_vector_extract (false, operands[0], operands[1],
> INTVAL (operands[2]));
> @@ -1526,7 +1539,7 @@
> [(match_operand:V2SI 0 "register_operand")
> (match_operand:SI 1 "register_operand")
> (match_operand 2 "const_int_operand")]
> - "TARGET_MMX"
> + "TARGET_MMX || TARGET_MMX_WITH_SSE"
> {
> ix86_expand_vector_set (false, operands[0], operands[1],
> INTVAL (operands[2]));
> @@ -1540,11 +1553,13 @@
> (vec_select:SI
> (match_operand:V2SI 1 "nonimmediate_operand" "xm,x,ym,y,m")
> (parallel [(const_int 0)])))]
> - "TARGET_MMX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
> + "(TARGET_MMX || TARGET_MMX_WITH_SSE)
> + && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
> "#"
> "&& reload_completed"
> [(set (match_dup 0) (match_dup 1))]
> - "operands[1] = gen_lowpart (SImode, operands[1]);")
> + "operands[1] = gen_lowpart (SImode, operands[1]);"
> + [(set_attr "mmx_isa" "*,*,native,native,*")])
>
> ;; Avoid combining registers from different units in a single alternative,
> ;; see comment above inline_secondary_memory_needed function in i386.c
> @@ -1553,7 +1568,8 @@
> (vec_select:SI
> (match_operand:V2SI 1 "nonimmediate_operand" " 0,x,x,o,o,o")
> (parallel [(const_int 1)])))]
> - "TARGET_MMX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
> + "(TARGET_MMX || TARGET_MMX_WITH_SSE)
> + && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
> "@
> punpckhdq\t%0, %0
> %vpshufd\t{$0xe5, %1, %0|%0, %1, 0xe5}
> @@ -1562,6 +1578,7 @@
> #
> #"
> [(set_attr "isa" "*,sse2,noavx,*,*,*")
> + (set_attr "mmx_isa" "native,*,*,native,*,*")
> (set_attr "type" "mmxcvt,sseshuf1,sseshuf1,mmxmov,ssemov,imov")
> (set (attr "length_immediate")
> (if_then_else (eq_attr "alternative" "1,2")
> @@ -1575,7 +1592,7 @@
> (vec_select:SI
> (match_operand:V2SI 1 "memory_operand")
> (parallel [(const_int 1)])))]
> - "TARGET_MMX && reload_completed"
> + "(TARGET_MMX || TARGET_MMX_WITH_SSE) && reload_completed"
> [(set (match_dup 0) (match_dup 1))]
> "operands[1] = adjust_address (operands[1], SImode, 4);")
>
> @@ -1585,19 +1602,21 @@
> (vec_select:SI
> (match_operand:V2SI 1 "memory_operand" "o,o,o")
> (parallel [(match_operand:SI 2 "const_0_to_1_operand")]))))]
> - "TARGET_64BIT && TARGET_MMX"
> + "TARGET_64BIT"
> "#"
> "&& reload_completed"
> [(set (match_dup 0) (zero_extend:DI (match_dup 1)))]
> {
> operands[1] = adjust_address (operands[1], SImode, INTVAL (operands[2]) *
> 4);
> -})
> +}
> + [(set_attr "isa" "*,sse2,*")
I think the above hunk should be committed as a separate bugfix patch
before gcc-9 is released.
Uros.
> + (set_attr "mmx_isa" "native,*,*")])
>
> (define_expand "vec_extractv2sisi"
> [(match_operand:SI 0 "register_operand")
> (match_operand:V2SI 1 "register_operand")
> (match_operand 2 "const_int_operand")]
> - "TARGET_MMX"
> + "TARGET_MMX || TARGET_MMX_WITH_SSE"
> {
> ix86_expand_vector_extract (false, operands[0], operands[1],
> INTVAL (operands[2]));
> @@ -1617,7 +1636,7 @@
> [(match_operand:V4HI 0 "register_operand")
> (match_operand:HI 1 "register_operand")
> (match_operand 2 "const_int_operand")]
> - "TARGET_MMX"
> + "TARGET_MMX || TARGET_MMX_WITH_SSE"
> {
> ix86_expand_vector_set (false, operands[0], operands[1],
> INTVAL (operands[2]));
> @@ -1628,7 +1647,7 @@
> [(match_operand:HI 0 "register_operand")
> (match_operand:V4HI 1 "register_operand")
> (match_operand 2 "const_int_operand")]
> - "TARGET_MMX"
> + "TARGET_MMX || TARGET_MMX_WITH_SSE"
> {
> ix86_expand_vector_extract (false, operands[0], operands[1],
> INTVAL (operands[2]));
> @@ -1648,7 +1667,7 @@
> [(match_operand:V8QI 0 "register_operand")
> (match_operand:QI 1 "register_operand")
> (match_operand 2 "const_int_operand")]
> - "TARGET_MMX"
> + "TARGET_MMX || TARGET_MMX_WITH_SSE"
> {
> ix86_expand_vector_set (false, operands[0], operands[1],
> INTVAL (operands[2]));
> @@ -1659,7 +1678,7 @@
> [(match_operand:QI 0 "register_operand")
> (match_operand:V8QI 1 "register_operand")
> (match_operand 2 "const_int_operand")]
> - "TARGET_MMX"
> + "TARGET_MMX || TARGET_MMX_WITH_SSE"
> {
> ix86_expand_vector_extract (false, operands[0], operands[1],
> INTVAL (operands[2]));
> --
> 2.20.1
>
>
^ permalink raw reply [flat|nested] 50+ messages in thread
* [PATCH 21/42] i386: Emulate MMX maskmovq with SSE2 maskmovdqu
2019-02-16 0:34 [PATCH 00/42] V7: Emulate MMX intrinsics with SSE H.J. Lu
` (38 preceding siblings ...)
2019-02-16 0:43 ` [PATCH 39/42] i386: Allow MMX vector expanders with TARGET_MMX_WITH_SSE H.J. Lu
@ 2019-02-16 0:43 ` H.J. Lu
2019-02-16 0:43 ` [PATCH 35/42] i386: Emulate MMX abs<mode>2 with SSE H.J. Lu
2019-02-16 0:43 ` [PATCH 24/42] i386: Emulate MMX mmx_psadbw " H.J. Lu
41 siblings, 0 replies; 50+ messages in thread
From: H.J. Lu @ 2019-02-16 0:43 UTC (permalink / raw)
To: gcc-patches; +Cc: Uros Bizjak
Emulate MMX maskmovq with SSE2 maskmovdqu for TARGET_MMX_WITH_SSE by
zero-extending the source and mask operands to 128 bits. Since maskmovdqu
covers 128 bits, bits 64:127 at the memory address may be unmapped; handle
this by adjusting the source and mask operands together with the memory
address.
PR target/89021
* config/i386/xmmintrin.h: Emulate MMX maskmovq with SSE2
maskmovdqu for __MMX_WITH_SSE__.
---
gcc/config/i386/xmmintrin.h | 61 +++++++++++++++++++++++++++++++++++++
1 file changed, 61 insertions(+)
diff --git a/gcc/config/i386/xmmintrin.h b/gcc/config/i386/xmmintrin.h
index 58284378514..a915f6c87d7 100644
--- a/gcc/config/i386/xmmintrin.h
+++ b/gcc/config/i386/xmmintrin.h
@@ -1165,7 +1165,68 @@ _m_pshufw (__m64 __A, int const __N)
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskmove_si64 (__m64 __A, __m64 __N, char *__P)
{
+#ifdef __MMX_WITH_SSE__
+ /* Emulate MMX maskmovq with SSE2 maskmovdqu and handle unmapped bits
+ 64:127 at address __P. */
+ typedef long long __v2di __attribute__ ((__vector_size__ (16)));
+ typedef char __v16qi __attribute__ ((__vector_size__ (16)));
+ /* Zero-extend __A and __N to 128 bits. */
+ __v2di __A128 = __extension__ (__v2di) { ((__v1di) __A)[0], 0 };
+ __v2di __N128 = __extension__ (__v2di) { ((__v1di) __N)[0], 0 };
+
+ /* Check the alignment of __P. */
+ __SIZE_TYPE__ offset = ((__SIZE_TYPE__) __P) & 0xf;
+ if (offset)
+ {
+ /* If the misalignment of __P > 8, subtract 8 bytes from __P.
+ Otherwise, subtract the misalignment from __P. */
+ if (offset > 8)
+ offset = 8;
+ __P = (char *) (((__SIZE_TYPE__) __P) - offset);
+
+ /* Shift __A128 and __N128 to the left by the adjustment. */
+ switch (offset)
+ {
+ case 1:
+ __A128 = __builtin_ia32_pslldqi128 (__A128, 8);
+ __N128 = __builtin_ia32_pslldqi128 (__N128, 8);
+ break;
+ case 2:
+ __A128 = __builtin_ia32_pslldqi128 (__A128, 2 * 8);
+ __N128 = __builtin_ia32_pslldqi128 (__N128, 2 * 8);
+ break;
+ case 3:
+ __A128 = __builtin_ia32_pslldqi128 (__A128, 3 * 8);
+ __N128 = __builtin_ia32_pslldqi128 (__N128, 3 * 8);
+ break;
+ case 4:
+ __A128 = __builtin_ia32_pslldqi128 (__A128, 4 * 8);
+ __N128 = __builtin_ia32_pslldqi128 (__N128, 4 * 8);
+ break;
+ case 5:
+ __A128 = __builtin_ia32_pslldqi128 (__A128, 5 * 8);
+ __N128 = __builtin_ia32_pslldqi128 (__N128, 5 * 8);
+ break;
+ case 6:
+ __A128 = __builtin_ia32_pslldqi128 (__A128, 6 * 8);
+ __N128 = __builtin_ia32_pslldqi128 (__N128, 6 * 8);
+ break;
+ case 7:
+ __A128 = __builtin_ia32_pslldqi128 (__A128, 7 * 8);
+ __N128 = __builtin_ia32_pslldqi128 (__N128, 7 * 8);
+ break;
+ case 8:
+ __A128 = __builtin_ia32_pslldqi128 (__A128, 8 * 8);
+ __N128 = __builtin_ia32_pslldqi128 (__N128, 8 * 8);
+ break;
+ default:
+ break;
+ }
+ }
+ __builtin_ia32_maskmovdqu ((__v16qi)__A128, (__v16qi)__N128, __P);
+#else
__builtin_ia32_maskmovq ((__v8qi)__A, (__v8qi)__N, __P);
+#endif
}
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
--
2.20.1
^ permalink raw reply [flat|nested] 50+ messages in thread
* [PATCH 35/42] i386: Emulate MMX abs<mode>2 with SSE
2019-02-16 0:34 [PATCH 00/42] V7: Emulate MMX intrinsics with SSE H.J. Lu
` (39 preceding siblings ...)
2019-02-16 0:43 ` [PATCH 21/42] i386: Emulate MMX maskmovq with SSE2 maskmovdqu H.J. Lu
@ 2019-02-16 0:43 ` H.J. Lu
2019-02-16 0:43 ` [PATCH 24/42] i386: Emulate MMX mmx_psadbw " H.J. Lu
41 siblings, 0 replies; 50+ messages in thread
From: H.J. Lu @ 2019-02-16 0:43 UTC (permalink / raw)
To: gcc-patches; +Cc: Uros Bizjak
Emulate MMX abs<mode>2 with SSE. For the SSE alternative, only an SSE
register is allowed as the source operand (no memory operand).
PR target/89021
* config/i386/sse.md (abs<mode>2): Add SSE emulation.
---
gcc/config/i386/sse.md | 15 +++++++++------
1 file changed, 9 insertions(+), 6 deletions(-)
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index ec68b5dc2ce..92f5ad17156 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -15942,16 +15942,19 @@
})
(define_insn "abs<mode>2"
- [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
+ [(set (match_operand:MMXMODEI 0 "register_operand" "=y,Yv")
(abs:MMXMODEI
- (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
- "TARGET_SSSE3"
- "pabs<mmxvecsize>\t{%1, %0|%0, %1}";
- [(set_attr "type" "sselog1")
+ (match_operand:MMXMODEI 1 "register_mmxmem_operand" "ym,Yv")))]
+ "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
+ "@
+ pabs<mmxvecsize>\t{%1, %0|%0, %1}
+ %vpabs<mmxvecsize>\t{%1, %0|%0, %1}"
+ [(set_attr "mmx_isa" "native,x64")
+ (set_attr "type" "sselog1")
(set_attr "prefix_rep" "0")
(set_attr "prefix_extra" "1")
(set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
- (set_attr "mode" "DI")])
+ (set_attr "mode" "DI,TI")])
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
--
2.20.1
^ permalink raw reply [flat|nested] 50+ messages in thread
* [PATCH 24/42] i386: Emulate MMX mmx_psadbw with SSE
2019-02-16 0:34 [PATCH 00/42] V7: Emulate MMX intrinsics with SSE H.J. Lu
` (40 preceding siblings ...)
2019-02-16 0:43 ` [PATCH 35/42] i386: Emulate MMX abs<mode>2 with SSE H.J. Lu
@ 2019-02-16 0:43 ` H.J. Lu
41 siblings, 0 replies; 50+ messages in thread
From: H.J. Lu @ 2019-02-16 0:43 UTC (permalink / raw)
To: gcc-patches; +Cc: Uros Bizjak
Emulate MMX mmx_psadbw with SSE. For the SSE alternatives, only an SSE
register is allowed as the source operand (no memory operand).
PR target/89021
* config/i386/mmx.md (mmx_psadbw): Add SSE emulation.
---
gcc/config/i386/mmx.md | 19 ++++++++++++-------
1 file changed, 12 insertions(+), 7 deletions(-)
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index b8983e1755a..bcce7c06c4f 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -1770,14 +1770,19 @@
(set_attr "mode" "DI,TI,TI")])
(define_insn "mmx_psadbw"
- [(set (match_operand:V1DI 0 "register_operand" "=y")
- (unspec:V1DI [(match_operand:V8QI 1 "register_operand" "0")
- (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
+ [(set (match_operand:V1DI 0 "register_operand" "=y,x,Yv")
+ (unspec:V1DI [(match_operand:V8QI 1 "register_operand" "0,0,Yv")
+ (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yv")]
UNSPEC_PSADBW))]
- "TARGET_SSE || TARGET_3DNOW_A"
- "psadbw\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmxshft")
- (set_attr "mode" "DI")])
+ "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+ && (TARGET_SSE || TARGET_3DNOW_A)"
+ "@
+ psadbw\t{%2, %0|%0, %2}
+ psadbw\t{%2, %0|%0, %2}
+ vpsadbw\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+ (set_attr "type" "mmxshft,sseiadd,sseiadd")
+ (set_attr "mode" "DI,TI,TI")])
(define_insn_and_split "mmx_pmovmskb"
[(set (match_operand:SI 0 "register_operand" "=r,r")
--
2.20.1
^ permalink raw reply [flat|nested] 50+ messages in thread